Fetch the URLs linked from a web page


# Python 2: fetch a page, save its prettified HTML locally, and print every link.
import urllib2
from bs4 import BeautifulSoup

url = "http://zhiqq.com"
s = urllib2.urlopen(url).read()

# The page is GB2312-encoded; re-encode it as UTF-8 before parsing.
htm = unicode(s, 'gb2312', 'ignore').encode('utf-8', 'ignore')
soup = BeautifulSoup(htm)

# Save the prettified HTML to a local file.
f = open('C:/Peng,meijie/sou.txt', 'w')
f.write(soup.prettify())
f.close()

# Print each <a> tag's href together with a running index.
n = 0
for link in soup.find_all('a'):
    print link.get('href')
    print n
    n = n + 1

# This snippet comes from http://byrx.net
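
For Python 3, a minimal sketch of the same task could look like the following; it assumes the target page is still GB2312-encoded, that BeautifulSoup 4 is installed, and uses an example output path rather than the original one.

# Python 3 sketch (assumptions: page is GB2312-encoded, bs4 is installed).
import urllib.request
from bs4 import BeautifulSoup

url = "http://zhiqq.com"
raw = urllib.request.urlopen(url).read()
html = raw.decode('gb2312', errors='ignore')
soup = BeautifulSoup(html, 'html.parser')

# Write the prettified HTML to a local file (example path).
with open('sou.txt', 'w', encoding='utf-8') as f:
    f.write(soup.prettify())

# Print every hyperlink with a running index.
for n, link in enumerate(soup.find_all('a')):
    print(n, link.get('href'))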
