页面url的可连接性,页面url连接性


import reimport urllibimport urllib2from bs4 import BeautifulSoupprint 'nihao'url = "http://zhiqq.com"s = urllib2.urlopen('http://zhiqq.com')s = s.read()htm = unicode(s,'gb2312','ignore').encode('utf-8','ignore')soup = BeautifulSoup(htm)sou = soup.prettify()f = open('C:/sou.txt','w')f.write(sou)n = 0for link in soup.find_all('a'):    print link.get('href')    print n    n = n+1    try:        urllib2.urlopen(link.get('href')).getcode()    except:        print "*******connect failed"        continue#该片段来自于http://byrx.net

评论关闭