使用 Python 获取糗百图片(基于 sgmllib)


# Scrape image URLs from qiushibaike.com listing pages and save each image to
# the current working directory.  Python 2 only: relies on sgmllib and urllib2.
from contextlib import closing
from sgmllib import SGMLParser
import urllib2


class sgm(SGMLParser):
    """SGML parser that collects ``<img src=...>`` URLs from a page.

    Collection is switched off while inside a ``<div>`` that has any
    attribute whose value is ``"author"`` and switched back on at the next
    ``</div>``.  (Presumably this skips author avatar images -- confirm
    against the site markup.)

    Attributes:
        srcs: list of image URLs collected since the last ``reset()``.
        ISTRUE: True while ``<img>`` tags should be collected.
    """

    def reset(self):
        """Reset the underlying parser and clear all collected state."""
        SGMLParser.reset(self)
        self.srcs = []
        self.ISTRUE = True

    def start_div(self, artts):
        """Stop collecting images when the div carries an "author" value.

        ``artts`` is the list of ``(name, value)`` attribute pairs that
        SGMLParser passes for the opening tag.
        """
        for _name, value in artts:
            if value == "author":
                self.ISTRUE = False

    def end_div(self):
        """Resume collecting images once any ``</div>`` is seen."""
        # NOTE: this fires on every closing div, including ones nested inside
        # the "author" div, so collection may resume before that div ends.
        self.ISTRUE = True

    def start_img(self, artts):
        """Record the ``src`` attribute of an image if collection is on."""
        for name, value in artts:
            if name == "src" and self.ISTRUE:
                self.srcs.append(value)

    def download(self):
        """Fetch every URL in ``self.srcs`` and write it to a local file.

        Each file is named after the last 12 characters of its URL and is
        created in the current working directory.
        """
        for src in self.srcs:
            print(src)
            # Fetch first so a failed request does not leave an empty file
            # behind; closing() guarantees the HTTP response is released.
            with closing(urllib2.urlopen(src)) as img:
                content = img.read()
            with open(src[-12:], "wb") as f:
                f.write(content)


# The instance deliberately keeps the original module-level name ``sgm``
# (shadowing the class) so the module's resulting namespace is unchanged.
sgm = sgm()
for page in range(1, 500):
    url = "http://www.qiushibaike.com/late/page/%s?s=4622726" % page
    with closing(urllib2.urlopen(url)) as response:
        data = response.read()
    # Start each page from a clean parser; otherwise ``srcs`` keeps growing
    # and download() re-fetches every image from all earlier pages.
    sgm.reset()
    sgm.feed(data)
    # close() forces any still-buffered markup of this page to be processed.
    sgm.close()
    sgm.download()
# This snippet comes from http://byrx.net

评论关闭