Python爬虫:抓取豆瓣勾搭组妹纸照片
文章由Byrx.net分享于2019-03-23 09:03:11
Python爬虫:抓取豆瓣勾搭组妹纸照片
# Crawler for the Douban "kaopulove" group discussion board.
#
# Asks for the last listing page to crawl, fetches each listing page, picks
# one topic URL from it, and saves the large images found in that topic to a
# local directory.
import os
import re
import time
import urllib.request

# The listing is paged through a ``start`` offset that advances by this many
# entries per page.
TOPICS_PER_PAGE = 25

# Listing page URL; ``%d`` receives the ``start`` offset.
DISCUSSION_URL = 'http://www.douban.com/group/kaopulove/discussion?start=%d'

# Directory the images are written to unless the caller overrides it.
DEFAULT_SAVE_DIR = 'D:\\python\\code\\girls'

# Pause after every image download so the crawl does not hammer the server.
REQUEST_DELAY_SECONDS = 1

# Patterns are compiled once at import time.  Both URL schemes are accepted so
# the patterns keep matching if the pages link over https.
TOPIC_URL_RE = re.compile(r'https?://www\.douban\.com/group/topic/\d+')
# ``.+?`` is non-greedy so that two image URLs on the same line of HTML are
# returned as two matches instead of one span running to the last ".jpg".
IMAGE_URL_RE = re.compile(
    r'https?://img3\.douban\.com/view/group_topic/large/public/.+?\.jpg')
# Image id used as the local file name: "p" followed by at least seven digits.
IMAGE_ID_RE = re.compile(r'p\d{7,}')


def getHtml2(url2):
    """Fetch *url2* and return the response body decoded as UTF-8.

    The response is closed as soon as the body has been read.
    """
    with urllib.request.urlopen(url2) as response:
        return response.read().decode('utf-8')


def gettopic(html2):
    """Return one topic URL found in the listing page *html2*.

    Returns the last topic URL matched in *html2*, or ``None`` when the page
    contains no topic link.
    """
    # NOTE(review): only a single topic per listing page is returned, so the
    # crawl visits one topic per page -- confirm this is intended.
    topiclist = TOPIC_URL_RE.findall(html2)
    return topiclist[-1] if topiclist else None


def download(topic_page, save_dir=DEFAULT_SAVE_DIR):
    """Download every large image referenced in *topic_page*.

    Args:
        topic_page: HTML text of a topic page.
        save_dir: Directory the images are saved into; it is created when
            missing.  Each file is named ``<image id>.jpg``.

    Returns:
        The value returned by ``urllib.request.urlretrieve`` for the last
        image downloaded, or ``None`` when nothing was downloaded.
    """
    imglist = IMAGE_URL_RE.findall(topic_page)
    if not imglist:
        return None

    # urlretrieve cannot write into a directory that does not exist yet.
    os.makedirs(save_dir, exist_ok=True)

    download_img = None
    for imgurl in imglist:
        # The image id embedded in the URL becomes the file name.
        for img_num in IMAGE_ID_RE.findall(imgurl):
            target = os.path.join(save_dir, '%s.jpg' % img_num)
            download_img = urllib.request.urlretrieve(imgurl, target)
            time.sleep(REQUEST_DELAY_SECONDS)
        print(imgurl)
    return download_img


def main():
    """Prompt for the end page and crawl listing pages 1..end page."""
    page_end = int(input('请输入结束时的页码:'))
    # Page p (1-based) starts at offset (p - 1) * TOPICS_PER_PAGE, so the
    # last offset to fetch is (page_end - 1) * TOPICS_PER_PAGE.
    for num in range(0, page_end * TOPICS_PER_PAGE, TOPICS_PER_PAGE):
        html2 = getHtml2(DISCUSSION_URL % num)
        topicurl = gettopic(html2)
        if topicurl is None:
            # Nothing to crawl on this listing page.
            continue
        download(getHtml2(topicurl))
    print('采集完成!')


# Guarded so that importing this module does not prompt or touch the network.
if __name__ == '__main__':
    main()

# Snippet source: http://byrx.net
评论关闭