从订阅源下载图片的例子,订阅图片例子
文章由Byrx.net分享于2019-03-23 05:03:39
从订阅源下载图片的例子,订阅图片例子
__author__ = 'Saint'

import os
import urllib.parse
import urllib.request
import json
from html.parser import HTMLParser


def _clear_screen():
    """Clear the terminal with the command that matches the host OS."""
    os.system("cls" if os.name == "nt" else "clear")


# Extracts the image addresses from fetched page content.
class MyHtmlParser(HTMLParser):
    """Collect the ``src`` value of every ``<img>`` start tag fed to the parser.

    The collected values are available, in document order, in ``self.links``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list. A class-level list would be shared by every
        # parser, so each new page would also "contain" all earlier images.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` attribute of ``<img>`` tags; ignore other tags."""
        if tag != "img":
            return
        for name, value in attrs:
            # ``value`` is None for a bare ``src`` attribute; skip empty ones.
            if name == "src" and value:
                self.links.append(value)


class Down(object):
    """Download every image referenced by the articles of a set of feeds."""

    # Root directory; one sub-directory per feed is created beneath it.
    img_path = "E:/saint"
    # Directory the current feed is downloaded into (set by isDirExists).
    dir = ''
    # Feed (article list) addresses to collect from.
    collect_links = ["http://dy.163.com/v2/media/articlelist/T1374483113516-1",
                     "http://dy.163.com/v2/media/articlelist/T1420776257254-1",
                     "http://dy.163.com/v2/media/articlelist/T1376641060407-1"]
    # Base address of a single article page.
    img_links = "http://dy.163.com/v2/article"

    def handleCollect(self):
        """Walk all feeds interactively, downloading the images of each one.

        After every feed the user is asked on stdin whether to go on; a failed
        feed asks the same question with a different prompt.
        """
        for collect_link in self.collect_links:
            print("开始从[" + collect_link + "]采集图片")
            # The last URL segment doubles as the download directory name.
            dir_name = collect_link.split("/")[-1]
            self.isDirExists(dir_name)
            pages = self.getListFromSubscribe(collect_link)
            if pages is False:
                print("数据采集失败,是否继续(y/n)")
                op = input()
                if op == "y":
                    _clear_screen()
                    continue
                if op == "n":
                    print("停止采集")
                else:
                    _clear_screen()
                    print("非法输入")
                break
            for page in pages:
                page_uri = self.img_links + "/" + page["tid"] + "/" + page["docid"]
                self.getImgFromUri(page_uri)
            # NOTE(review): the original indentation was lost; this prompt is
            # assumed to run once per feed and the final "OK" once at the end.
            print("是否继续(y/n)")
            if input() == "n":
                _clear_screen()
                print("采集完毕")
                break
        print("OK")

    # Fetch the article list of a feed.
    def getListFromSubscribe(self, uri):
        """Return the ``data`` list of the feed at ``uri``, or False on failure.

        Failure means: the request could not be completed, the status code is
        outside 2xx, the body is not valid GBK-encoded JSON, or the payload's
        ``code`` field is not 1 (in which case its ``msg`` is printed).
        """
        try:
            with urllib.request.urlopen(uri) as res:
                status = res.code
                # Non-HTTP handlers report no status code at all.
                if status is not None and not 200 <= status < 300:
                    _clear_screen()
                    return False
                # read() returns bytes; the feed is served GBK-encoded.
                raw = res.read()
        except OSError as err:
            # URLError / HTTPError are OSError subclasses. The screen is not
            # cleared here so the reason stays visible above the prompt.
            print(err)
            return False
        try:
            payload = json.loads(raw.decode("gbk"))
        except ValueError as err:
            # Covers both undecodable bytes and malformed JSON.
            print(err)
            return False
        if not isinstance(payload, dict):
            return False
        if payload.get('code') != 1:
            print(payload.get('msg'))
            return False
        return payload.get('data', [])

    # Fetch one article page and download the images it references.
    def getImgFromUri(self, uri):
        """Download every ``<img src>`` found in the page at ``uri``.

        A page or image that cannot be fetched is reported and skipped so one
        broken link does not abort the whole run.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                html_code = res.read().decode("gbk", errors="replace")
        except OSError as err:
            print(err)
            return
        hp = MyHtmlParser()
        hp.feed(html_code)
        hp.close()
        for link in hp.links:
            # Resolve relative and protocol-relative addresses against the page.
            image_url = urllib.parse.urljoin(uri, link)
            try:
                self.writeToDisk(image_url)
            except (OSError, ValueError) as err:
                print(err)

    # Make sure the download directory exists, creating it when needed.
    def isDirExists(self, dir_name):
        """Point ``self.dir`` at ``img_path/dir_name`` and create it if missing.

        Always returns True.
        """
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    # Download a file and write it to disk.
    def writeToDisk(self, url):
        """Save the resource at ``url`` into ``self.dir`` and return True.

        The file name is the last segment of the URL path, without query
        string or fragment; ``index`` is used when that segment is empty.
        """
        url_path = urllib.parse.urlsplit(url).path
        file_name = os.path.basename(url_path) or "index"
        with urllib.request.urlopen(url) as res:
            content = res.read()
        # Write by full path instead of chdir() so the process working
        # directory is left untouched.
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(content)
        return True


if __name__ == "__main__":
    down = Down()
    down.handleCollect()
相关内容
- uwsgi,,<uwsgi> <so
- python 多态实例,python多态,# coding:utf
- python继承练习,python继承,class Employ
- 发送邮件,可带附件,可群发,发送邮件群发,# 这是发布
- 开源软件批量下载,开源软件,import reque
- 检查系统模块的代码是否可用,检查模块代码可用,py
- Python发送邮件的例子,运维人员使用,python发送邮件
- 对当前目录下的所有APK包执行Monkey测试,并自动保存
- 抓取糗事百科文字笑话,抓取糗事百科,# -*- coding
- 一个简单的数学问题:200囚徒,数学问题200囚徒,n=1a=
评论关闭