百度图片下载
文章由Byrx.net分享于2019-03-23 07:03:05
百度图片下载
# coding: utf-8
"""Fetch some images from Baidu's image channel (written for Python 2.7.5).

How it works:

1. Pick the tags to search for.
2. Request the image list from an URL such as
   http://image.baidu.com/channel/listjson?pn=0&rn=1000&tag1=%E6%98%8E%E6%98%9F&tag2=%E5%BC%A0%E5%AD%A6%E5%8F%8B&ftags=&sorttype=0&ie=utf8&oe=utf-8&image_id=692147105
   where ``pn`` is the paging marker, ``rn`` the number of images per page,
   ``tag1`` the main tag and ``tag2`` the sub tag.  The remaining fields are
   ignored for now.
3. Convert the JSON response into a dict.
4. Take the ``download_url`` of every entry.
5. Download the files.
6. Optionally use several threads to speed things up.

The module runs unchanged on Python 2.7 and Python 3: every ``print`` call
passes exactly one string, which prints identically on both.
"""
import json
import sys
from contextlib import closing
from hashlib import md5
from threading import Lock, Thread

try:  # Python 2
    from Queue import Queue
    from urllib import quote, urlcleanup, urlretrieve
    from urllib2 import Request, urlopen
except ImportError:  # Python 3
    from queue import Queue
    from urllib.parse import quote
    from urllib.request import Request, urlcleanup, urlopen, urlretrieve


class Baiduimage(object):
    """Download the images Baidu lists for a main tag / sub tag pair.

    Args:
        tag1: Main tag (text or UTF-8 bytes).
        tag2: Sub tag (text or UTF-8 bytes).
        number: How many image links to request from the listing endpoint.
        stored: Directory the downloaded files are written to.
    """

    def __init__(self, tag1, tag2, number=1, stored="."):
        self.tag1 = tag1
        self.tag2 = tag2
        self.number = str(number)
        self.url = self.make_url()
        self.stored = stored
        print("work start")

    @staticmethod
    def _quote_tag(tag):
        """Percent-encode *tag* as UTF-8 for use in the query string."""
        if not isinstance(tag, bytes):
            tag = tag.encode("utf-8")
        # "%" stays untouched so tags that are already percent-encoded
        # (as in the sample URL of the module docstring) are not encoded twice.
        return quote(tag, safe="%")

    def make_url(self):
        """Return the listing URL for the configured tags and image count."""
        url = (
            "http://image.baidu.com/channel/listjson?pn=0&rn=" + self.number
            + "&tag1=" + self._quote_tag(self.tag1)
            + "&tag2=" + self._quote_tag(self.tag2)
            + "&ftags=&sorttype=0&ie=utf8&oe=utf-8"
        )
        return url

    def request_body(self):
        """Fetch ``self.url`` and return the raw response body."""
        request = Request(self.url)
        # closing() releases the connection even if read() raises; the
        # Python 2 response object is not a context manager by itself.
        with closing(urlopen(request)) as response:
            return response.read()

    def parse_body(self):
        """Return ``(urls, count)`` for the ``download_url`` entries listed.

        Entries of the response's ``data`` list that carry no
        ``download_url`` key are skipped.
        """
        body = self.request_body()
        if isinstance(body, bytes):
            body = body.decode("utf-8")
        payload = json.loads(body)
        urls = [
            entry["download_url"]
            for entry in payload.get("data") or []
            if "download_url" in entry
        ]
        return (urls, len(urls))

    def image_name(self, url):
        """Return the target path for *url*: ``<stored>/<md5 of url>.<ext>``.

        ``<ext>`` is whatever follows the last "." of the URL.
        """
        raw = url if isinstance(url, bytes) else url.encode("utf-8")
        return self.stored + "/" + md5(raw).hexdigest() + "." + url.split(".")[-1]

    def _download_one(self, url):
        """Download *url* into ``self.stored``; return True on success."""
        try:
            urlretrieve(url, self.image_name(url))
            urlcleanup()
        except Exception:
            # Best effort: a failed image is skipped, not fatal.  Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate so the user can actually stop the run.
            return False
        return True

    def dowload_image(self):
        """Download every listed image sequentially, reporting progress."""
        (urls, urlnumber) = self.parse_body()
        print("want " + self.number + " images, get images links " + str(urlnumber))
        if urlnumber == 0:
            print("Could not find a image link")
        else:
            print("Download start press Ctrl+Break to stop ")
            count = 0
            for index, url in enumerate(urls):
                if self._download_one(url):
                    count = count + 1
                # The trailing backspaces move the cursor back so the next
                # progress message overwrites this one.
                sys.stdout.write(
                    "Dowdload[" + str(index + 1) + "] has download "
                    + str(count) + chr(8) * 80
                )
                sys.stdout.flush()
        # A real newline, so the final message starts on its own line after
        # the in-place progress output.
        print("\nwork end")

    def dowload_image_thread(self, threadnumber=2):
        """Download every listed image using *threadnumber* worker threads.

        Values below 1 are treated as 1; without any worker the queue would
        never drain and the call would block forever.
        """
        (urls, _) = self.parse_body()
        print("Download start press Ctrl+Break to stop ")

        tasks = Queue()
        progress_lock = Lock()
        # One-element list so the nested worker can update the counter
        # (Python 2 has no ``nonlocal``).
        finished = [0]

        def worker():
            while True:
                task = tasks.get()
                try:
                    if task is None:  # sentinel: no more work
                        return
                    index, url = task
                    if self._download_one(url):
                        # Counter update and progress output happen under one
                        # lock so messages from different workers do not
                        # interleave.
                        with progress_lock:
                            finished[0] += 1
                            sys.stdout.write(
                                "Dowdload[" + str(index + 1) + "] has download "
                                + str(finished[0]) + chr(8) * 80
                            )
                            sys.stdout.flush()
                finally:
                    # Always acknowledge the task, otherwise join() hangs.
                    tasks.task_done()

        if urls:
            worker_count = max(1, threadnumber)
            for _ in range(worker_count):
                thread = Thread(target=worker)
                thread.daemon = True
                thread.start()
            for task in enumerate(urls):
                tasks.put(task)
            tasks.join()  # block until all tasks are done
            for _ in range(worker_count):
                tasks.put(None)  # let the workers exit instead of idling
        print("work end")


if __name__ == "__main__":
    print("this is a test with thread ")
    # Arguments: category, keyword, number of images, storage directory.
    Baiduimage("明星", "刘德华", 100).dowload_image_thread()
# This snippet comes from http://byrx.net
评论关闭