我也来个下美女图片的脚本,也来美女图片脚本,#!/usr/bin/env python ……
文章由Byrx.net分享于2019-03-23 09:03:38
我也来个下美女图片的脚本,也来美女图片脚本,#!/usr/bin/env python ……
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Threaded image scraper.

Fetches a start page, downloads every absolute ``<img src="...">`` it
contains, and does the same (one level deep) for every absolute
``<a href="...">`` found on the start page, using one thread per page.

Python is a wonderful tool.
"""
import os
import re
import socket
from threading import Thread

try:  # Python 2 module layout, as used by the original script
    from urllib import urlretrieve
    from urllib2 import URLError, urlopen
except ImportError:  # Python 3 moved these into urllib.request / urllib.error
    from urllib.error import URLError
    from urllib.request import urlopen, urlretrieve

# Seconds to wait on a page request before giving up.
FETCH_TIMEOUT = 5

# Only links that start with one of these prefixes are followed/downloaded.
_SCHEMES = ("http://", "https://")

_LINK_RE = re.compile(r'<a href="(.*?)".*?>')
_IMG_RE = re.compile(r'<img src="(.*?)".*?>')


def _absolute_links(candidates):
    """Return the unique absolute http(s) URLs of *candidates*, first-seen order."""
    seen = set()
    links = []
    for link in candidates:
        if link in seen or not link.lower().startswith(_SCHEMES):
            continue
        seen.add(link)
        links.append(link)
    return links


class Spider(Thread):
    """Thread that downloads all images referenced by a single page.

    Attributes:
        url: address of the page to scan.
        output: existing directory the images are written into.
        content: HTML text of the page ("" until fetched, or if the fetch failed).
        urls: absolute links found in ``<a href>`` tags of the page.
        imgs: absolute image addresses found in ``<img src>`` tags of the page.
    """

    def __init__(self, url, output="img/"):
        Thread.__init__(self)
        self.url = url
        self.output = output
        self.content = ""
        self.urls = []
        self.imgs = []
        # Set once the page has been fetched and parsed, so that calling
        # getThreads() and then start() does not request the page twice
        # and does not list every link/image twice.
        self._parsed = False

    def _parse(self):
        """Fetch the page and extract its links and images, at most once."""
        if self._parsed:
            return
        self.getContent()
        self.getUrls()
        self.getImgs()
        self._parsed = True

    def run(self):
        """Download every image of the page into ``self.output``.

        The directory must already exist; otherwise a message is printed and
        nothing is downloaded. A failed download is reported and skipped so
        the remaining images are still fetched.
        """
        self._parse()
        if not os.path.exists(self.output):
            print("the output file isn't found")
            return
        for img in self.imgs:
            print("download: %s" % img)
            # The file is named after the last path segment of the image URL.
            filename = os.path.join(self.output, img.split("/")[-1])
            try:
                urlretrieve(img, filename)
            except (IOError, ValueError) as err:
                print("can't download: [%s] (%s)" % (img, err))

    def getUrls(self):
        """Append the absolute ``<a href>`` targets of ``self.content`` to ``self.urls``."""
        self.urls.extend(_absolute_links(_LINK_RE.findall(self.content)))

    def getImgs(self):
        """Append the absolute ``<img src>`` targets of ``self.content`` to ``self.imgs``."""
        self.imgs.extend(_absolute_links(_IMG_RE.findall(self.content)))

    def getContent(self):
        """Fetch ``self.url`` into ``self.content``.

        On failure a message is printed and ``self.content`` is left unchanged.
        """
        try:
            handler = urlopen(self.url, None, FETCH_TIMEOUT)
            try:
                raw = handler.read()
            finally:
                # Close the connection even when read() fails part-way.
                handler.close()
        except (URLError, socket.error, ValueError):
            # socket.error covers read timeouts, which are not URLError;
            # ValueError is what urlopen raises for an unknown URL type.
            print("can't find the url: [%s]" % self.url)
            return
        if not isinstance(raw, str):
            # Python 3 returns bytes; the regexes above work on text.
            raw = raw.decode("utf-8", "replace")
        self.content = raw

    def getThreads(self):
        """Return this spider followed by one new spider per link on its page.

        The spiders are not started. Children write to the same output
        directory as this spider.
        """
        self._parse()
        threads = [self]
        for url in self.urls:
            threads.append(Spider(url, self.output))
        return threads


def download(url):
    """Start one spider thread for *url* and one for every page it links to."""
    spider = Spider(url)
    for thread in spider.getThreads():
        print("begin thread: %s " % thread.url)
        thread.start()


if __name__ == "__main__":
    download("http://www.22mm.cc/")

# This snippet comes from http://byrx.net
评论关闭