我也来个下载美女图片的脚本


#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tiny threaded image spider ("python is a wonderful tool").

Fetches a start page, collects the absolute ``http:`` links and image
sources found in it, then downloads the images of the start page and of
every linked page, using one thread per page.

The script was written for Python 2; it is kept runnable on both Python 2
and Python 3 (every ``print`` call passes a single pre-formatted string, so
it behaves the same as a statement or as a function).
"""
import os
import re
from contextlib import closing
from threading import Thread

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2, which the script originally targeted
    from urllib import urlretrieve
    from urllib2 import urlopen

# Compiled once; raw strings keep the quotes unambiguous.
_LINK_RE = re.compile(r'<a href="(.*?)".*?>')
_IMG_RE = re.compile(r'<img src="(.*?)".*?>')


def _new_http_addresses(candidates, known):
    """Return the not-yet-known candidates that contain "http:" exactly once.

    Document order is preserved and duplicates are dropped, so repeated
    parsing of the same page never records an address twice.
    """
    seen = set(known)
    fresh = []
    for address in candidates:
        if address.count("http:") == 1 and address not in seen:
            seen.add(address)
            fresh.append(address)
    return fresh


class Spider(Thread):
    """Thread that downloads every image referenced by one page.

    Args:
        url: address of the page to crawl.
        output: existing directory the images are written into.
    """

    def __init__(self, url, output="img/"):
        Thread.__init__(self)
        self.url = url
        self.output = output
        self.content = ""
        self.urls = []
        self.imgs = []
        # Set once the page has been fetched and parsed, so that run() does
        # not repeat the work already done by getThreads().
        self._crawled = False

    def _crawl(self):
        """Fetch and parse the page, at most once per spider."""
        if self._crawled:
            return
        self._crawled = True
        self.getContent()
        self.getUrls()
        self.getImgs()

    def run(self):
        """Crawl the page and save each of its images into ``self.output``."""
        self._crawl()
        if not os.path.isdir(self.output):
            print("the output file isn't found")
            return
        for img in self.imgs:
            name = img.split("/")[-1]
            if not name:
                # Address ends with "/": there is no file name to save under.
                continue
            print("download: %s" % img)
            try:
                urlretrieve(img, os.path.join(self.output, name), None)
            except (IOError, ValueError):
                # One broken image must not abort the remaining downloads.
                print("can't download the image: [%s]" % img)

    def getUrls(self):
        """Add the absolute http links found in ``self.content`` to ``self.urls``."""
        found = _LINK_RE.findall(self.content)
        self.urls.extend(_new_http_addresses(found, self.urls))

    def getImgs(self):
        """Add the absolute http image sources in ``self.content`` to ``self.imgs``."""
        found = _IMG_RE.findall(self.content)
        self.imgs.extend(_new_http_addresses(found, self.imgs))

    def getContent(self):
        """Download ``self.url`` into ``self.content`` (5 second timeout).

        On failure a message is printed and ``self.content`` is left as is.
        """
        try:
            # closing() releases the connection even when read() fails.
            with closing(urlopen(self.url, None, 5)) as handler:
                raw = handler.read()
        except (IOError, ValueError):
            # IOError covers URLError and socket timeouts; ValueError is
            # raised for addresses without a usable scheme.
            print("can't find the url: [%s]" % self.url)
            return
        if not isinstance(raw, str):
            # Python 3 returns bytes. The addresses we extract are ASCII, so
            # a lenient decode is sufficient whatever the page encoding is.
            raw = raw.decode("utf-8", "replace")
        self.content = raw

    def getThreads(self):
        """Return this spider followed by one spider per linked page.

        Child spiders write into the same output directory; a link back to
        this very page is skipped so it is not crawled twice.
        """
        self._crawl()
        children = [
            Spider(link, self.output)
            for link in self.urls
            if link != self.url
        ]
        return [self] + children


def download(url):
    """Start one spider thread for ``url`` and one for each page it links to."""
    for thread in Spider(url).getThreads():
        print("begin thread: %s " % thread.url)
        thread.start()


if __name__ == "__main__":
    download("http://www.22mm.cc/")

# This snippet comes from http://byrx.net

评论关闭