Python爬图,闲来无事,看着知乎里……


闲来无事,看着知乎里介绍的种种 Python 优点,按捺不住,装起了 Python 3.4。

在网上找了点爬取图片的代码,修改至兼容 Python 3.4,成功爬取了指定 URL 页面中的所有 jpg 图片,代码段如下:

import os
import re
import urllib
import urllib.request

# Image crawler: downloaded files are stored in a "test" directory that
# lives next to this script.
download_path = os.path.dirname(os.path.abspath(__file__))


class spider(object):
    """Download every ``.jpg`` image referenced by a ``src="..."`` attribute.

    The page at ``url`` is fetched once, scanned with a regular expression
    for absolute http(s) image URLs ending in ``.jpg``, and each match is
    saved under ``<download_path>/test/``.
    """

    # The URL part is lazy and may not contain a double quote, so a match
    # can never run past the closing quote of its own attribute (a greedy
    # ``.*`` would glue several images on one line into a single "URL").
    _JPG_SRC_RE = re.compile(r'src="(https?://[^"]*?\.jpg)\s*"')

    def __init__(self, url):
        """Remember the page ``url`` that ``run()`` will crawl."""
        self.url = url

    def parse(self, content):
        """Return the list of ``.jpg`` URLs found in the HTML text ``content``.

        URLs are returned in document order; duplicates are kept. An empty
        list is returned when nothing matches.
        """
        return self._JPG_SRC_RE.findall(content)

    def downloads(self, urls):
        """Download each URL in ``urls`` into ``<download_path>/test/``.

        The local file name is the last path segment of the URL. Errors
        raised by ``urllib.request.urlretrieve`` propagate to the caller.
        """
        d_path = os.path.join(download_path, "test")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(d_path, exist_ok=True)
        for url in urls:
            filename = url.split("/")[-1]
            print(url)
            print("Downloads %s" % (filename))
            output = os.path.join(d_path, filename)
            urllib.request.urlretrieve(url, output)

    def run(self):
        """Fetch ``self.url``, extract the image URLs and download them."""
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(self.url) as fd:
            raw = fd.read()
            charset = fd.headers.get_content_charset() or "utf-8"
        try:
            # Honour the charset announced by the server (many pages are
            # GBK/GB2312); undecodable bytes are replaced rather than fatal
            # because only the ASCII URLs matter here.
            content = raw.decode(charset, errors="replace")
        except LookupError:
            # The server announced a charset Python does not know.
            content = raw.decode("utf-8", errors="replace")
        self.downloads(self.parse(content))


if __name__ == "__main__":
    sp = spider("http://news.cnfol.com/img/20150814/17638.shtml")
    sp.run()

python爬图

相关内容

    暂无相关文章

评论关闭