下载漫画小脚本 (comic download script)
文章由Byrx.net分享于2019-03-23 05:03:50
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Copyright (c) 2015, The Sun Technology

Download comic images from explosm.net and save them under BASE_URL.

Ported from Python 2 (urllib2/bs4) to Python 3 standard library only:
- `with urllib.request.urlopen(...)` closes the response even on error
  (the old `finally: response.close()` raised NameError when urlopen
  itself failed, and leaked the connection otherwise).
- `os.makedirs` replaces `os.mkdir` so intermediate directories are
  created as needed.
- The single `<img id="main-comic">` lookup previously done with
  BeautifulSoup is done with the stdlib `html.parser`.
"""
import os
import time
import urllib.request
from html.parser import HTMLParser
from urllib.error import HTTPError
from urllib.parse import urlparse

# Local save root; the URL path is appended to it.
# NOTE(review): hard-coded macOS user path — adjust per machine.
BASE_URL = "/Users/mac/Documents%s"


def get_file_name(req_url):
    """Split the path of *req_url* into a (directory, filename) tuple.

    e.g. "http://x.net/comics/1.png" -> ("/comics", "1.png")
    """
    path_obj = urlparse(req_url)
    return os.path.split(path_obj.path)


def get_save_path(save_dir):
    """Ensure the local directory mirroring *save_dir*'s URL path exists."""
    dirs = get_file_name(save_dir)
    save_path = BASE_URL % dirs[0]
    if not os.path.exists(save_path):
        # makedirs (not mkdir): create missing intermediate directories too.
        os.makedirs(save_path)


def save_files(file_url, file_path):
    """Download *file_url* and write its bytes to *file_path*.

    Prints the URL and the elapsed wall-clock time on success.
    """
    start = time.time()
    with urllib.request.urlopen(file_url) as response:
        data = response.read()
    with open(file_path, "wb") as handler:
        handler.write(data)
    print("%s has been downloaded successfully " % file_url)
    print("Total cost:%.3f ms" % (time.time() - start))


class _MainComicParser(HTMLParser):
    """Stdlib HTML parser that records the src of the first <img id="main-comic">."""

    def __init__(self):
        super().__init__()
        self.src = None  # src attribute of the matching <img>, or None

    def handle_starttag(self, tag, attrs):
        if tag == "img" and self.src is None:
            attr_map = dict(attrs)
            if attr_map.get("id") == "main-comic":
                self.src = attr_map.get("src")


def download(url_path, start=82, count=10):
    """Fetch *count* comic pages starting at page *start*.

    *url_path* is a format string with one %s placeholder for the page
    number (e.g. "http://explosm.net/comics/%s").  Defaults reproduce the
    original hard-coded range 82..91.  HTTP errors on one page are
    reported and the loop continues; an empty/non-200 page stops the run.
    """
    for page_num in range(start, start + count):
        combine_url = url_path % page_num
        try:
            with urllib.request.urlopen(combine_url) as response:
                page = response.read() if response.getcode() == 200 else None
            # Stop entirely on an empty/non-200 page (original behavior).
            if not page:
                return
            parser = _MainComicParser()
            parser.feed(page.decode("utf-8", errors="replace"))
            if not parser.src:
                continue
            # The comic src is scheme-relative ("//host/...") — prepend
            # the scheme of the listing URL to rebuild a full URL.
            url_parse = urlparse(url_path)
            rebuild_url = url_parse.scheme + ':' + parser.src
            get_name = get_file_name(rebuild_url)
            save_files(rebuild_url, BASE_URL % '/'.join(get_name))
        except HTTPError as e:
            print("An error has occurred", e)
            continue


if __name__ == '__main__':
    req_url = "http://explosm.net/comics/%s"
    get_save_path(req_url)
    download(req_url)
评论关闭