# Comic downloader script (下载漫画脚本)


#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright (c) 2015, The Sun Technology

Download the "main comic" image from a range of explosm.net comic pages
and save them under a local directory mirroring the URL path.
"""
import os
import time
import urllib.request
from urllib.error import HTTPError
from urllib.parse import urlparse

# Destination template: the URL's path component is appended under this
# directory (hard-coded to the original author's machine).
BASE_URL = "/Users/mac/Documents%s"


def get_file_name(req_url):
    """Return (directory, filename) split from the path component of *req_url*."""
    path_obj = urlparse(req_url)
    return os.path.split(path_obj.path)


def get_save_path(save_dir):
    """Create the local directory mirroring *save_dir*'s URL path, if absent."""
    dirs = get_file_name(save_dir)
    save_path = BASE_URL % dirs[0]
    if not os.path.exists(save_path):
        # makedirs (not mkdir): also creates missing intermediate directories.
        os.makedirs(save_path)


def save_files(file_url, file_path):
    """Download *file_url* and write its raw bytes to *file_path*.

    Prints a success message and the elapsed wall-clock time.
    """
    start = time.time()
    response = urllib.request.urlopen(file_url)
    try:
        html = response.read()
    finally:
        # Close the connection even if read() raises (original leaked it).
        response.close()
    with open(file_path, "wb") as handler:
        handler.write(html)
    print("%s has been downloaded successfully " % file_url)
    # time.time() differences are seconds, not milliseconds.
    print("Total cost:%.3f s" % (time.time() - start))


def download(url_path):
    """Fetch 10 consecutive comic pages starting at page 82 and save each
    page's img#main-comic image.

    *url_path* is a format string with one %s slot for the page number.
    HTTP errors on one page are reported and the loop moves on.
    """
    start = 82
    for page_num in range(start, start + 10):
        response = None  # so the finally-close is safe if urlopen raises
        try:
            combine_url = url_path % page_num
            response = urllib.request.urlopen(combine_url)
            page = response.read() if response.getcode() == 200 else None
            if not page:
                # Original `return`-ed here, silently aborting all remaining
                # pages; an empty page should only skip this iteration.
                continue
            # bs4 is third-party; import lazily so the pure helpers above
            # remain importable without it installed.
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(page, "html.parser")
            img_tags = soup.find_all('img', id="main-comic")
            if not img_tags:
                continue  # no main comic on this page (original IndexError'd)
            url_parse = urlparse(url_path)
            # The src attribute is protocol-relative ("//..."): prepend scheme.
            rebuild_url = url_parse.scheme + ':' + img_tags[0].get('src')
            get_name = get_file_name(rebuild_url)
            save_files(rebuild_url, BASE_URL % '/'.join(get_name))
        except HTTPError as e:
            print("An error has occurred", e)
        finally:
            if response is not None:
                response.close()


if __name__ == '__main__':
    req_url = "http://explosm.net/comics/%s"
    get_save_path(req_url)
    download(req_url)

# Comments closed.