简单的批量下载工具 (a simple batch download tool): read a list of URLs from a file


Read the list of URLs from a file;

Download each URL in chunks;

Save each file into the specified directory;

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Simple batch download tool.

Reads a list of URLs from a text file (one URL per line), downloads each
URL chunk by chunk and saves every file into a user-chosen directory.

Update Logs:
version 1.0: created
version 1.1: add try ... catch for HttpError handling
version 1.2: can save file to disk by chunk
"""
import os
import sys
import urllib.request


# --- class ---
class MultiDownload():
    """Download every URL of a collection into one directory.

    Each file is saved under the last path segment of its URL.  ``pfix``
    lists image extensions, but ``run`` does not filter on it: every URL
    is downloaded regardless of its extension.
    """

    def __init__(self, urls, fold):
        """Store the URLs and the normalized target directory.

        Args:
            urls: iterable of URL strings to download.
            fold: directory the downloaded files are written into.
        """
        self.urls = urls
        self.fold = self.pathFormat(fold)
        # download by chunk; the value is multiplied by 1024 when reading
        self.chunk = 512
        self.pfix = ("jpg", "JPG", "JPEG", "jpeg", "png", "PNG")

    def pathFormat(self, path):
        """Return *path* as a directory path that ends with '/'.

        Surrounding whitespace is stripped and doubled backslashes are
        replaced by '/'.  An empty path yields './' (the current directory)
        instead of failing on an out-of-range index.
        """
        path = path.strip()
        path = path.replace("\\\\", "/")
        if not path:
            return "./"
        if not path.endswith("/"):
            path += "/"
        return path

    def getPostfix(self, url):
        """Return the text after the last '.' in *url*, or None without '.'."""
        if "." not in url:
            return None
        return url[url.rfind(".") + 1:]

    def getFileName(self, url):
        """Return the text after the last '/' in *url*, or None without '/'."""
        if "/" not in url:
            return None
        return url[url.rfind("/") + 1:]

    def down(self, url):
        """Download *url* into the target directory, chunk by chunk.

        A URL that yields no file name, or that cannot be opened, is
        reported with an ``[ERROR]`` line and skipped; no file is created
        for it.  Errors raised while reading the response or writing the
        file propagate to the caller.
        """
        filename = self.getFileName(url)
        if not filename:
            # No usable name after the last '/': nothing sensible to save as.
            print("[ERROR]  %s" % (url))
            return
        path = self.fold + filename

        try:
            response = urllib.request.urlopen(url)
        except Exception:
            # Best effort per URL, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit stop the whole batch.
            print("[ERROR]  %s" % (url))
            return

        # The output file is opened only once the URL is known to be
        # reachable, so failed downloads do not leave empty files behind.
        with response, open(path, 'wb') as out:
            # save file by chunk (if file size is too big)
            while True:
                data = response.read(self.chunk * 1024)
                if not data:
                    break
                out.write(data)
                print(" - Download %dK data" % (len(data) / 1024))

    def run(self):
        """Download every stored URL, printing each one afterwards.

        Returns:
            True once all URLs have been processed.
        """
        for line in self.urls:
            self.down(line)
            print(line)
        return True


# --- global functions ---
def loadUrls(filename):
    """Return the set of stripped lines of *filename* longer than 3 chars."""
    targets = set()
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if len(line) > 3:
                targets.add(line)
    return targets


def main():
    """Ask for the URL list file and target directory, then download."""
    inputfile = input("input filename: ")
    basedir = input("where do you want to save files? ")
    # get url list
    targetList = loadUrls(inputfile)
    # download each file
    md = MultiDownload(targetList, basedir)
    md.run()


# --- main ---
if __name__ == "__main__":
    main()

# This snippet comes from http://byrx.net

评论关闭