Downloading Web Pages and Files with Python Multithreading [Python]


Multithreaded file download

[Python] Code

# Python 2 script: crawl the listing pages for software IDs, then download
# each file through an HTTP proxy using a pool of worker threads fed from a Queue.
import httplib
import urllib2
from threading import Thread
from Queue import Queue
from time import sleep

proxy = 'your proxy'  # placeholder: host:port of your HTTP proxy
opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy}))
urllib2.install_opener(opener)

# Step 1: scan the listing pages and collect the software IDs that appear in
# links of the form http://www.someweb.net/soft/<id>.shtml
ids = {}
for i in range(1, 110):
    try:
        listUrl = "http://www.someweb.net/sort/list_8_%d.shtml" % i
        print listUrl
        page = urllib2.urlopen(listUrl).read()
        speUrl = "http://www.someweb.net/soft/"
        speUrlLen = len(speUrl)
        idx = page.find(speUrl, 0)
        while idx != -1:
            dotIdx = page.find(".", idx + speUrlLen)
            if dotIdx != -1:
                softId = page[idx + speUrlLen:dotIdx]
                ids[softId] = 1
            idx = page.find(speUrl, idx + speUrlLen)
    except:
        pass

# Step 2: hand the collected IDs to a pool of worker threads.
q = Queue()
NUM = 5          # number of worker threads
failedIds = []   # IDs whose download failed

def do_something_using(softId):
    """Resolve the real download URL via a HEAD request through the proxy,
    then fetch the file and save it to disk."""
    try:
        url = "http://www.someweb.net/download.php?softid=%s&type=dx" % softId
        h2 = httplib.HTTPConnection("your proxy", 8080)  # placeholder proxy host and port
        h2.request("HEAD", url)
        resp = h2.getresponse()
        location = resp.getheader("location")  # the redirect target is the real file URL
        sContent = urllib2.urlopen(location).read()
        savePath = "C:\\someweb\\%s.rar" % softId
        f = open(savePath, 'wb')
        f.write(sContent)
        f.close()
        print savePath + " saved"
    except:
        failedIds.append(softId)

def working():
    # Each worker pulls an ID off the queue, downloads it, and marks the task done.
    while True:
        softId = q.get()
        do_something_using(softId)
        sleep(1)
        q.task_done()

for i in range(NUM):
    t = Thread(target=working)
    t.setDaemon(True)
    t.start()

for softId in ids:
    q.put(softId)
q.join()
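
The code above targets Python 2 (httplib, urllib2, Queue). For comparison, here is a minimal Python 3 sketch of the same worker/queue download pattern; the names DOWNLOAD_URLS, SAVE_DIR, and fetch_one are illustrative placeholders and not part of the original post.

# Python 3 sketch of the same pattern: a Queue of URLs drained by daemon worker threads.
import os
import queue
import threading
import urllib.request

# Placeholder task list and output directory (assumptions, not from the original).
DOWNLOAD_URLS = [
    "http://www.someweb.net/download.php?softid=%d&type=dx" % i for i in range(1, 6)
]
SAVE_DIR = "downloads"
NUM_WORKERS = 5

task_queue = queue.Queue()

def fetch_one(url):
    # Download one URL and save it; failures are printed instead of raised.
    try:
        data = urllib.request.urlopen(url, timeout=30).read()
        soft_id = url.split("softid=")[1].split("&")[0]
        path = os.path.join(SAVE_DIR, soft_id + ".rar")
        with open(path, "wb") as f:
            f.write(data)
        print(path, "saved")
    except Exception as exc:
        print("failed:", url, exc)

def worker():
    # Each worker loops forever, pulling URLs and marking tasks done.
    while True:
        url = task_queue.get()
        try:
            fetch_one(url)
        finally:
            task_queue.task_done()

os.makedirs(SAVE_DIR, exist_ok=True)
for _ in range(NUM_WORKERS):
    threading.Thread(target=worker, daemon=True).start()

for u in DOWNLOAD_URLS:
    task_queue.put(u)

task_queue.join()  # block until every queued download has been processed

As in the original, the daemon worker threads exit with the main program once task_queue.join() returns, so no explicit shutdown signal is needed.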
