Python多线程下载(2)


 上篇:http://www.2cto.com/kf/201112/113937.html

 

#encoding: gbk
from urllib.request import *
from threading import *
import time
import os

 


PIECE_SIZE = 1024
class Block:
    def __init__(self, fileInfo, startPiece, endPiece, last = False, lastSize = None):
        self.fileInfo = fileInfo
        self.startPiece = startPiece
        self.endPiece = endPiece
        self.last = last
        self.lastSize = lastSize
        # [startPos, endPos]闭区间
        self.startPos = PIECE_SIZE * startPiece
        if not self.last:
            self.endPos = PIECE_SIZE * endPiece - 1
        else:
            self.endPos = PIECE_SIZE * endPiece - (PIECE_SIZE - lastSize) - 1
        print("range:(%d-%d)"%(self.startPos, self.endPos))


    def getNext(self):
        s = -1
        e = self.endPiece
        #print(self.fileInfo.pieceMap[self.startPiece:self.endPiece])
        for i in range(self.startPiece, self.endPiece):
            if s == -1 and self.fileInfo.pieceMap[i] == ord('0'):
                s = i
            if s >= 0 and self.fileInfo.pieceMap[i] == ord('1'):
                e = i
                break
        print("getNext.raw(", s, e, ")")
        if s >= 0 and e > s:
            start = PIECE_SIZE * s
            end = PIECE_SIZE * e
            if end > self.endPos:
                end = self.endPos
            self.pos = start
            print("getNext.result(%d, %d)"%(start, end))
            return (start, end)
        else:
            return None


    def write(self, data):
        self.fileInfo.write(self.pos, data)
        with self.fileInfo.lock:
            s = self.pos
            self.pos += len(data)
            e = self.pos
            if self.last and self.pos >= self.endPos:
                self.fileInfo.pieceMap[self.endPiece - 1] = ord('1')
                #print("set(%d)"%(self.endPiece - 1))
            sp = s // PIECE_SIZE
            ep = e // PIECE_SIZE
            for i in range(sp, ep):
                self.fileInfo.pieceMap[i] = ord('1')
                #print("set(%d)"%i)
            #self.fileInfo.saveInfo()

 

 

 


class FileInfo:
    def __init__(self, fileName, fileSize, numBlocks = 4):
        self.fileName = fileName
        self.fileSize = fileSize
        self.numBlocks = numBlocks
        try:
            self.file = open(self.downname(), "rb+")
        except:
            self.file = open(self.downname(), "wb")
        self.file.truncate(fileSize)
        self.lock = Lock()


        d, v = divmod(fileSize, PIECE_SIZE)
        if v:
            d += 1
        self.numPieces = d
        self.piecePad = 0
        self.pieceCount = d // 8
        if d % 8:
            self.pieceCount + 1
            self.piecePad = 8 - d % 8
        self.pieceMap = bytearray(b"0"*d + b"1" * self.piecePad)
        self.lastPieceSize = v
        pb = self.numPieces // self.numBlocks
        r = []
        for i in range(self.numBlocks):
            r.append(i * pb)
        r.append(self.numPieces)
        self.range = r
        self.blocks = []
        for i in range(self.numBlocks):
            self.blocks.append(Block(self, self.range[i], self.range[i + 1], i == self.numBlocks - 1, self.lastPieceSize))
        print("fileName:%s, size:%d, blocks:%d, pieces:%d, last:%d, range:%s"%(self.fileName, self.fileSize, self.numBlocks, self.numPieces, self.lastPieceSize, self.range))
        self.load()


    def write(self, pos, data):
        with self.lock:
            #print("file.pos = ", self.file.tell())
            #print("write(%d, %d)"%(pos, len(data)))
            self.file.seek(pos)
            self.file.write(data)
    def downname(self):
        return self.fileName + ".down"
    def infoname(self):
        return self.fileName + "info.txt"
    def saveInfo(self):
        with open(self.infoname(), "wb") as inf:
            inf.write(self.bits2hex(self.pieceMap))
    def load(self):
        if os.path.exists(self.infoname()):
            print("****************info.size:", os.path.getsize(self.infoname()), "pieces:", len(self.pieceMap))
        if os.path.exists(self.downname()) and os.path.exists(self.infoname()) and os.path.getsize(self.downname()) == self.fileSize:
            data = open(self.infoname(), "rb").read()
            data = self.hex2bits(data)
            self.pieceMap = bytearray(data)
            print("load:", self.pieceMap)
    def close(self):
        self.file.close()
        os.rename(self.downname(), self.fileName)
        os.remove(self.infoname())
    def bits2hex(self, bits):
        #print("bits:", bits)
        v = int(bits, 2)
        return bytes(hex(v)[2:], "ascii")


    def hex2bits(self, bs):
        #print("hex2bits(", bs, ")")
        v = int(bs, 16)
        return bytes(bin(v)[2:], "ascii")

 

 

www.2cto.com

class DownTask(Thread):
    def __init__(self, url, block):
        self.url = url
        self.block = block
        self.stopIt = False
        super().__init__()


    def run(self):
        print("Task(%s).run(%d, %d) begin"%(self.name, self.block.startPiece, self.block.endPiece))
        while not self.stopIt:
            rg = self.block.getNext()
            print("range:", rg)
            if rg:
                len = rg[1] - rg[0] + 1
                r = Request(url = self.url)
                r.add_header("Range", "bytes=%d-%d"%(rg[0], rg[1]))
                print(r.header_items())
                con = urlopen(r)
                print(con.info().as_string())
                while 1:
                    data = con.read(133)
                    if data:
                        self.block.write(data)
                    else:
                        break
            else:
                break
        print("Task(%s).run(%d, %d) end"%(self.name, self.block.startPiece, self.block.endPiece))
    def stop(self):
        self.stopIt = True


class Downloader:
    def __init__(self, url, numThread = 4, filename = None):
        self.url = url
        self.numThread = numThread
        self.filename = filename if filename else os.path.split(url)[1]


    def start(self):
        con = urlopen(self.url)
        print(con.info().as_string())
        supp = con.headers.get("Accept-Ranges")
        if supp:
            if supp.lower() == "none":
                supp = False
        length = int(con.headers.get("Content-length", "0"))
        cd = con.headers.get("Content-Disposition")
        self._fileName = None
        if cd:
            cd = cd.lower()
            if "filename=" in cd:
                self._filename = cd.split("filename=")[1]


        if supp:
            con.close()
            self.tasks = []
            self.fileInfo = FileInfo(self.filename, length, self.numThread)
            for b in self.fileInfo.blocks:
                self.tasks.append(DownTask(self.url, b))
            for t in self.tasks:
                t.start()
            while any(map(lambda t:t.isAlive(), self.tasks)):
                time.sleep(0.5)
                print(self.fileInfo.bits2hex(self.fileInfo.pieceMap))
                self.fileInfo.saveInfo()
            self.fileInfo.close()
        else:
            while 1:
                f = open(self.fileName, "wb")
                data = con.read(133)
                if data:
                    f.write(data)
                else:
                    break
            f.close()
            con.close()
               

 


d = Downloader("http://www.python.org/ftp/python/3.2.1/python-3.2.1.msi", 6)
d.start()

摘自 lqefn的专栏
 

相关内容

    暂无相关文章

评论关闭