像 fileinput 一样使用,逐行处理一组 tarfile 文件(关键词:fileinput、tarfile)


Python 的 fileinput 模块是个很好用的工具,可以很方便地对一组文件进行逐行处理;其 openhook 参数可以实现自动地将特定编码格式的行转换为 unicode 对象。下面的 TarFileInput 类提供了类似的接口,用于逐行处理一组 tar 归档文件中的各个成员文件。

#!/usr/bin/python
# encoding: utf-8
#
# filename: common_lib/utl/tarfileinput.py
#
# fileinput-style, line-by-line iteration over the members of a group of
# tar archives.
import os.path
import tarfile


class TarFileInput(object):
    """Iterate over every line of every regular file in a group of tar archives.

    The interface mirrors the ``fileinput`` module: iterate over the
    instance to get lines, and call ``filename()``, ``lineno()``,
    ``filelineno()`` and ``isfirstline()`` from inside the loop to learn
    where the current line came from.

    Note that, unlike ``fileinput``, the counters here are zero-based:
    while the loop body is handling a line, ``filelineno()`` is that
    line's index inside its member and ``lineno()`` is the number of
    lines yielded before it.

    Args:
        srcfiles: iterable of paths to tar archives.  Paths that are not
            existing regular files, and archives that cannot be opened,
            are skipped silently.
        mode: mode string handed to ``tarfile.open`` (default ``'r|gz'``,
            i.e. a gzip-compressed stream read sequentially).
        coding: encoding used to decode each line.  A false value
            (``None`` or ``''``) disables decoding, so raw byte strings
            are yielded.
    """

    def __init__(self, srcfiles, mode='r|gz', coding='utf-8'):
        self.srcfiles = srcfiles
        self.coding = coding
        self.mode = mode
        # Never assigned anywhere else in this class; kept so that the
        # instance's attribute set stays backward-compatible.
        self.handle = None
        self._tarname = None      # path of the archive being read
        self._filename = None     # member path inside that archive
        self._filelineno = None   # zero-based line index inside the member
        self._lineno = 0          # lines yielded before the current one

    def isfirstline(self):
        """Return True if the current line is the first line of its member."""
        return self._filelineno == 0

    def filelineno(self):
        """Return the zero-based index of the current line in its member.

        Returns None when a new archive has been opened but none of its
        lines has been read yet.
        """
        return self._filelineno

    def filename(self):
        """Return ``<archive path>/<member path>`` for the current line.

        Returns None while no member is being read (for example before
        iteration starts), like ``fileinput.filename()`` does.
        """
        if self._tarname is None or self._filename is None:
            return None
        return os.path.join(self._tarname, self._filename)

    def lineno(self):
        """Return how many lines were yielded before the current one."""
        return self._lineno

    def _itertarhandle(self):
        """Yield an open ``TarFile`` for each usable path in ``srcfiles``.

        Each archive is closed as soon as the consumer asks for the next
        one, and also when this generator is closed early.
        """
        for filename in self.srcfiles:
            if not os.path.isfile(filename):
                continue
            try:
                handle = tarfile.open(filename, self.mode)
            except (tarfile.TarError, EnvironmentError, EOFError):
                # Best effort: an unreadable or corrupt archive is skipped.
                continue
            self._tarname = filename
            self._filename = None
            self._filelineno = None
            # The yield must not sit under a catch-all handler: that would
            # swallow GeneratorExit when the consumer stops early, and the
            # archive would be left open.
            try:
                yield handle
            finally:
                handle.close()

    def __iter__(self):
        """Yield each line of each regular member, archive after archive.

        Lines are decoded with ``self.coding`` when it is set; otherwise
        they are yielded as raw byte strings.  Errors raised while reading
        an archive that opened successfully are propagated to the caller.
        """
        self._lineno = 0
        handles = self._itertarhandle()
        try:
            for handle in handles:
                for entry in handle:
                    fileobj = handle.extractfile(entry)
                    if fileobj is None:
                        # Directories and other non-file members have no data.
                        continue
                    self._filename = entry.path
                    # Iterate lazily instead of readlines() so that a large
                    # member is not loaded into memory in one piece.
                    for self._filelineno, ln in enumerate(fileobj):
                        if self.coding:
                            yield ln.decode(self.coding)
                        else:
                            yield ln
                        self._lineno += 1
        finally:
            # Close the current archive promptly even if the consumer
            # breaks out of the loop or an error interrupts the iteration.
            handles.close()


def tester():
    """Dump every line of the archived logs, with a banner before each member."""
    import glob
    import sys

    # Bytes go to sys.stdout.buffer on Python 3 and to sys.stdout itself on
    # Python 2, so the same code produces the same output on both.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    tarfiles = glob.glob(r"/home/apps/log/history/*.tar.gz")
    stream = TarFileInput(tarfiles)
    for ln in stream:
        if stream.isfirstline():
            banner = "%s %s\n" % (stream.filename(), '=' * 64)
            if not isinstance(banner, bytes):
                banner = banner.encode("utf-8")
            out.write(banner)
        out.write(ln.encode("utf-8"))


if __name__ == "__main__":
    tester()

# This snippet comes from http://byrx.net

评论关闭