像fileinput一样使用,处理tarfile文件组,fileinputtarfile,python的filei
像fileinput一样使用,处理tarfile文件组,fileinputtarfile,python的filei
Python的fileinput模块是个很好用的工具,可以很方便地对一组文件进行逐行处理;其openhook参数(例如fileinput.hook_encoded)可以实现自动地将特定编码格式的行转换为unicode对象。下面的TarFileInput类提供类似的接口,用来逐行处理一组tar文件中的各个成员文件:
#!/usr/bin/python
# encoding: utf-8
#
# filename: common_lib/utl/tarfileinput.py
"""fileinput-style line iteration over the members of a group of tar archives."""
import os.path
import tarfile


class TarFileInput(object):
    """Iterate, line by line, over every file stored in a list of tar archives.

    Usage mirrors the ``fileinput`` module: loop over the instance and query
    ``filename()``, ``filelineno()``, ``lineno()`` and ``isfirstline()`` from
    inside the loop body to find out where the current line came from.

    Unlike ``fileinput``, the line counters here are zero-based:
    ``filelineno()`` is the index of the current line inside its member file
    and ``lineno()`` is the number of lines already consumed before it.
    """

    def __init__(self, srcfiles, mode='r|gz', coding='utf-8'):
        """Remember the inputs; nothing is opened until iteration starts.

        srcfiles -- iterable of tar archive paths; entries that are not
                    existing regular files are skipped.
        mode     -- mode string handed to ``tarfile.open`` (default: gzip
                    stream).
        coding   -- codec used to decode each line; pass a false value
                    (``None`` or ``''``) to get the raw byte lines instead.
        """
        self.srcfiles = srcfiles
        self.coding = coding
        self.mode = mode
        self.handle = None
        # Path of the archive currently being read. Initialised here so that
        # filename() is safe to call before the first line has been read.
        self._tarname = None
        self._filename = None
        self._filelineno = None
        self._lineno = 0

    def isfirstline(self):
        """Return True if the current line is the first line of its member file."""
        return self._filelineno == 0

    def filelineno(self):
        """Return the zero-based index of the current line within its member file.

        Returns None when no line of the current archive has been read yet.
        """
        return self._filelineno

    def filename(self):
        """Return ``<archive path>/<member name>`` for the current line.

        Returns None while no member file is being read (before iteration
        starts, or right after a new archive has been opened).
        """
        if self._tarname is None or self._filename is None:
            return None
        return os.path.join(self._tarname, self._filename)

    def lineno(self):
        """Return how many lines, across all archives, were consumed before the current one."""
        return self._lineno

    def _itertarhandle(self):
        """Yield an open ``TarFile`` for each readable archive in ``srcfiles``.

        Missing paths and archives that cannot be opened are skipped silently
        (best effort). Every yielded handle is closed again as soon as the
        consumer asks for the next one or closes this generator.
        """
        for filename in self.srcfiles:
            if not os.path.isfile(filename):
                continue
            try:
                handle = tarfile.open(filename, self.mode)
            except (tarfile.TarError, EnvironmentError, EOFError):
                # Corrupt, truncated or unreadable archive: skip it and carry
                # on. Only open() is guarded, so GeneratorExit raised at the
                # yield below is never swallowed.
                continue
            self._tarname = filename
            self._filename = None
            self._filelineno = None
            try:
                yield handle
            finally:
                # Runs on normal advance and on early close of the generator.
                handle.close()

    def __iter__(self):
        """Yield every line of every regular member file of every archive.

        Lines are decoded with ``self.coding`` when it is set, otherwise the
        raw byte lines are yielded. The global line counter restarts at zero
        each time iteration begins.
        """
        self._lineno = 0
        handles = self._itertarhandle()
        try:
            for handle in handles:
                for entry in handle:
                    fileobj = handle.extractfile(entry)
                    if fileobj is None:
                        # Not a regular file (e.g. a directory entry).
                        continue
                    self._filename = entry.name
                    try:
                        # Stream the member line by line instead of loading
                        # it completely with readlines().
                        for self._filelineno, ln in enumerate(fileobj):
                            if self.coding:
                                yield ln.decode(self.coding)
                            else:
                                yield ln
                            self._lineno += 1
                    finally:
                        fileobj.close()
        finally:
            # Close the current archive deterministically, even when the
            # caller stops iterating early.
            handles.close()


def tester():
    """Demo: dump all archived history logs, printing a banner before each member file."""
    import glob
    import sys
    tarfiles = glob.glob(r"/home/apps/log/history/*.tar.gz")
    stream = TarFileInput(tarfiles)
    for ln in stream:
        if stream.isfirstline():
            sys.stdout.write("%s %s\n" % (stream.filename(), '=' * 64))
        # Python 2's stdout takes byte strings, so re-encode there; Python 3's
        # text stdout takes the decoded line directly.
        sys.stdout.write(ln.encode("utf-8") if str is bytes else ln)


if __name__ == "__main__":
    tester()

# This snippet comes from http://byrx.net
评论关闭