批量提取 Word 格式的调查表信息(提取 Word 调查表)


# coding: utf-8
"""Batch-extract selected table cells from Word survey forms.

Walks one directory, opens every ``.doc`` file through Word COM automation
and every ``.docx`` file through python-docx, and prints four cells taken
from the first table of each document.  The print calls are written so the
script behaves the same on Python 2 and Python 3.
"""
import os

import win32com
from win32com.client import Dispatch, constants
from docx import Document

# Word terminates the text of every table cell with an end-of-cell marker
# (carriage return + BEL).  It is stripped so it does not pollute the output.
_CELL_END_MARKERS = '\r\x07'


def _com_cell_text(table, row, col):
    """Return the text of a COM table cell without the end-of-cell marker."""
    return table.Rows[row].Cells[col].Range.Text.rstrip(_CELL_END_MARKERS)


def parse_doc(f):
    """Read four cells from the first table of a ``.doc`` file.

    Relies on the module-level ``w`` (a running ``Word.Application`` COM
    object), which the ``__main__`` section creates before calling this.

    Args:
        f: Full path of the ``.doc`` file to open.

    Returns:
        The tuple ``(name, situation, people, title)``; the same values are
        also printed on one line, separated by spaces.
    """
    doc = w.Documents.Open(FileName=f)
    try:
        # Cell positions depend on the layout of the survey form's table.
        t = doc.Tables[0]
        name = _com_cell_text(t, 0, 1)
        situation = _com_cell_text(t, 0, 5)
        people = _com_cell_text(t, 1, 1)
        title = _com_cell_text(t, 1, 3)
    finally:
        # Always release the document, even when a cell lookup fails,
        # so Word does not keep the file open and locked.
        doc.Close()
    fields = (name, situation, people, title)
    print(" ".join(fields))
    return fields


def parse_docx(f):
    """Read four cells from the first table of a ``.docx`` file.

    Args:
        f: Full path of the ``.docx`` file to open.

    Returns:
        The tuple ``(name, situation, people, title)``; the same values are
        also printed on one line, separated by spaces.
    """
    t = Document(f).tables[0]
    # Column indices differ from parse_doc -- presumably because python-docx
    # addresses merged cells by grid position; verify against the form.
    fields = (
        t.cell(0, 1).text,   # name
        t.cell(0, 8).text,   # situation
        t.cell(1, 2).text,   # people
        t.cell(1, 8).text,   # title
    )
    print(" ".join(fields))
    return fields


if __name__ == "__main__":
    # Windows directory holding the survey forms (raw string: no escapes).
    PATH = r"H:\work\aaa"
    parsers = {'.docx': parse_docx, '.doc': parse_doc}

    w = win32com.client.Dispatch('Word.Application')
    try:
        # Walk the directory and dispatch each file on its extension.
        for doc in os.listdir(PATH):
            parser = parsers.get(os.path.splitext(doc)[1].lower())
            if parser is None:
                continue
            try:
                parser(os.path.join(PATH, doc))
            except Exception as e:
                # Best-effort batch run: report the failure, keep going.
                print(e)
    finally:
        # Shut down the background Word process started above.
        w.Quit()

评论关闭