lxml应用例子,lxml例子


#!python
"""Print selected cells of the "productlist" table in 11078.htm as quoted, comma-separated rows.

For every direct ``tr`` child of the first element whose ``class`` attribute
contains ``productlist``, the text fragments found in the third ``td`` and in
the first ``a`` of the fourth ``td`` are stripped, double-quoted and joined
with commas, one output line per row.
"""
import codecs
import locale
import sys

import lxml.html

# Make printing tolerant of characters the terminal encoding cannot represent.
# The locale may not report an encoding at all, so fall back to UTF-8.
_encoding = locale.getpreferredencoding() or "utf-8"
if sys.version_info[0] < 3:
    # Python 2: stdout is a byte stream, so wrap it in an encoding writer.
    sys.stdout = codecs.getwriter(_encoding)(sys.stdout, 'replace')
elif hasattr(sys.stdout, "reconfigure"):
    # Python 3.7+: stdout already encodes text; only relax its error handler.
    sys.stdout.reconfigure(errors='replace')


def process_row(row):
    """Yield one value per direct ``td`` child of *row*.

    A cell with no directly nested ``table`` yields its ``text_content()``;
    a cell that has nested tables yields a list holding ``process_table(t)``
    for each of them.
    """
    for cell in row.xpath('./td'):
        inner_tables = cell.xpath('./table')
        if not inner_tables:
            yield cell.text_content()
        else:
            yield [process_table(t) for t in inner_tables]


def process_table(table):
    """Return a list with one ``process_row`` generator per direct ``tr`` child.

    The row generators are lazy: a row is only read once it is iterated.
    """
    return [process_row(row) for row in table.xpath('./tr')]


def _quote(text):
    """Return *text* stripped and wrapped in double quotes.

    Embedded double quotes are doubled so that a field containing ``"``
    cannot terminate the quoted value early.
    """
    return '"' + text.strip().replace('"', '""') + '"'


html = lxml.html.parse("11078.htm")
# Take the first match; this raises IndexError when no element matches.
tab = html.xpath("//*[contains(@class,'productlist')]")[0]

# Output to terminal
for row in tab.xpath('./tr'):
    fields = []
    # Third cell, plus the first link inside the fourth cell.
    for cell in row.xpath('./td[3]|td[4]/a[1]'):
        # Every text fragment becomes its own field; whitespace-only
        # fragments therefore show up as empty "" fields.
        fields.extend(_quote(fragment) for fragment in cell.itertext())
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(','.join(fields))

# This snippet comes from http://byrx.net

评论关闭