Python 中文处理问题——分句(python 分句)


# Fragment 1
def Cut(cutlist, lines):
    """Split a character sequence into text runs and delimiter characters.

    Walks ``lines`` one character at a time. Whenever ``FindTok`` reports a
    character as a delimiter, the text buffered so far is emitted (possibly
    as an empty string), followed by the delimiter itself as its own item.

    Args:
        cutlist: Collection of delimiter characters, passed through
            unchanged to ``FindTok``.
        lines: Iterable of single characters (e.g. ``list(text)``).

    Returns:
        A list of strings alternating between text runs and delimiters, in
        input order.
    """
    # NOTE(review): FindTok is not defined in the fragments shown here;
    # presumably it tests whether the character is in cutlist -- confirm.
    pieces = []
    buffered = []
    for char in lines:
        if FindTok(cutlist, char):
            pieces.append("".join(buffered))
            pieces.append(char)
            buffered = []
        else:
            buffered.append(char)
    # Flush the tail: without this, text after the last delimiter (e.g. a
    # final line with no trailing newline) was silently dropped.
    if buffered:
        pieces.append("".join(buffered))
    return pieces


# Fragment 2
# The inner double quotes are escaped; left bare they terminated the literal
# early. "\\?" and "\\|" keep the backslash character that the original
# "\?" and "\|" sequences produced.
# NOTE(review): the doubled ASCII punctuation (",,", "::", "?\?") suggests
# full-width forms were normalized away when the page was extracted --
# confirm against the original and restore them if so.
cutlist = (
    "[。,,!……!《》<>\"'::?\\?、\\|“”‘’;]{}(){}【】(){}"
    "():?!。,;、~——+%%`:“”\"'‘\n\r"
)

# Built once instead of once per input line; kept as a list because that is
# the type the original passed to Cut.
delimiters = list(cutlist)

# NOTE(review): inputfilename is not defined in the fragments shown here; it
# must be assigned before this point.
# Decoding happens once at the I/O boundary (the file is GBK-encoded), and
# the context manager guarantees the file is closed.
with open(inputfilename, encoding="gbk") as infile:
    for lines in infile:
        for line in Cut(delimiters, list(lines)):
            stripped = line.strip()
            if stripped != "":
                # The piece may still contain inner spaces; print each
                # space-separated part on its own line.
                for sentence in stripped.split():
                    print("se:", sentence)
# This snippet comes from http://byrx.net

评论关闭