Normalizing Character Data Before Output


import sys
from xml.parsers import expat


def normalize_whitespace(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is dropped, so a string that is empty
    or contains only whitespace yields ``''``.
    """
    return ' '.join(text.split())


class SimpleParse:
    """Print a trace of an XML document's elements and their text.

    Character data is accumulated in ``self.cdata`` instead of being
    printed as it arrives; the buffer is flushed (whitespace-normalized)
    whenever an element starts or ends, so each run of text between two
    tags is printed as one line.
    """

    def __init__(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element
        self.parser.CharacterDataHandler = self.character_data
        # Pieces of character data received since the last tag boundary.
        self.cdata = []

    def parse(self, file):
        """Parse the XML read from *file*, printing the trace to stdout.

        *file* is handed straight to expat's ``ParseFile``, so it must be
        a file-like object opened in binary mode.
        """
        self.parser.ParseFile(file)

    def print_cdata(self):
        """Print the buffered text, if any, and reset the buffer."""
        txt = normalize_whitespace(''.join(self.cdata))
        # Whitespace-only text (e.g. indentation between tags) normalizes
        # to '' and is deliberately not printed.
        if txt:
            print(txt)
        self.cdata = []

    def start_element(self, name, attrs):
        """Flush pending text, then report the opening tag."""
        self.print_cdata()
        print('Start:', name, attrs)

    def character_data(self, data):
        """Buffer one piece of character data until the next tag."""
        self.cdata.append(data)

    def end_element(self, name):
        """Flush pending text, then report the closing tag."""
        self.print_cdata()
        print('End:', name)


def main():
    """Parse the XML file named by the first command-line argument."""
    if len(sys.argv) < 2:
        sys.exit('usage: %s FILE' % sys.argv[0])
    # Binary mode: expat does its own decoding; the context manager makes
    # sure the file is closed even if parsing raises.
    with open(sys.argv[1], 'rb') as xml_file:
        SimpleParse().parse(xml_file)


if __name__ == '__main__':
    main()

评论关闭