Python解析生成XML-ElementTree VS minidom


本文介绍用Python解析生成以下XML:
 
 
<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language='English'><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language='Chinese'><![CDATA[cdata text]]></Description>
    </Person>
</Persons>
 
1.创建一个xml文件名为src.xml,内容如上,放到c:\temp
 
2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。
 
ElementTreeSample.py如下:
 
 
# -*- coding: utf-8 -*-
"""
Sample of xml.etree.ElementTree
 
@author: ldlchina
"""
 
import os
import sys
import logging
import traceback
import xml.etree.ElementTree as ET
import time
 
def copy_node(src_node, target_node):
    """Recursively copy attributes, text and children of src_node into target_node.

    target_node must already exist (normally created with the same tag as
    src_node); it is filled in place and nothing is returned.

    Text handling:
      * A leaf element's ``text`` is copied verbatim.
      * For an element with children, ``text`` and each child's ``tail`` are
        copied only when they contain non-whitespace characters. This keeps
        real mixed content (e.g. ``<p>hello<b>x</b>world</p>``) while still
        dropping the whitespace that merely indents nested elements.
    """
    # Copy attributes.
    for key, value in src_node.items():
        target_node.set(key, value)

    children = list(src_node)
    if not children:
        target_node.text = src_node.text
        return

    # Text before the first child: keep it unless it is only indentation.
    if src_node.text and src_node.text.strip():
        target_node.text = src_node.text

    for child in children:
        target_child = ET.SubElement(target_node, child.tag)
        copy_node(child, target_child)
        # Text following the child's end tag lives in child.tail, not in the
        # parent's text, so it has to be carried over separately.
        if child.tail and child.tail.strip():
            target_child.tail = child.tail
    
def read_write_xml(src, target):
    """Parse the XML file ``src``, rebuild it via copy_node and write it to ``target``.

    Prints the time spent in copy_node (milliseconds) and logs the target
    path once the file has been written.
    """
    root = ET.parse(src).getroot()
    target_root = ET.Element(root.tag)

    # perf_counter() is monotonic and high resolution; time.time() is a
    # wall-clock value that can be too coarse to time a copy this short.
    start_time = time.perf_counter() * 1000
    copy_node(root, target_root)
    end_time = time.perf_counter() * 1000
    print('copy_node:' + str(end_time - start_time))

    ET.ElementTree(target_root).write(target)
    logging.info(target)
 
def main():
    """Run the ElementTree sample: configure logging, convert the file, report timing.

    Source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the sample paths under
    C:/temp are used. Any error is logged with its traceback instead of
    propagating, and the function then waits for the user before returning.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: the log file sits next to the script. splitext
        # swaps only the extension, never a '.py' elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so log records also show up on the console.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        if len(sys.argv) >= 3:
            src = sys.argv[1]
            target = sys.argv[2]
        else:
            # Defaults for debugging
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-tree.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml:' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to generate the target XML')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()
 
 3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。
 
MinidomSample.py如下:
 
 
# -*- coding: utf-8 -*-
"""
Sample of xml.dom.minidom
 
@author: ldlchina
"""
 
import os
import sys
import logging
import traceback
import xml.dom.minidom as MD
import time
 
def get_text(n):
    """Return the concatenated data of the text and CDATA nodes directly under n.

    Only immediate children are inspected; text inside nested elements is
    not included. An element without such children yields an empty string.
    """
    return "".join(
        child.data
        for child in n.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )
 
def copy_node(target_doc, src_node, target_node):
    """Recursively copy the content of src_node into target_node.

    target_doc is the Document used as the factory for every new node.
    src_node may be a Document or an Element; target_node is the matching
    node in the target tree and is filled in place.

    Attributes, text, CDATA sections, comments, processing instructions and
    child elements are copied in document order. Other node types (for
    example a document type declaration) are skipped.
    """
    # Only elements carry attributes; a Document passed as src_node has none.
    if src_node.nodeType == src_node.ELEMENT_NODE:
        for name, value in src_node.attributes.items():
            target_node.setAttribute(name, value)

    for node in src_node.childNodes:
        if node.nodeType == node.TEXT_NODE:
            target_child = target_doc.createTextNode(node.nodeValue)
        elif node.nodeType == node.CDATA_SECTION_NODE:
            target_child = target_doc.createCDATASection(node.nodeValue)
        elif node.nodeType == node.COMMENT_NODE:
            target_child = target_doc.createComment(node.nodeValue)
        elif node.nodeType == node.PROCESSING_INSTRUCTION_NODE:
            target_child = target_doc.createProcessingInstruction(node.target, node.data)
        elif node.nodeType == node.ELEMENT_NODE:
            target_child = target_doc.createElement(node.nodeName)
        else:
            continue

        target_node.appendChild(target_child)
        if node.nodeType == node.ELEMENT_NODE:
            # Descend only into elements; the other copied kinds are leaves.
            copy_node(target_doc, node, target_child)
    
def read_write_xml(src, target):
    """Parse the XML file ``src``, rebuild it via copy_node and write it to ``target``.

    Prints the time spent in copy_node (milliseconds) and logs the target
    path once the file has been written. Only the document element is
    serialized, so the output has no XML declaration.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    # perf_counter() is monotonic and high resolution; time.time() is a
    # wall-clock value that can be too coarse to time a copy this short.
    start_time = time.perf_counter() * 1000
    copy_node(target_doc, doc, target_doc)
    end_time = time.perf_counter() * 1000
    print('copy_node: ' + str(end_time - start_time))

    # Write to file. The output carries no XML declaration, so readers will
    # assume UTF-8; encode explicitly instead of using the locale default.
    # The with-block closes the file even if serialization fails.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())
    logging.info(target)
 
def main():
    """Run the minidom sample: configure logging, convert the file, report timing.

    Source and target paths are taken from the first two command-line
    arguments when both are given; otherwise the sample paths under
    C:/temp are used. Any error is logged with its traceback instead of
    propagating, and the function then waits for the user before returning.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger: the log file sits next to the script. splitext
        # swaps only the extension, never a '.py' elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler so log records also show up on the console.
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        if len(sys.argv) >= 3:
            src = sys.argv[1]
            target = sys.argv[2]
        else:
            # Defaults for debugging
            src = 'C:/temp/src.xml'
            target = 'C:/temp/target-dom.xml'

        # Generate results
        start_time = time.perf_counter() * 1000
        read_write_xml(src, target)
        end_time = time.perf_counter() * 1000
        print('read_write_xml: ' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback to the message.
        logging.exception('Failed to generate the target XML')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()
 
4.运行ElementTreeSample.py,得到XML如下:
 
<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>
5.运行MinidomSample.py,得到XML如下:
 
<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language="English"><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language="Chinese"><![CDATA[cdata text]]></Description>
    </Person>
</Persons>
 
 
 
ElementTree VS minidom:
 
1.ElementTree执行速度会比minidom快一些。
 
2.ElementTree把XML的换行和缩进保存在元素的text和tail属性中,上面的示例没有复制这些空白,所以输出被压缩成一行;minidom把它们作为独立的文本节点,逐个节点复制时自然得以保留。
 
3.ElementTree能读取CDATA中的内容,但解析后只剩普通文本,输出时不会保留CDATA标记;minidom可以保留CDATA节。

相关内容

    暂无相关文章

评论关闭