测试环境

Python 3.6

Win10

代码实现

#!/usr/bin/env python 3.4.0
#-*- encoding:utf-8 -*- __author__ = 'shouke' import xml.etree.ElementTree as ET def compare_xml_node_attributes(xml_node1, xml_node2):
result = []
node1_attributes_dict = xml_node1.attrib
node2_attributes_dict = xml_node2.attrib
for attrib1, value in node1_attributes_dict.items():
value2 = node2_attributes_dict.get(attrib1)
if value == value2:
node2_attributes_dict.pop(attrib1)
else:
if value2:
attrib2 = attrib1
node2_attributes_dict.pop(attrib2)
else:
attrib2 = '不存在'
result.append('结点1属性:{attrib1} 值:{value1},结点2属性:{attrib1} 值:{value2}'.format(attrib1=attrib1 or '不存在',
value1=value or '不存在',
attrib2=attrib2,
value2=value2 or '不存在')) for attrib2, value2 in node2_attributes_dict.items():
result.append('结点1属性:{attrib1} 值:{value1},结点2属性:{attrib1} 值:{value2}'.format(attrib1='不存在',
value1='不存在',
attrib2=attrib2,
value2=value2))
return result def compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath):
def get_node_children(xml_node, node_xpath):
result = {}
for child in list(xml_node):
if child.tag not in result:
result[child.tag] = [{'node':child, 'xpath': '%s/%s[%s]' % (node_xpath, child.tag, 1)}]
else:
result[child.tag].append({'node':child, 'xpath': '%s/%s[%s]' % (node_xpath, child.tag, len(result[child.tag])+1)})
return result result = []
children_of_node1_dict = get_node_children(xml_node1, node1_xpath)
children_of_node2_dict = get_node_children(xml_node2, node2_xpath) temp_list1 = []
temp_list2 = []
for child_tag, child_node_list in children_of_node1_dict.items():
second_child_node_list = children_of_node2_dict.get(child_tag, [])
if not second_child_node_list:
# 获取xml1中比xml2中多出的子结点
for i in range(0, len(child_node_list)):
temp_list1.append('%s/%s[%s]' % (node1_xpath, child_node_list[i]['node'].tag, i+1))
continue for first_child, second_child in zip(child_node_list, second_child_node_list):
result.extend(compare_xml_nodes(first_child['node'], second_child['node'], first_child['xpath'], second_child['xpath'])) # 获取xml2中对应结点比xml1中对应结点多出的同名子结点
for i in range(len(child_node_list), len(second_child_node_list)):
temp_list2.append('%s/%s[%s]' % (node2_xpath, second_child_node_list[i]['node'].tag, i+1))
children_of_node2_dict.pop(child_tag) if temp_list1:
result.append('子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})多了以下子结点:\n{differences}'.format (xpath1=node1_xpath,
xpath2=node2_xpath,
differences='\n'.join(temp_list1)))
# 获取xml2比xml1中多出的子结点
for child_tag, child_node_list in children_of_node2_dict.items():
for i in range(0, len(child_node_list)):
temp_list2.append('%s/%s[%s]' % (node1_xpath, child_node_list[i]['node'].tag, i+1)) if temp_list2:
result.append('子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})少了以下子结点:\n{differences}'.format (xpath1=node1_xpath,
xpath2=node2_xpath,
differences='\n'.join(temp_list2)))
return result def compare_xml_nodes(xml_node1, xml_node2, node1_xpath='', node2_xpath=''):
result = []
# 比较标签
if xml_node1.tag != xml_node2.tag:
result.append('标签不一样:xml1结点(xpath:{xpath1}):{tag1},xml2结点(xpath:{xpath2}):{tag2}'.format (xpath1=node1_xpath,
tag1=xml_node1.tag,
xpath2=node2_xpath,
tag2=xml_node2.tag)) # 比较文本
if xml_node1.text != xml_node2.text:
result.append('文本不一样:xml1结点(xpath:{xpath1}):{text1},xml2结点(xpath:{xpath2}):{text2}'.format (xpath1=node1_xpath,
tag1=xml_node1.text or '',
xpath2=node2_xpath,
tag2=xml_node2.text or '')) # 比较属性
res = compare_xml_node_attributes(xml_node1, xml_node2)
if res:
result.append('属性不一样:xml1结点(xpath:{xpath1}),xml2结点(xpath:{xpath2}):\n{differences}'.format (xpath1=node1_xpath,
xpath2=node2_xpath,
differences='\n'.join(res)))
# 比较子结点
res = compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath)
if res:
result.extend(res) return result def compare_xml_strs(xml1_str, xml2_str, mode=3):
'''
@param: mode 比较模式,预留,暂时没用。目前默认 xml 子元素如果为列表,则列表有序列表,按序比较
'''
root1 = ET.fromstring(xml1_str.strip())
root2 = ET.fromstring(xml2_str.strip()) return compare_xml_nodes(root1, root2, '/%s' % root1.tag, '/%s' % root2.tag)

测试运行

xml_str1 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<country name="Liechtenstein">
<rangk>1</rangk>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" ></neighbor>
<neighbor name="Switzerland" direction="W" ></neighbor>
</country>
<country name="Singpore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" ></neighbor>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W" ></neighbor>
<neighbor name="Colombia" direction="W" ></neighbor>
</country>
</data>
'''
xml_str2 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<country name="Liechtenstein">
<rangk>1</rangk>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" ></neighbor>
<neighbor name="Switzerland" direction="W" ></neighbor>
</country>
<country name="Singpore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" ></neighbor>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W" ></neighbor>
<neighbor name="Colombia" direction="W" ></neighbor>
</country>
</data>
''' xml_str3 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<class name="computer">
<rangk>1</rangk>
<year>unknow</year>
<addr>sz</addr>
<book name="java programming" price="10" ></book>
<book name="python programming" price="10" ></book>
</class>
<class name="philosophy">
<rangk>2</rangk>
<year>unknown</year>
<book name="A little history of philosophy" price="15" ></book>
<book name="contemporary introduction" price="15" ></book>
</class>
<class name="history">
<rangk>3</rangk>
<year>unknown</year>
<addr>other addr</addr>
<book name="The South China Sea" price="10" ></book>
<book name="Chinese Among Others" price="10" ></book>
</class>
</data>
''' xml_str4 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<class name="computer">
<year>unknow</year>
<addr>sz</addr>
<book name="java programming" price="10" ></book>
<book name="python programming" price="10" ></book>
</class>
<class name="philosophy">
<year>unknown</year>
<addr>other addr</addr>
<book name="A little history of philosophy" price="15" ></book>
<book name="contemporary introduction" price="16" ></book>
</class>
</data>
''' if __name__ == '__main__':
res_list = compare_xml_strs(xml_str1, xml_str2)
if res_list:
print('xml1和xml2不一样:\n%s' % '\n'.join(res_list))
else:
print('xml1和xml2一样') res_list = compare_xml_strs(xml_str3, xml_str4)
if res_list:
print('xml3和xml4不一样:\n%s' % '\n'.join(res_list))
else:
print('xml3和xml4一样')

运行结果

xml1和xml2一样
xml3和xml4不一样:
子结点不一样:xml1结点(xpath:/data/class[1])比xml2结点(xpath:/data/class[1])多了以下子结点:
/data/class[1]/rangk[1]
属性不一样:xml1结点(xpath:/data/class[2]/book[2]),xml2结点(xpath:/data/class[2]/book[2]):
结点1属性:price 值:15,结点2属性:price 值:16
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])多了以下子结点:
/data/class[2]/rangk[1]
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])少了以下子结点:
/data/class[2]/addr[1]

Python 基于xml.etree.ElementTree实现XML对比的更多相关文章

  1. Python中xml.etree.ElementTree读写xml文件实例

    import osimport xml.etree.ElementTree as ET'''Python 标准库中,提供了6种可以用于处理XML的包,本文举实例说明第6种1.xml.dom2.xml. ...

  2. python3.x中xml.etree.ElementTree解析xml举例

    1.新建xml import xml.etree.ElementTree as ETa=ET.Element('elem')c=ET.SubElement(a,'child1')c.text=&quo ...

  3. python xml.etree.ElementTree解析xml文件获取节点

    <?xml version = "1.0" encoding = "utf-8"?> <root> <body name=&quo ...

  4. python 解析xml遇到xml.etree.ElementTree.ParseError: not well-formed (invalid token): line 4, column 34

    在调试数字驱动用xml文件的方式时,包含读取xml文件的步骤,运行程序报错: d:\test\0629>python XmlUtil.pyTraceback (most recent call ...

  5. python标准库xml.etree.ElementTree的bug

    使用python生成或者解析xml的方法用的最多的可能就数python标准库xml.etree.ElementTree和lxml了,在某些环境下使用xml.etree.ElementTree更方便一些 ...

  6. python模块:xml.etree.ElementTree

    """Lightweight XML support for Python. XML is an inherently hierarchical data format, ...

  7. python xml.etree.ElementTree模块

    使用的XML文件如下:file.xml <?xml version="1.0"?> <data name="ming"> <cou ...

  8. Python 标准库之 xml.etree.ElementTree

    Python 标准库之 xml.etree.ElementTree Python中有多种xml处理API,常用的有xml.dom.*模块.xml.sax.*模块.xml.parser.expat模块和 ...

  9. python解析xml文件之xml.etree.cElementTree和xml.etree.ElementTree区别和基本使用

    1.解析速度:ElementTree在 Python 标准库中有两种实现.一种是纯 Python 实现例如 xml.etree.ElementTree ,另外一种是速度快一点的 xml.etree.c ...

  10. [python 学习] 使用 xml.etree.ElementTree 模块处理 XML

    ---恢复内容开始--- 导入数据(读文件和读字符串) 本地文件 country_data.xml <?xml version="1.0"?> <data> ...

随机推荐

  1. 深入理解 Swoole 的底层加载原理

    首发原文链接:深入理解 Swoole 的底层加载原理 PHP 扩展加载 我们从 php-src/sapi/cli/php_cli.c:1159 文件的入口函数 int main(int argc, c ...

  2. Android 12(S) MultiMedia Learning(九)MediaCodec

    这一节来学习MediaCodec的工作原理,相关代码路径: http://aospxref.com/android-12.0.0_r3/xref/frameworks/av/media/libstag ...

  3. 记录一次由nginx配置引发出来的一系列的缓存问题

    问题描述: 在做一个企业微信的移动端项目时,每次修改代码后并且打包.部署完毕,再次打开页面总是会有上一次的缓存,一开始以为是cookie和webStorage缓存导致的,然后每次清除还是有缓存,后来把 ...

  4. 理解Vue 3响应式系统原理

    title: 理解Vue 3响应式系统原理 date: 2024/5/28 15:44:47 updated: 2024/5/28 15:44:47 categories: 前端开发 tags: Vu ...

  5. Docker 启动 Redis 就停止解决方案(2022-3)

    启动命令如下: docker run -itd \ -p 6379:6379 \ --name myredis \ -v /home/redis/redis.conf:/etc/redis/redis ...

  6. web服务器 传统开发和前后端分离开发 服务器相关概念

    web服务器 Web服务器一般指的是网站服务器,是指驻留因特网上某一台或N台计算机的程序,可以处理浏览器等Web客户端的请求并返回相应响应,目前最主流的三个Web服务器是Apache. Nginx . ...

  7. gRPC入门学习之旅(十)

    gRPC入门学习之旅目录 gRPC入门学习之旅(一) gRPC入门学习之旅(二) gRPC入门学习之旅(三) gRPC入门学习之旅(四) gRPC入门学习之旅(七)  gRPC入门学习之旅(九) 3. ...

  8. PB通过OLE方式调用C#.NET DLL时,DLL获取自身根目录

    PB通过OLE方式调用C#.NET DLL时, DLL获取自身根目录 System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExe ...

  9. mysql case when使用

    ## mysql case when使用 SELECT order_no,case is_test when 0 then '否'when 1 then '是'end as '是否测试' from ` ...

  10. CF1854E Games Bundles

    乱搞题 设个 \(dp[i]\) 表示和为 \(i\) 的子序列个数,那么转移是容易的, \(dp[j]+=dp[j-i]\) ,然后就判下 \(dp[60]+dp[60-i]\) 是否大于 \(m\ ...