Python 解析 ZIP 压缩包中带命名空间的 XML 节点

admin

# -*- coding: utf-8 -*-
import zipfile
import gzip
from xml.etree import ElementTree as ET
# Open the archive in a context manager so the handle is released even if
# reading the member raises.
with zipfile.ZipFile('E:/sipo/20200519/20200519-1-001.ZIP') as ZipFile:
    # NOTE: despite its name, GZFile holds the raw bytes of the XML member
    # read from the archive; it is decoded as UTF-8 right below.
    GZFile = ZipFile.read('1/CN102020000283503CN00001111659690AFULZH20200519CN000/CN102020000283503CN00001111659690AFULZH20200519CN000.XML')
xml = GZFile.decode(encoding="utf-8")
# Persist the decoded document as 1.xml; this is the file parsed below.
with open('1.xml', "w", encoding="utf-8") as file_handle:
    file_handle.write(xml)
tree = ET.parse('1.xml')
# Debug aid: uncomment all three lines to print every tag in the document
# and stop. (Previously only the `for` line was commented out, leaving an
# indented `print` behind that made the file fail with IndentationError.)
# for i in tree.iter():
#     print(i.tag)
# exit()
root = tree.getroot()
namespace = '{http://www.sipo.gov.cn/XMLSchema/business}'
# Prefix the tag with its '{namespace-uri}' so the lookup matches the
# namespace-qualified tag names.
iter_root = root.iter(namespace + 'BibliographicData')
# Accumulator filled by get_xml_content().
ret = []

def get_xml_content(iter_root, result=None):
    """Recursively collect the attributes and text of XML elements.

    Each element produced by ``iter_root`` is visited, followed by all of
    its descendants in document order. For every visited element a
    single-key dict ``{local_tag: {**attributes, 'text': element_text}}`` is
    appended to the output list, where ``local_tag`` is the tag with any
    leading ``{namespace}`` prefix removed.

    Elements that have no attributes and whose text is whitespace only
    (typically pure containers in formatted XML) are not recorded, but
    their children are still visited.

    Args:
        iter_root: An iterable of elements, e.g. the result of
            ``Element.iter(...)`` or an element itself (iterating an
            element yields its direct children).
        result: Optional list to append to. When omitted, the module-level
            ``ret`` list is used.

    Returns:
        The list the entries were appended to.
    """
    if result is None:
        # Default to the module-level accumulator.
        result = ret
    for node in iter_root:
        # Copy the attributes: writing the 'text' key straight into
        # node.attrib would modify the parsed tree itself.
        attrs = dict(node.attrib)
        text = node.text
        # Skip attribute-less elements whose text is only whitespace. Using
        # strip() also covers indented layouts such as '\n    ', not just a
        # bare '\n'. Elements with no text at all (None) are still recorded.
        is_blank_container = (
            not attrs and text is not None and not text.strip()
        )
        if not is_blank_container:
            attrs['text'] = text
            # Tags look like '{namespace-uri}localname'; keep the local part.
            local_tag = node.tag.split('}')[-1]
            result.append({local_tag: attrs})
        # Iterating an element yields its children, so this descends a level.
        get_xml_content(node, result)
    return result


# Walk every matched BibliographicData element; the collected entries end
# up in the module-level ``ret`` list.
get_xml_content(iter_root)

# Print the collected entries. (The stray markdown fence that followed this
# call was a syntax error and has been removed.)
print(ret)