# -*- coding: utf-8 -*-
import zipfile
import gzip
from xml.etree import ElementTree as ET
# Read one XML member out of the archive. The context manager guarantees the
# archive handle is closed even if the member is missing or unreadable.
with zipfile.ZipFile('E:/sipo/20200519/20200519-1-001.ZIP') as ZipFile:
    # NOTE: despite its name, GZFile holds the raw bytes returned by
    # ZipFile.read(); no gzip handling is involved.
    GZFile = ZipFile.read(
        '1/CN102020000283503CN00001111659690AFULZH20200519CN000/'
        'CN102020000283503CN00001111659690AFULZH20200519CN000.XML'
    )
xml = GZFile.decode(encoding="utf-8")

# Keep a copy of the document on disk; it is parsed from that file below.
with open('1.xml', "w", encoding="utf-8") as file_handle:
    file_handle.write(xml)

tree = ET.parse('1.xml')
# Debug aid (disabled): list every tag in the document, then stop.
# The print must stay commented out together with its loop, otherwise `i`
# is undefined and the script dies with a NameError.
# for i in tree.iter():
#     print(i.tag)
# exit()
root = tree.getroot()
namespace = '{http://www.sipo.gov.cn/XMLSchema/business}'
# Prefix the tag name with its xmlns so ElementTree can match it.
iter_root = root.iter(namespace + 'BibliographicData')
# Accumulator filled by get_xml_content().
ret = []
def get_xml_content(iter_root, out=None):
    """Recursively collect the attributes and text of XML elements.

    Every element reachable from ``iter_root`` is visited depth-first. For
    each one, a single-key dict ``{tag: info}`` is appended to the output
    list, where ``tag`` is the element tag with any ``{namespace}`` prefix
    removed and ``info`` is a copy of the element's attributes plus a
    ``'text'`` entry holding ``element.text``. Elements that have no
    attributes and whose text is whitespace only (pure containers) are not
    recorded, but their children are still visited.

    Args:
        iter_root: An iterable of elements (an ``Element`` itself iterates
            over its direct children).
        out: List to append results to. When omitted, the module-level
            ``ret`` list is used, which is the original behaviour.

    Returns:
        The list the results were appended to.
    """
    if out is None:
        # Backward-compatible default: accumulate into the module-level list.
        out = ret
    for node in iter_root:
        text = node.text
        # Copy the attributes: writing 'text' into node.attrib directly would
        # modify the parsed tree (and clobber a real attribute named "text").
        info = dict(node.attrib)
        # Container elements carry only the newline/indentation that precedes
        # their first child; treat any whitespace-only text as empty.
        is_blank = text is not None and not text.strip()
        if info or not is_blank:
            info['text'] = text
            tag = node.tag.split('}')[-1]
            out.append({tag: info})
        get_xml_content(node, out)
    return out
# Walk the selected elements and print everything that was collected.
get_xml_content(iter_root)
print(ret)