Create A Dataframe From A Xml File With The Paths And The Value
Here is the data from the XML file (the sample is truncated in this copy): <…
Solution 1:
Try this.
from simplified_scrapy import SimplifiedDoc, utils
# Output table: starts with the header row; one [path, value] row per
# childless node is appended during the traversal.
rows = [['Path', 'Value']]

# Load the XML text, parse it, and select the SOAP-ENV:Body element.
xml = utils.getFileContent('file_try.xml')
doc = SimplifiedDoc(xml)
body = doc.select('SOAP-ENV:Body')
def getPathValue(node, path):
    """Record the path and text of ``node``, or descend into its children.

    ``path`` is the slash-separated path of the parent; this node's tag is
    appended to it. A node that has children contributes no row of its own:
    its children are handed to ``traverseNodes`` with the extended path.
    A node without children is treated as a leaf and ``[path, node.text]``
    is appended to the module-level ``rows`` list.
    """
    path = path + '/' + node['tag']  # Splicing path
    children = node.children
    if children:
        traverseNodes(children, path)
    else:
        rows.append([path, node.text])
def traverseNodes(nodes, path):
    """Call ``getPathValue`` on each node in ``nodes`` using the same parent ``path``."""
    for node in nodes:  # Traversing child nodes
        getPathValue(node, path)
# Walk every child of the selected body element, rooting each path at "Body".
traverseNodes(body.children, "Body")
# print(rows)
# Presumably writes `rows` (header row plus collected rows) out as CSV —
# confirm against the simplified_scrapy `utils.save2csv` documentation.
utils.save2csv('data_2.csv', rows)
Result:
[['Body/ADD_LandIndex_001/CNTROLAREA/BSR/status', 'ADD'], ['Body/ADD_LandIndex_001/CNTROLAREA/BSR/NOUN', 'LandIndex'], ['Body/ADD_LandIndex_001/CNTROLAREA/BSR/REVISION', '001'], ['Body/ADD_LandIndex_001/DATAAREA/LandIndex/reportId', 'AMI100031'], ['Body/ADD_LandIndex_001/DATAAREA/LandIndex/requestKey', 'R3278458'],
...
Solution 2:
I found my mistake by looking at @yazz's answer.
Here is the code:
from lxml import etree as et
import pandas as pd
import os
filename = 'file_try.xml'
# SOAP envelope namespace in `{uri}` form; it is stripped from the element
# paths when rows are collected.
namespace = '{http://schemas.xmlsoap.org/soap/envelope/}'

# Open the file in binary mode and parse it; keep only the root element.
with open(filename, 'rb') as file:
    root = et.parse(file).getroot()

# Wrap the root in an ElementTree so getelementpath() can be called on it.
tree = et.ElementTree(root)
col_name = ['Path', 'Value']
data = []
def traverse(el, d):
    """Append a ``[path, text]`` pair to ``d`` for each leaf at or below ``el``.

    An element with children is not recorded itself; each child is visited
    recursively. A childless element is recorded only when its ``text`` is
    not ``None``. The recorded path is ``tree.getelementpath(el)`` followed
    by the string form of ``el.xpath('@Ccy')``, with the module-level
    ``namespace`` prefix removed.

    Returns the same list ``d`` that was passed in (mutated in place).
    """
    if len(el) > 0:
        for child in el:
            traverse(child, d)
    elif el.text is not None:
        # NOTE(review): xpath() returns a list here, so str() appends its
        # bracketed repr (presumably "[]" when the attribute is absent) —
        # confirm this suffix is wanted in the Path column.
        path = tree.getelementpath(el) + str(el.xpath('@Ccy'))
        d.append([path.replace(namespace, ''), el.text])
    # Debug output: prints the accumulated rows on every call.
    print(d)
    return d
# Collect the [path, value] rows from the whole tree into a two-column
# frame, then export it to an Excel workbook.
df = pd.DataFrame(
    data=traverse(root, data),
    columns=col_name,
)
df.to_excel("data_2.xlsx")
Post a Comment for "Create A Dataframe From A Xml File With The Paths And The Value"