Skip to content Skip to sidebar Skip to footer

Create A Dataframe From A Xml File With The Paths And The Value

Here is the data from the xml file, <

Solution 1:

Try this.

from simplified_scrapy import SimplifiedDoc, utils
# Output table: starts with the header row; one [path, value] row per leaf
# node is appended to it during the traversal.
rows = [['Path', 'Value']]
xml = utils.getFileContent('file_try.xml')
# Wrap the file content in a document object and select the SOAP body
# element, whose children are the starting point of the traversal.
doc = SimplifiedDoc(xml)
body = doc.select('SOAP-ENV:Body')

def getPathValue(node, path):
    """Record a leaf node in ``rows`` or descend into its children.

    The node's tag is appended to ``path`` (slash-separated). A node
    without children contributes one ``[path, text]`` row to the
    module-level ``rows`` list; otherwise its children are handed to
    ``traverseNodes`` together with the extended path.
    """
    current_path = '/'.join((path, node['tag']))
    child_nodes = node.children
    if not child_nodes:
        # Leaf: store the full path with the node's text.
        rows.append([current_path, node.text])
        return
    traverseNodes(child_nodes, current_path)

def traverseNodes(nodes, path):
    """Call ``getPathValue`` on each node in ``nodes`` with the same parent ``path``."""
    for child in nodes:
        getPathValue(child, path)

# Walk every child of the selected body element; "Body" is used as the
# first segment of each recorded path.
traverseNodes(body.children, "Body")

# print(rows)
# Save the collected rows (header included) to data_2.csv.
utils.save2csv('data_2.csv', rows)

Result:

[['Body/ADD_LandIndex_001/CNTROLAREA/BSR/status', 'ADD'], ['Body/ADD_LandIndex_001/CNTROLAREA/BSR/NOUN', 'LandIndex'], ['Body/ADD_LandIndex_001/CNTROLAREA/BSR/REVISION', '001'], ['Body/ADD_LandIndex_001/DATAAREA/LandIndex/reportId', 'AMI100031'], ['Body/ADD_LandIndex_001/DATAAREA/LandIndex/requestKey', 'R3278458'], 
...

Solution 2:

I found my mistake by looking at @yazz's answer.

Here is the code :

from lxml import etree as et
import pandas as pd
import os


# Input file and the SOAP envelope namespace; the namespace string is
# stripped from element paths later so they read as plain tag names.
filename = 'file_try.xml'
namespace = '{http://schemas.xmlsoap.org/soap/envelope/}'

# Parse once and keep both the tree (needed for getelementpath) and its root.
with open(filename, 'rb') as file:
    tree = et.parse(file)
root = tree.getroot()

# Column headers for the resulting frame and the list that the traversal
# fills with [path, text] rows.
col_name = ['Path', 'Value']
data = []

def traverse(el, d):
    """Collect ``[path, text]`` rows for the leaf elements under ``el``.

    Args:
        el: Element to walk. An element with children is recursed into;
            an element without children is treated as a leaf.
        d: List that receives one ``[path, text]`` row for each leaf whose
            text is not ``None``. It is mutated in place.

    Returns:
        The same list ``d`` that was passed in.
    """
    if len(el) > 0:
        for child in el:
            traverse(child, d)
    elif el.text is not None:
        # The element path comes from the module-level ``tree``; the result
        # of the '@Ccy' XPath lookup is appended in its str() form, and the
        # module-level ``namespace`` string is removed from the whole path.
        path = tree.getelementpath(el) + str(el.xpath('@Ccy'))
        d.append([path.replace(namespace, ''), el.text])
    return d

# Run the traversal from the root and build a frame from the collected
# rows, using col_name as the column headers.
df = pd.DataFrame(traverse(root,data), columns = col_name)

# Write the frame to an Excel workbook.
df.to_excel("data_2.xlsx")

Post a Comment for "Create A Dataframe From A Xml File With The Paths And The Value"