I have the below code that reads a xml file and tries to convert it to csv. The below works fine, however when the data has one additional sub-level it throws an error child index out of range
Given below is the data set I am trying to work with:
<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<Document>
<Customer>
<CustomerCode>ABC</CustomerCode>
<CustomerName>ABC Co</CustomerName>
<CustomerBusinessHours>
<CustomerBusinessHoursTimeZoneOffset>1.000000</CustomerBusinessHoursTimeZoneOffset>
</CustomerBusinessHours>
</Customer>
</Document>
Code that I have tried building:
import xml.etree.ElementTree as ET
import csv
tree = ET.parse("/users/desktop/sample.xml")
root = tree.getroot()
# open a file for writing
Resident_data = open('/users/desktop/file.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
count = 0
for member in root.findall('Customer'):
resident = []
address_list = []
if count == 0:
CustomerCode = member.find('CustomerCode').tag
resident_head.append(CustomerCode)
CustomerName = member.find('CustomerName').tag
resident_head.append(CustomerName)
CustomerBusinessHours = member[3].tag
resident_head.append(CustomerBusinessHours)
csvwriter.writerow(resident_head)
count = count + 1
CustomerCode = member.find('CustomerCode').text
resident.append(CustomerCode)
CustomerName = member.find('CustomerName').text
resident.append(CustomerName)
CustomerBusinessHours = member[3][1].text
address_list.append(CustomerBusinessHours)
CustomerBusinessHoursTimeZoneOffset = member[3][2].text
address_list.append(CustomerBusinessHoursTimeZoneOffset)
csvwriter.writerow(resident)
Resident_data.close()
I get the below error:
CustomerBusinessHours = member[3][1].text
IndexError: child index out of range
Expected output:
CustomerCode,CustomerName,CustomerBusinessHoursTimeZoneOffset
ABC,ABC Co,1.000000
The code below is able to collect the data you are looking for.
import xml.etree.ElementTree as ET
xml = '''<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<Document>
<Customer>
<CustomerCode>ABC</CustomerCode>
<CustomerName>ABC Co</CustomerName>
<CustomerBusinessHours>
<CustomerBusinessHoursTimeZoneOffset>1.000000</CustomerBusinessHoursTimeZoneOffset>
</CustomerBusinessHours>
</Customer>
</Document>'''
tree = ET.fromstring(xml)
for customer in tree.findall('Customer'):
print(customer.find('CustomerCode').text)
print(customer.find('CustomerName').text)
print(customer.find('CustomerBusinessHours').find('CustomerBusinessHoursTimeZoneOffset').text)
Output
ABC
ABC Co
1.000000
Related
I am new to python and would like to create XML tree with values.
I want to put both jsc://xxx.js" files as well as "EXT.FC.XML" under resource & policy element in XML via python code. All jsc://xxx.js" and "EXT.FC.XML" files are stored in my local folder named "resources" and "policies".
The desired output
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<APIProxy revision="2" name="Retirement_Digital_Account_Balance">
<ManifestVersion>SHA-512:f9ae03c39bf00f567559e</ManifestVersion>
<Policies>
<Policy>EXT.FC_Env_Host</Policy>
<Policy>EXT.FC_JWTVerf</Policy>
<Policy>EXT.JSC_Handle_Fault</Policy>
</Policies>
<ProxyEndpoints>
<ProxyEndpoint>default</ProxyEndpoint>
</ProxyEndpoints>
<Resources>
<Resource>jsc://createErrorMessage.js</Resource>
<Resource>jsc://jwtHdrExt.js</Resource>
<Resource>jsc://log-variables.js</Resource>
<Resource>jsc://swagger.json</Resource>
<Resource>jsc://tgtDataForm.js</Resource>
</Resources>
</APIProxy>
I use Element tree for converting into xml file, this is the code I run
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, Comment
from xml.etree import ElementTree, cElementTree
from xml.dom import minidom
from ElementTree_pretty import prettify
import datetime
import os
generated_on = str(datetime.datetime.now())
#proxy = Element('APIProxy')
proxy = Element('APIProxy', revision = "2", name = "Retirement_Digital_Account_Balance")
ManifestVersion = SubElement(proxy, 'ManifestVersion')
ManifestVersion.text = 'SHA-512:f9ae03c39bf00f567559e'
Policies = SubElement(proxy, 'Policies')
Policy = SubElement(Policies, 'Policy')
path = '/policies'
#files = []
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for file in f:
if '.xml' in file:
Policy.append(os.path.join(r, file))
for p in Policy:
print(p)
ProxyEndpoints = SubElement(proxy, 'ProxyEndpoints')
ProxyEndpoint = SubElement(ProxyEndpoints, 'ProxyEndpoint')
ProxyEndpoint.text = 'default'
Resources = SubElement(proxy, 'Resources')
Resource = SubElement(Resources, 'Resource')
path = '/Resources'
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for file in f:
if 'js' in file:
Resource.append(os.path.join(r, file))
for R in Resource:
print(R)
Spec = SubElement(proxy, 'Spec')
Spec.text = ""
#proxy.append(Spec)
proxy.append(Element('TargetServers'))
TargetEndpoints = SubElement(proxy, 'TargetEndpoints')
TargetEndpoint = SubElement(TargetEndpoints, 'TargetEndpoint')
TargetEndpoint.text = 'default'
print(ET.tostring(proxy))
tree = cElementTree.ElementTree(proxy) # wrap it in an ElementTree instance, and save as XML
t = minidom.parseString(ElementTree.tostring(proxy)).toprettyxml() # Since ElementTree write() has no pretty printing support, used minidom to beautify the xml.
tree1 = ElementTree.ElementTree(ElementTree.fromstring(t))
tree1.write("Retirement_Digital_Account_Balance_v2.xml",encoding='UTF-8', xml_declaration=True)
Okay, the code is working but i didnt get the desired output, I got the following:
<?xml version='1.0' encoding='UTF-8'?>
<APIProxy name="Retirement_Digital_Account_Balance" revision="2">
<ManifestVersion>SHA-512:f9ae03c39bf00f567559e</ManifestVersion>
<Policies>
<Policy />
</Policies>
<ProxyEndpoints>
<ProxyEndpoint>default</ProxyEndpoint>
</ProxyEndpoints>
<Resources>
<Resource />
</Resources>
</APIProxy>
How to use loop in ElementTree in python to import the values from folder and create XML tree with its values?
I try to write a parse function to translate an specific xml data into a data frame in python.
The XML data has the following structure
<?xml version='1.0' encoding='UTF-8'?><package_D15D.HISTORY xmlns="http://xml.mscibarra.com/ns/msci/deal/D15D.HISTORY" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://xml.mscibarra.com/ns/msci/deal/D15D.HISTORY 20140602_20140630_CORE_DM_SEC_MAIN_DAILY.xsd">
<dataset_D15D>
<entry calc_date="2014-06-02" ... >
</entry>
<entry ...
One find a sample of data here.
Well, every row is between two </entry>. I wrote the following function:
def parse_XML(xml_file, df_cols):
xtree = et.parse(xml_file)
xroot = xtree.getroot()
rows = []
for node in xroot:
print(xroot)
res = []
res.append(node.attrib.get(df_cols[0]))
for el in df_cols[1:]:
if node is not None and node.find(el) is not None:
res.append(node.find(el).text)
else:
res.append(None)
rows.append({df_cols[i]: res[i]
for i, _ in enumerate(df_cols)})
out_df = pd.DataFrame(rows, columns=df_cols)
out_df.to_csv('C:/Users/dataa', sep=';', encoding='utf-8')
return out_df
However, the data frame is empty by calling the function.
Any idea?
if the XML file is empty i.e. no root element, how to handle in python? When there are XML records, below Python code works fine but whenever XML file empty, conversion process files. I am looking for an exit instead of failing the process.
XML:
<?xml version = '1.0' encoding = 'UTF-8'?>
<ns2:exportEmpData xmlns:ns2="http://webservice.example.com/"/>
Python Code:
import xml
import csv
import xml.etree.ElementTree as ET
tree = ET.parse('C:/emp/emplist.xml')
root = tree.getroot()
# open a file for writing
Emp_data = open('C:/emp/emplist.csv', 'wb')
# create the csv writer object
csvwriter = csv.writer(Emp_data)
emp_head = []
count = 0
for member in root.findall('emplist'):
emp_nodes = []
if count == 0:
empId = member.find('empId').tag
emp_head.append(empId)
fullName = member.find('fullName').tag
emp_head.append(fullName)
currentAddress = member.find('currentAddress').tag
emp_head.append(currentAddress)
csvwriter.writerow(emp_head)
count = count + 1
empId = member.find('empId').text
emp_nodes.append(empId)
fullName = member.find('fullName').text
emp_nodes.append(fullName)
currentAddress = member.find('currentAddress').attrib.get('city')
emp_nodes.append(currentAddress)
csvwriter.writerow(emp_nodes)
Emp_data.close()
i have a real (and maybe pretty stupid) problem to convert a xml-file into a dataframe from pandas. Im new in python and need some help. I trying a code from another thread and modificate it but it not works.
I want to iterate through this file:
<objects>
<object id="123" name="some_string">
<object>
<id>123</id>
<site id="456" name="somename" query="some_query_as_string"/>
<create-date>some_date</create-date>
<update-date>some_date</update-date>
<update-user id="567" name="User:xyz" query="some_query_as_string"/>
<delete-date/>
<delete-user/>
<deleted>false</deleted>
<system-object>false</system-object>
<to-string>some_string_notifications</to-string>
</object>
<workflow>
<workflow-type id="12345" name="WorkflowType_some_workflow" query="some_query_as_string"/>
<validated>true</validated>
<name>somestring</name>
<exported>false</exported>
</workflow>
Here is my code:
import xml.etree.ElementTree as ET
import pandas as pd
path = "C:/Users/User/Desktop/test.xml"
with open(path, 'rb') as fp:
content = fp.read()
parser = ET.XMLParser(encoding="utf-8")
tree = ET.fromstring(content, parser=parser)
def xml2df(tree):
root = ET.XML(tree)
all_records = []
for i, child in enumerate(root):
record ={}
for subchild in child:
record[subchild.tag] = subchild.text
all_records.append(record)
return pd.DataFrame(all_records)
Where is the problem? Please help :O
You are passing the file location string to ET.fromstring(), which is not the actual contents of the file. You need to read the contents of the file first, then pass that to ET.fromstring().
path = "C:/Users/User/Desktop/test.xml"
with open(path, 'rb') as fp:
content = fp.read()
parser = ET.XMLParser(encoding="utf-8")
tree = ET.fromstring(content, parser=parser)
i have this xml but having issue parsing it into csv, i tried simple print statement but still getting no value:
<?xml version="1.0" encoding="UTF-8"?>
<Document xmlns="urn:iso:std:iso:20022:tech:xsd:pain.008.001.02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CstmrDrctDbtInitn>
<GrpHdr>
<MsgId>1820</MsgId>
<CreDtTm>2016-05-17T11:56:12</CreDtTm>
<NbOfTxs>197</NbOfTxs>
<CtrlSum>136661.81</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty>
</GrpHdr>
</CstmrDrctDbtInitn>
<CstmrDrctDbtInitn>
<GrpHdr>
<CreDtTm>2016-05-18T10:34:51</CreDtTm>
<NbOfTxs>1</NbOfTxs>
<CtrlSum>758.99</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty></GrpHdr></CstmrDrctDbtInitn>
</Document>
and i want to iterate value for each node.
So far i have written code as below:
import xml.etree.ElementTree as ET
import csv
with open("D:\Python\Dave\\17_05_16_1820_DD201606B10_Base.xml") as myFile:
tree = ET.parse(myFile)
ns = {'d': 'urn:iso:std:iso:20022:tech:xsd:pain.008.001.02'}
# open a file for writing
Resident_data = open('Bank.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
#write Header
MsgId = 'MsgId'
resident_head.append(MsgId)
CreDtTm = 'CreDtTm'
resident_head.append(CreDtTm)
NbOfTxs = 'NbOfTxs'
resident_head.append(NbOfTxs)
CtrlSum = 'CtrlSum'
resident_head.append(CtrlSum)
csvwriter.writerow(resident_head)
for member in tree.findall('.//d:Document/d:CstmrDrctDbtInitn/d:GrpHdr/d:MsgId', ns):
resident = []
#write values
MsgId = member.find('MsgId').text
resident.append(MsgId)
CreDtTm = member.find('CreDtTm').text
resident.append(CreDtTm)
NbOfTxs = member.find('NbOfTxs').text
resident.append(NbOfTxs)
CtrlSum = member.find('CtrlSum').text
resident.append(CtrlSum)
csvwriter.writerow(resident)
Resident_data.close()
I get no error and my Bank.csv has only header but no data please help