Python Key Error While Extracting From XML File - python

I'm trying to extract data from XML files. I am able to extract data from a single file, or from several files one by one, but I want to process them all at once instead of passing each file name individually. There are nearly 100 files in the folder, and the file names start with numbers, like 1.xml, 2.xml, etc. Here are my XML file and Python code; please have a look. I'm facing a KeyError: 'Value'.
<ClinicalDocument xmlns="urn:hl7-org:v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:voc="urn:hl7-org:v3/voc" xmlns:sdtc="urn:hl7-org:sdtc" xsi:schemaLocation="CDA.xsd">
<realmCode code="US"/>
<languageCode code="en-US"/>
<recordTarget>
<patientRole>
<addr use="HP">
<streetAddressLine>3345 Elm Street</streetAddressLine>
<city>Aurora</city>
<state>CO</state>
<postalCode>80011</postalCode>
<country>US</country>
</addr>
<telecom value="tel:+1(303)-554-8889" use="HP"/>
<patient>
<name use="L">
<given>Janson</given>
<given>J</given>
<family>Example</family>
</name>
</patient>
</patientRole>
</recordTarget>
</ClinicalDocument>
Python Code
import os
import xml.etree.ElementTree as ET
# Print the telecom `value` attribute found under each patientRole element of
# every .xml file in `path`.
# NOTE(review): indentation was lost in this paste; presumably everything after
# the first `for` line belongs to the loop body -- confirm against the real script.
path = 'C:\\Users\\Downloads\\files'
for filename in os.listdir(path):
# Skip directory entries that are not XML files.
if not filename.endswith('.xml'):
continue
fullname = os.path.join(path, filename)
tree = ET.parse(fullname)
# `root` is assigned but not used below; the search is run on `tree`.
root = tree.getroot()
# Tags are written in {namespace}name form because the sample document
# declares urn:hl7-org:v3 as its default namespace.
for leads in tree.findall('.//{urn:hl7-org:v3}patientRole'):
# find() returns None when a patientRole has no telecom child, and
# attrib[...] raises KeyError when the attribute is absent. Attribute names
# are case-sensitive, so 'Value' and 'value' are different keys.
# NOTE(review): the reported KeyError: 'Value' suggests the failing run used
# a capitalised key -- confirm.
number = leads.find('{urn:hl7-org:v3}telecom').attrib['value']
print(number)

import os
import xml.etree.ElementTree as ET
# Sample CDA document. The namespace declarations must sit inside the root
# start tag; if the tag is closed early they are parsed as plain text and no
# element ends up in the urn:hl7-org:v3 namespace, unlike the real files.
txt = """<ClinicalDocument xmlns="urn:hl7-org:v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:voc="urn:hl7-org:v3/voc" xmlns:sdtc="urn:hl7-org:sdtc" xsi:schemaLocation="CDA.xsd">
<realmCode code="US"/>
<languageCode code="en-US"/>
<recordTarget>
<patientRole>
<addr use="HP">
<streetAddressLine>3345 Elm Street</streetAddressLine>
<city>Aurora</city>
<state>CO</state>
<postalCode>80011</postalCode>
<country>US</country>
</addr>
<telecom value="tel:+1(303)-554-8889" use="HP"/>
<patient>
<name use="L">
<given>Janson</given>
<given>J</given>
<family>Example</family>
</name>
</patient>
</patientRole>
</recordTarget>
</ClinicalDocument>"""

# Prefix map so search paths can name the document's default namespace.
NS = {"hl7": "urn:hl7-org:v3"}

tree = ET.fromstring(txt)
for telecom in tree.findall(".//hl7:patientRole/hl7:telecom", NS):
    # .get() returns None instead of raising KeyError when the attribute is
    # missing, so one incomplete record does not abort the whole run.
    number = telecom.get("value")
    if number is not None:
        print(number)
# tel:+1(303)-554-8889

Related

How do I sort XML alphabetically using python?

I have some XML files that I want to sort by element name. These XML files are used as Profiles in my Salesforce sandbox/org. I've built some code that takes an XML file and appends it to the bottom of each profile XML file, allowing me to add code to multiple files at once rather than having to copy and paste into each file. The issue is that the XML needs to be sorted alphabetically by element name, e.g. (classAccesses, fieldPermissions, layoutAssignments, recordTypeVisibilities, objectPermissions). I have pasted an example of the XML below. The format of the file needs to stay consistent and can't change, as Salesforce might not accept it.
<?xml version="1.0" encoding="UTF-8"?>
<Profile xmlns="http://soap.sforce.com/2006/04/metadata">
<fieldPermissions>
<editable>false</editable>
<field>Branch_Queue__c.Cell_Phone_Number__c</field>
<readable>true</readable>
</fieldPermissions>
<fieldPermissions>
<editable>false</editable>
<field>Branch_Queue__c.Branch__c</field>
<readable>true</readable>
</fieldPermissions>
<fieldPermissions>
<editable>false</editable>
<field>Branch_Queue__c.Source__c</field>
<readable>true</readable>
</fieldPermissions>
<fieldPermissions>
<editable>false</editable>
<field>Branch_Queue__c.Served_By__c</field>
<readable>true</readable>
</fieldPermissions>
<fieldPermissions>
<editable>false</editable>
<field>Branch_Queue__c.Update__c</field>
<readable>true</readable>
</fieldPermissions>
<recordTypeVisibilities>
<default>false</default>
<recordType>Knowledge__kav.RealEstate</recordType>
<visible>true</visible>
</recordTypeVisibilities>
<recordTypeVisibilities>
<default>false</default>
<recordType>Knowledge__kav.RealEstate_Community_Connection</recordType>
<visible>true</visible>
</recordTypeVisibilities>
<objectPermissions>
<allowCreate>false</allowCreate>
<allowDelete>false</allowDelete>
<allowEdit>false</allowEdit>
<allowRead>true</allowRead>
<modifyAllRecords>false</modifyAllRecords>
<object>Branch_Queue__c</object>
<viewAllRecords>true</viewAllRecords>
</objectPermissions>
<classAccesses>
<apexClass>BranchQueueDisplayList</apexClass>
<enabled>true</enabled>
</classAccesses>
<classAccesses>
<apexClass>BranchQueueDisplayList_Test</apexClass>
<enabled>true</enabled>
</classAccesses>
<classAccesses>
<apexClass>BranchQueueService</apexClass>
<enabled>true</enabled>
</classAccesses>
</Profile>
If it helps, here is the Python script I have built. If you have any questions, please feel free to ask. Thanks!
import os
import json
# Append the contents of `newData` to every profile .xml file in `directory`,
# inserting it just before the closing </Profile> tag.
directory = 'C:/Users/HB35401/MAXDev/force-app/main/default/profiles'  # folder containing profiles to be modified
os.chdir(directory)
newData = 'C:/testXMLBatch/additionalXML/addXML.xml'  # xml file to append to profile-xml files.
closingTag = '</Profile>'

# The snippet is the same for every profile, so read it once, outside the loop.
with open(newData) as g:
    data = g.read()

for nameOfFile in os.listdir(directory):  # for each profile in the directory
    if not nameOfFile.endswith(".xml"):
        continue
    with open(nameOfFile) as f:
        fileContent = f.read()  # save the content of the profile to fileContent
    if data in fileContent:
        print('ERROR: XML is already inside the Profile.' + nameOfFile)
        continue
    # Locate the closing tag instead of assuming it occupies exactly the last
    # 11 characters; a missing or extra trailing newline would otherwise cut
    # the file in the wrong place.
    tagStart = fileContent.rfind(closingTag)
    if tagStart == -1:
        print('ERROR: no </Profile> closing tag found in ' + nameOfFile)
        continue
    EndLine = fileContent[tagStart:]  # closing tag plus whatever follows it
    # Write the result in one pass: original body, new data, closing tag.
    with open(nameOfFile, "w") as w:
        w.write(fileContent[:tagStart] + data + "\n" + EndLine)
Try this.
import operator  # needed for operator.itemgetter below

from simplified_scrapy import SimplifiedDoc, utils

# Reorder the direct children of <Profile> by tag name and print the result.
xml = utils.getFileContent('your xml file.xml')
doc = SimplifiedDoc(xml)
root = doc.Profile
nodes = root.children  # Get all nodes
count = len(nodes)
if count:
    # sorted() is stable, so nodes that share a tag keep their relative order.
    sorted_nodes = sorted(nodes, key=operator.itemgetter('tag'))  # Sort by tag
    # Capture the markup of every node before any replacement is made.
    sorted_htmls = [node.outerHtml for node in sorted_nodes]  # Get the string of sorted nodes
    for node, html in zip(nodes, sorted_htmls):
        # NOTE(review): `repleaceSelf` is spelled as in the original call;
        # presumably that is the library's method name -- verify.
        node.repleaceSelf(html)  # Replace the nodes in the original text with the sorted nodes
print(doc.html)

Generate XML tree with values using py script

I am new to Python and would like to create an XML tree with values.
I want to put the "jsc://xxx.js" files and the "EXT.FC.XML" files under the Resource and Policy elements, respectively, in the XML via Python code. All "jsc://xxx.js" and "EXT.FC.XML" files are stored in my local folders named "resources" and "policies".
The desired output
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<APIProxy revision="2" name="Retirement_Digital_Account_Balance">
<ManifestVersion>SHA-512:f9ae03c39bf00f567559e</ManifestVersion>
<Policies>
<Policy>EXT.FC_Env_Host</Policy>
<Policy>EXT.FC_JWTVerf</Policy>
<Policy>EXT.JSC_Handle_Fault</Policy>
</Policies>
<ProxyEndpoints>
<ProxyEndpoint>default</ProxyEndpoint>
</ProxyEndpoints>
<Resources>
<Resource>jsc://createErrorMessage.js</Resource>
<Resource>jsc://jwtHdrExt.js</Resource>
<Resource>jsc://log-variables.js</Resource>
<Resource>jsc://swagger.json</Resource>
<Resource>jsc://tgtDataForm.js</Resource>
</Resources>
</APIProxy>
I use Element tree for converting into xml file, this is the code I run
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, Comment
from xml.etree import ElementTree, cElementTree
from xml.dom import minidom
from ElementTree_pretty import prettify
import datetime
import os
# Build an <APIProxy> element tree and write it, pretty-printed, to
# Retirement_Digital_Account_Balance_v2.xml.
# NOTE(review): indentation was lost in this paste; the nesting of the loops
# below is presumably for/for/if/append -- confirm against the real script.
# `generated_on` is assigned but not referenced again in this snippet.
generated_on = str(datetime.datetime.now())
#proxy = Element('APIProxy')
proxy = Element('APIProxy', revision = "2", name = "Retirement_Digital_Account_Balance")
ManifestVersion = SubElement(proxy, 'ManifestVersion')
ManifestVersion.text = 'SHA-512:f9ae03c39bf00f567559e'
Policies = SubElement(proxy, 'Policies')
# A single <Policy> child is created here; no further <Policy> elements are
# created anywhere below, and its text is never set.
Policy = SubElement(Policies, 'Policy')
# '/policies' is an absolute path at the filesystem root. os.walk() ignores
# listing errors by default, so a missing directory simply yields nothing.
# NOTE(review): the folders are described as local -- confirm the path.
path = '/policies'
#files = []
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for file in f:
if '.xml' in file:
# Element.append() expects an Element, not a path string.
# NOTE(review): to get one <Policy> per file, presumably a new
# SubElement(Policies, 'Policy') with its .text set is needed per file.
Policy.append(os.path.join(r, file))
# Iterating an Element yields its child elements.
for p in Policy:
print(p)
ProxyEndpoints = SubElement(proxy, 'ProxyEndpoints')
ProxyEndpoint = SubElement(ProxyEndpoints, 'ProxyEndpoint')
ProxyEndpoint.text = 'default'
Resources = SubElement(proxy, 'Resources')
# Same pattern as <Policy>: one <Resource> child, never given any text.
Resource = SubElement(Resources, 'Resource')
path = '/Resources'
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for file in f:
# Substring test: matches any file name containing "js" (so .json as well as .js).
if 'js' in file:
Resource.append(os.path.join(r, file))
for R in Resource:
print(R)
Spec = SubElement(proxy, 'Spec')
Spec.text = ""
#proxy.append(Spec)
proxy.append(Element('TargetServers'))
TargetEndpoints = SubElement(proxy, 'TargetEndpoints')
TargetEndpoint = SubElement(TargetEndpoints, 'TargetEndpoint')
TargetEndpoint.text = 'default'
print(ET.tostring(proxy))
# `tree` is built here but is not used afterwards; `tree1` is what gets written.
tree = cElementTree.ElementTree(proxy) # wrap it in an ElementTree instance, and save as XML
t = minidom.parseString(ElementTree.tostring(proxy)).toprettyxml() # Since ElementTree write() has no pretty printing support, used minidom to beautify the xml.
# Re-parse the pretty-printed text so it can be written with an XML declaration.
tree1 = ElementTree.ElementTree(ElementTree.fromstring(t))
tree1.write("Retirement_Digital_Account_Balance_v2.xml",encoding='UTF-8', xml_declaration=True)
Okay, the code runs, but I didn't get the desired output. I got the following:
<?xml version='1.0' encoding='UTF-8'?>
<APIProxy name="Retirement_Digital_Account_Balance" revision="2">
<ManifestVersion>SHA-512:f9ae03c39bf00f567559e</ManifestVersion>
<Policies>
<Policy />
</Policies>
<ProxyEndpoints>
<ProxyEndpoint>default</ProxyEndpoint>
</ProxyEndpoints>
<Resources>
<Resource />
</Resources>
</APIProxy>
How can I use a loop with ElementTree in Python to read the file names from the folders and create the XML tree with those values?

Search multiple xml to find the ones with specific attribute values and copy them to another folder

I want to search through multiple XML files and check for specific attribute values. Every time I find the values I need, I want to copy that XML file to another folder.
<foo type="foo">
<foo1 sport="Ghh" Code="349133" timestamp="1553189828.6330519">
<rr result="false" Number="12" id="12" time="17:37:00">
<Trap trap="1">
<Runner id="493434" name="Dunb">
<hh>
<hh id="1" version="1" />
</hh>
</Runner>
</rr>
</foo1>
</foo>
So I want to find all the XML files with Code="349133" and rr id="12" and copy them.
My code up to this point is below:
import os
import xml.etree.ElementTree as ET
from shutil import copyfile
def process(data, code='349133', rr_id=None):
    """Return True when the XML text in *data* contains a matching record.

    A record matches when a direct child of the root element has a ``Code``
    attribute equal to *code*. If *rr_id* is given, that child must also
    contain a direct ``<rr>`` child whose ``id`` attribute equals *rr_id*.

    Args:
        data: XML document as a string.
        code: Required value of the ``Code`` attribute.
        rr_id: Optional required value of the ``id`` attribute on an ``<rr>``
            child. ``None`` (the default) skips this check, so
            ``process(data)`` behaves as it did before the parameter existed.

    Returns:
        True if a matching record is found, otherwise False.

    Raises:
        xml.etree.ElementTree.ParseError: if *data* is not well-formed XML.
    """
    xml_obj = ET.fromstring(data)
    for record in xml_obj:
        # Attribute names are case-sensitive: 'Code', not 'code'.
        if record.get('Code') != code:
            continue
        if rr_id is None:
            return True
        if any(rr.get('id') == rr_id for rr in record.findall('rr')):
            return True
    return False
# Read every entry in `path` and hand its text to process(); files for which
# process() returns True are meant to be copied.
# NOTE(review): indentation was lost in this paste; presumably the lines after
# the `for` form the loop body -- confirm.
path = 'C:/Users/pp/.spyder-py3/data'
# os.listdir() returns every entry in the folder, not only .xml files.
xml_files = os.listdir(path)
for xml_file in xml_files:
xml_file_path = os.path.join(path, xml_file)
# The file object is never closed in this snippet.
fp = open(xml_file_path)
data = fp.read()
if process(data):
# NOTE(review): `//` is not Python comment syntax (comments start with `#`),
# so the next line is a syntax error as written. `src` and `dst` are also not
# defined in this snippet; presumably `src` should be xml_file_path and `dst`
# a path inside the target folder -- confirm.
//copyfile(src, dst)
I need help adding a check for the id attribute to the process(data) function. I also need help with copyfile, because it doesn't seem to work.
I found copyfile in a Stack Overflow post: "How do I copy a file in Python?"
Thanks in advance.

Python - Error when trying to convert xml to csv

I have the code below, which reads an XML file and tries to convert it to CSV. It works fine, but when the data has one additional sub-level it throws the error "child index out of range".
Given below is the data set I am trying to work with:
<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<Document>
<Customer>
<CustomerCode>ABC</CustomerCode>
<CustomerName>ABC Co</CustomerName>
<CustomerBusinessHours>
<CustomerBusinessHoursTimeZoneOffset>1.000000</CustomerBusinessHoursTimeZoneOffset>
</CustomerBusinessHours>
</Customer>
</Document>
Code that I have tried building:
import xml.etree.ElementTree as ET
import csv
# Convert the <Customer> records of sample.xml into rows of file.csv, writing
# a header row before the first record.
# NOTE(review): indentation was lost in this paste; presumably the header
# lines sit inside `if count == 0:` and the rest inside the `for` -- confirm.
tree = ET.parse("/users/desktop/sample.xml")
root = tree.getroot()
# open a file for writing
Resident_data = open('/users/desktop/file.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
count = 0
for member in root.findall('Customer'):
resident = []
address_list = []
if count == 0:
CustomerCode = member.find('CustomerCode').tag
resident_head.append(CustomerCode)
CustomerName = member.find('CustomerName').tag
resident_head.append(CustomerName)
# member[3] is the fourth child of <Customer>. In the sample data shown,
# <Customer> has only three children (indices 0-2), so this index is out of
# range for that data.
# NOTE(review): the real file presumably has more children -- confirm.
CustomerBusinessHours = member[3].tag
resident_head.append(CustomerBusinessHours)
csvwriter.writerow(resident_head)
count = count + 1
CustomerCode = member.find('CustomerCode').text
resident.append(CustomerCode)
CustomerName = member.find('CustomerName').text
resident.append(CustomerName)
# Positional indexing again: [1] and [2] ask for the second and third child
# of the business-hours element, which has a single child in the sample data.
CustomerBusinessHours = member[3][1].text
# These values go into address_list, which is never written out; only
# `resident` is passed to writerow() below.
address_list.append(CustomerBusinessHours)
CustomerBusinessHoursTimeZoneOffset = member[3][2].text
address_list.append(CustomerBusinessHoursTimeZoneOffset)
csvwriter.writerow(resident)
Resident_data.close()
I get the below error:
CustomerBusinessHours = member[3][1].text
IndexError: child index out of range
Expected output:
CustomerCode,CustomerName,CustomerBusinessHoursTimeZoneOffset
ABC,ABC Co,1.000000
The code below is able to collect the data you are looking for.
import xml.etree.ElementTree as ET
# Inline copy of the sample document, so the snippet runs without a file.
xml = '''<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<Document>
<Customer>
<CustomerCode>ABC</CustomerCode>
<CustomerName>ABC Co</CustomerName>
<CustomerBusinessHours>
<CustomerBusinessHoursTimeZoneOffset>1.000000</CustomerBusinessHoursTimeZoneOffset>
</CustomerBusinessHours>
</Customer>
</Document>'''

tree = ET.fromstring(xml)

# Element paths, relative to each <Customer>, in the order they are printed.
# The nested offset is reached by name rather than by child position.
field_paths = (
    'CustomerCode',
    'CustomerName',
    'CustomerBusinessHours/CustomerBusinessHoursTimeZoneOffset',
)

for customer in tree.findall('Customer'):
    for field_path in field_paths:
        print(customer.find(field_path).text)
Output
ABC
ABC Co
1.000000

python XML to CSV Parse result non

I have this XML but am having trouble parsing it into CSV. I tried a simple print statement but still get no values:
<?xml version="1.0" encoding="UTF-8"?>
<Document xmlns="urn:iso:std:iso:20022:tech:xsd:pain.008.001.02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CstmrDrctDbtInitn>
<GrpHdr>
<MsgId>1820</MsgId>
<CreDtTm>2016-05-17T11:56:12</CreDtTm>
<NbOfTxs>197</NbOfTxs>
<CtrlSum>136661.81</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty>
</GrpHdr>
</CstmrDrctDbtInitn>
<CstmrDrctDbtInitn>
<GrpHdr>
<CreDtTm>2016-05-18T10:34:51</CreDtTm>
<NbOfTxs>1</NbOfTxs>
<CtrlSum>758.99</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty></GrpHdr></CstmrDrctDbtInitn>
</Document>
and I want to iterate over the values of each node.
So far I have written the code below:
import xml.etree.ElementTree as ET
import csv
# Parse the pain.008 bank file and write MsgId, CreDtTm, NbOfTxs and CtrlSum
# of each group header to Bank.csv.
# NOTE(review): indentation was lost in this paste; presumably only the
# ET.parse line sits inside the `with`, and the value lines sit inside the
# `for` -- confirm.
# "\P" and "\D" are not recognised escape sequences, so those backslashes are
# kept literally; "\\" is an escaped backslash.
with open("D:\Python\Dave\\17_05_16_1820_DD201606B10_Base.xml") as myFile:
tree = ET.parse(myFile)
# Prefix map for the document's default namespace.
ns = {'d': 'urn:iso:std:iso:20022:tech:xsd:pain.008.001.02'}
# open a file for writing
Resident_data = open('Bank.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
#write Header
MsgId = 'MsgId'
resident_head.append(MsgId)
CreDtTm = 'CreDtTm'
resident_head.append(CreDtTm)
NbOfTxs = 'NbOfTxs'
resident_head.append(NbOfTxs)
CtrlSum = 'CtrlSum'
resident_head.append(CtrlSum)
csvwriter.writerow(resident_head)
# NOTE(review): <Document> is the root element, and a './/' search only looks
# below the element it starts from, so './/d:Document/...' presumably matches
# nothing -- which would explain a header-only CSV. The path also ends at
# d:MsgId, so each `member` would be a <MsgId> element, not the <GrpHdr> that
# holds all four fields.
for member in tree.findall('.//d:Document/d:CstmrDrctDbtInitn/d:GrpHdr/d:MsgId', ns):
resident = []
#write values
# These find() calls use un-prefixed names and no namespace map, so they do
# not match elements in the document's default namespace; find() returns None
# when nothing matches. The second <GrpHdr> in the sample has no <MsgId>.
MsgId = member.find('MsgId').text
resident.append(MsgId)
CreDtTm = member.find('CreDtTm').text
resident.append(CreDtTm)
NbOfTxs = member.find('NbOfTxs').text
resident.append(NbOfTxs)
CtrlSum = member.find('CtrlSum').text
resident.append(CtrlSum)
csvwriter.writerow(resident)
Resident_data.close()
I get no error, and my Bank.csv has only the header but no data. Please help.

Categories