/* Python Script */
import xml.etree.ElementTree as ET
# Load the Cordova config and grab its root <widget> element.
tree = ET.parse('config.xml')
root = tree.getroot()
# Re-open the same file for writing only after it has been parsed, because
# opening for writing truncates it. Binary mode is required: for any encoding
# other than "unicode", ElementTree.write() hands encoded bytes to the handle.
updateData = open('config.xml','wb')
print('Root Data is ',root.tag)
print('Root Attribute ',root.attrib)
# Look the version up by name: the position of an attribute inside the
# mapping is not something to rely on, and dict views cannot be indexed on
# Python 3.
old_version = root.attrib['version']
print('Old_Version is ',old_version)
def increment_ver(old_version):
    """Return ``old_version`` with its third dot-separated field bumped by one.

    Args:
        old_version: Version string such as ``"0.0.1"``.

    Returns:
        The version string with the third field incremented, e.g. ``"0.0.2"``.
        All other fields are passed through untouched.

    Raises:
        IndexError: If the string has fewer than three dot-separated fields.
        ValueError: If the third field is not an integer.
    """
    # Work on a separate list instead of rebinding the parameter.
    parts = old_version.split('.')
    parts[2] = str(int(parts[2]) + 1)
    print('Old_Version 2 ',parts[2])
    return '.'.join(parts)
# Compute the bumped version and store it on the root element.
new_Version = increment_ver(old_version)
print('New_version :',new_Version,root.attrib['version'])
root.attrib['version'] = new_Version
print(root.attrib)
# Serialise the modified tree into the already-open handle; close the handle
# even if serialisation fails so it is never leaked.
try:
    tree.write(updateData)
finally:
    updateData.close()
/* Original Config xml file */
<?xml version='1.0' encoding='utf-8'?>
<widget id="io.ionic.starter" version="0.0.1" xmlns="http://www.w3.org/ns/widgets" xmlns:cdv="http://cordova.apache.org/ns/1.0">
<name>aman</name>
<description>An awesome Ionic/Cordova app.</description>
<author email="hi#ionicframework.com" href="http://ionicframework.com/">Ionic Framework Team</author>
<content src="index.html" />
<access origin="*" />
<allow-intent href="http://*/*" />
<allow-intent href="https://*/*" />
<allow-intent href="tel:*" />
<allow-intent href="sms:*" />
<allow-intent href="mailto:*" />
<allow-intent href="geo:*" />
<preference name="ScrollEnabled" value="false" />
/* New Config.xml file */
<ns0:widget xmlns:ns0="http://www.w3.org/ns/widgets" xmlns:ns1="http://schemas.android.com/apk/res/android" id="io.ionic.starter" version="0.0.2">
<ns0:name>aman</ns0:name>
<ns0:description>An awesome Ionic/Cordova app.</ns0:description>
<ns0:author email="hi#ionicframework.com" href="http://ionicframework.com/">Ionic Framework Team</ns0:author>
<ns0:content src="index.html" />
<ns0:access origin="*" />
<ns0:allow-intent href="http://*/*" />
<ns0:allow-intent href="https://*/*" />
<ns0:allow-intent href="tel:*" />
<ns0:allow-intent href="sms:*" />
<ns0:allow-intent href="mailto:*" />
Once the script has run, the version number is incremented by 1, which is what I was trying to achieve. However, an ns0 prefix is added to every tag throughout the file, and the XML declaration header (<?xml version='1.0' encoding='utf-8'?>) is removed.
Please let me know what I have done wrong.
Your script slightly modified:
import xml.etree.ElementTree as ET
# Register the document's default namespace under the empty prefix so the
# writer emits plain tag names instead of generated ns0: prefixes.
ET.register_namespace('', 'http://www.w3.org/ns/widgets')
tree = ET.parse('config.xml')
# (...) no changes in this part of code.
# Write to the handle the script actually opened (updateData) and ask for the
# XML declaration explicitly, since it is not emitted by default.
tree.write(updateData, xml_declaration=True, encoding="utf-8")
updateData.close()
The result:
<?xml version='1.0' encoding='utf-8'?>
<widget xmlns="http://www.w3.org/ns/widgets" id="io.ionic.starter" version="0.0.2">
<name>aman</name>
<description>An awesome Ionic/Cordova app.</description>
<author email="hi#ionicframework.com" href="http://ionicframework.com/">Ionic Framework Team</author>
<content src="index.html" />
<access origin="*" />
<allow-intent href="http://*/*" />
<allow-intent href="https://*/*" />
<allow-intent href="tel:*" />
<allow-intent href="sms:*" />
<allow-intent href="mailto:*" />
<allow-intent href="geo:*" />
<preference name="ScrollEnabled" value="false" />
</widget>
One of the namespace declarations has been dropped because it was not used in the XML body.
If you want to preserve namespaces use lxml library. In this case, your code would look like this (notice no ET.register_namespace):
import lxml.etree as ET
# lxml keeps the namespace declarations found in the parsed document, so no
# register_namespace() call is needed here.
tree = ET.parse('config.xml')
root = tree.getroot()
# Binary mode: the serializer writes encoded bytes, not str.
updateData = open('config.xml','wb')
# (...) no changes in this part of code.
# Write to the handle opened above (updateData) and request the declaration.
tree.write(updateData, xml_declaration=True, encoding="utf-8")
updateData.close()
In this case the output:
<?xml version='1.0' encoding='UTF-8'?>
<widget xmlns="http://www.w3.org/ns/widgets" xmlns:cdv="http://cordova.apache.org/ns/1.0" id="io.ionic.starter" version="0.0.2">
<name>aman</name>
<description>An awesome Ionic/Cordova app.</description>
<author email="hi#ionicframework.com" href="http://ionicframework.com/">Ionic Framework Team</author>
<content src="index.html"/>
<access origin="*"/>
<allow-intent href="http://*/*"/>
<allow-intent href="https://*/*"/>
<allow-intent href="tel:*"/>
<allow-intent href="sms:*"/>
<allow-intent href="mailto:*"/>
<allow-intent href="geo:*"/>
<preference name="ScrollEnabled" value="false"/>
</widget>
Related
We have a requirement to get the data from a SOAP XML Response.
Below is the associated XML file
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<soap:Body>
<GetResultResponse xmlns="http://www.relatics.com/">
<GetResultResult>
<Report ReportName="RFC" GeneratedOn="2022-12-22" EnvironmentID="XXXX" EnvironmentName="Systematic Assurance – an XXX Solution" EnvironmentURL="https://XXXX.relaticsonline.com/" WorkspaceID="XXXXX" WorkspaceName="P - ADL Program Management - XXX" TargetDevice="Pc" ReportField="" xmlns="">
<Change_module>
<applied_individual_change_request Change_Request="TestKZIreport" RFC_GUID="XXXXX">
<code RFC_Code="VtW-0101" />
<progress RFC_Progress="agreed" />
<applied_individual_project_organisation Organisation="XXXX" />
<applied__individual_discipline Discipline="Highways" />
<specification Specification="Context of Documents">
<code Specification_Code="1.1.1a" />
</specification>
<applied_individual_workpackage Workpackage="Enabling work">
<code Workpackage_Code="WP-01" />
</applied_individual_workpackage>
<physical_object Physical_Object="Train Station">
<code Physical_Object_Code="TFO-0001" />
</physical_object>
<person approver="XXX" />
<applied_individual_change_consequence_qualification Consequence_Value="10 days">
<applied_conceptual_change_consequence_aspect Consequence_Aspect="Schedule" />
</applied_individual_change_consequence_qualification>
<document Document_Name="WI 300 Design.pdf">
<code Document_Code="DOC-0002" />
</document>
<answer_status BR_Status="no" />
<applied_individual_business_rule Business_Rule="Change Review compliance">
<code BR_Code="BR-006" />
</applied_individual_business_rule>
<applied_individual_change_consequence_qualification Consequence_Value="XXX">
<applied_conceptual_change_consequence_aspect Consequence_Aspect="Finance" />
</applied_individual_change_consequence_qualification>
</applied_individual_change_request>
</Change_module>
</Report>
</GetResultResult>
</GetResultResponse>
</soap:Body>
</soap:Envelope>
I need all the tag values after Change_module. I tried some suggestions from Stack Overflow, but they didn't work.
I have never worked with XML documents before; here is the sample code I
tried from Stack Overflow.
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
tree = ET.parse("Relatics_XML.xml")
root = tree.getroot()
print(root.tag)
print(root.attrib)
# Prefix map for the path lookup below. Each URI has to match the xmlns
# declaration in the document character for character; the soap and xsd
# namespaces are declared without a trailing "/", so none is used here.
namespaces = {"soap": "http://www.w3.org/2003/05/soap-envelope",
              "xsi": "http://www.w3.org/2001/XMLSchema-instance",
              "xsd": "http://www.w3.org/2001/XMLSchema",
              'a': 'http://www.relatics.com/',}
# Envelope -> Body -> GetResultResponse -> GetResultResult, relative to the root.
names = tree.findall('./soap:Body''/a:GetResultResponse''/a:GetResultResult', namespaces)
print(names)
for name in names:
    print(name.text)
I tried different methods such as find and findall, and I also tried passing different values to them, but all that gets printed is null.
I'm not sure how to get the values out of the tags.
Using xml.etree.ElementTree makes life easier.
See its documentation for details.
It can read both tag attributes and inner text.
import xml.etree.ElementTree as ET
xml = """\
<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<soap:Body>
<GetResultResponse xmlns="http://www.relatics.com/">
<GetResultResult>
<Report ReportName="RFC" GeneratedOn="2022-12-22" EnvironmentID="XXXX" EnvironmentName="Systematic Assurance – an XXX Solution" EnvironmentURL="https://XXXX.relaticsonline.com/" WorkspaceID="XXXXX" WorkspaceName="P - ADL Program Management - XXX" TargetDevice="Pc" ReportField=""
xmlns="">
<Change_module>
<applied_individual_change_request Change_Request="TestKZIreport" RFC_GUID="XXXXX">
<code RFC_Code="VtW-0101" />
<progress RFC_Progress="agreed" />
<applied_individual_project_organisation Organisation="XXXX" />
<applied__individual_discipline Discipline="Highways" />
<specification Specification="Context of Documents">
<code Specification_Code="1.1.1a" />
</specification>
<applied_individual_workpackage Workpackage="Enabling work">
<code Workpackage_Code="WP-01" />
</applied_individual_workpackage>
<physical_object Physical_Object="Train Station">
<code Physical_Object_Code="TFO-0001" />
</physical_object>
<person approver="XXX" />
<applied_individual_change_consequence_qualification Consequence_Value="10 days">
<applied_conceptual_change_consequence_aspect Consequence_Aspect="Schedule" />
</applied_individual_change_consequence_qualification>
<document Document_Name="WI 300 Design.pdf">
<code Document_Code="DOC-0002" />
</document>
<answer_status BR_Status="no" />
<applied_individual_business_rule Business_Rule="Change Review compliance">
<code BR_Code="BR-006" />
</applied_individual_business_rule>
<applied_individual_change_consequence_qualification Consequence_Value="XXX">
<applied_conceptual_change_consequence_aspect Consequence_Aspect="Finance" />
</applied_individual_change_consequence_qualification>
</applied_individual_change_request>
</Change_module>
</Report>
</GetResultResult>
</GetResultResponse>
</soap:Body>
</soap:Envelope>
"""
root = ET.fromstring(xml)
# "[@name]" selects elements that carry the given attribute; ".//" searches
# all descendants, and find() returns the first match in document order.
print("RFC_Code: " + str(root.find(".//code[@RFC_Code]").attrib))
print("RFC_Progress: " + str(root.find(".//progress[@RFC_Progress]").attrib))
print("specification: " + str(root.find(".//specification[@Specification]").attrib))
print("Specification_Code: " + str(root.find(".//code[@Specification_Code]").attrib))
print("Workpackage_Code: " + str(root.find(".//code[@Workpackage_Code]").attrib))
print("Document_Code: " + str(root.find(".//code[@Document_Code]").attrib))
Result
$ python get-data.py
RFC_Code: {'RFC_Code': 'VtW-0101'}
RFC_Progress: {'RFC_Progress': 'agreed'}
specification: {'Specification': 'Context of Documents'}
Specification_Code: {'Specification_Code': '1.1.1a'}
Workpackage_Code: {'Workpackage_Code': 'WP-01'}
Document_Code: {'Document_Code': 'DOC-0002'}
If you are reading the XML from a file instead, use this code:
# Open in binary mode so that the XML parser, not the platform's default text
# codec, decodes the bytes.
with open('data.xml', 'rb') as xml_file:
    # parse() returns an ElementTree; getroot() yields the root element, the
    # same kind of object ET.fromstring() returns.
    root = ET.parse(xml_file).getroot()
I have this code to extract some elements from an XML file:
# NOTE(review): tablaGeneral, job_name, daily, listaOS, variablesOS and
# default_value are not defined in this excerpt -- presumably they are set up
# earlier. Confirm that tablaGeneral is meant to be the current FOLDER element
# (general); nesting below is reconstructed from the loop variables.
for general in tree.iter('FOLDER'):
    nameFolder = general.attrib.get('FOLDER_NAME')
    # Only JOB elements whose APPL_TYPE attribute equals 'OS'.
    for job_nodeOS in tablaGeneral.iterfind(".//JOB[@APPL_TYPE='OS']"):
        listaOS.clear()
        listaOS.append(job_name)
        listaOS.append(nameFolder)
        listaOS.append(daily)
        for job_nodeOS3 in job_nodeOS.iterfind("ON"):
            listaOS.append(job_nodeOS3.get('STMT',"NO APLICA"))
            listaOS.append(job_nodeOS3.get('CODE',"NO APLICA"))
            for job_nodeOS4 in job_nodeOS3.iterfind("DOMAIL"):
                # Read from the DOMAIL element of this loop (job_nodeOS4).
                listaOS.append(job_nodeOS4.get('SUBJECT',"NO APLICA"))
                listaOS.append(job_nodeOS4.get('MESSAGE',"NO APLICA"))
        for variable_name in variablesOS:
            variable_node = job_nodeOS.find(f"./VARIABLE[@NAME='{variable_name}']")
            # Fall back to default_value when the VARIABLE element is absent.
            variable_value = variable_node.get("VALUE", default_value) if variable_node is not None else default_value
            #print(job_name, variable_name.lstrip("%"), "=", variable_value)
            listaOS.append(variable_value)
My problem is that if a for loop doesn't find any occurrences, I need the default value ('NO APLICA') to be appended to listaOS instead.
A piece of code xml:
<?xml version="1.0" encoding="utf-8"?>
<!--Exported at 11-06-2022 17:14:50-->
<DEFTABLE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="Folder.xsd">
<FOLDER SERVER="PROD" VERSION="800" PLATFORM="UNIX" FOLDER_NAME="PALNF" >
<JOB ID="256" APPLICATION="HOUSE" SUBAPP="SERVER" JOBNAME="JOBA" APPL_TYPE="OS">
<SHOUT WHEN="LATESUB" TIME="0825" URGENCY="R" DEST="DESTINATION" MESSAGE="HI" DAYSOFFSET="0" />
<ON STMT="*" CODE="*NETWORK*">
<DOACTION ACTION="NOTOK" />
<DOMAIL URGENCY="U" DEST="EXMAPLE#EXMAPLE.COM" SUBJECT="SUBJECT" MESSAGE="HI" />
</ON>
</JOB>
<JOB ID="1" APPLICATION="OFFICE" SUBAPP="Google" JOBNAME="Google_Update_Task_Machine_UA" APPL_TYPE="OS">
<VARIABLE NAME="%%PARM1" VALUE="GoogleUpdate.exe" />
<VARIABLE NAME="%%PARM2" VALUE="/ua /installsource scheduler" />
</JOB>
</FOLDER>
<FOLDER SERVER="PROD" VERSION="800" PLATFORM="UNIX" FOLDER_NAME="PALNF_CALENDARIO">
<JOB ID="2" APPLICATION="APP" SUBAPP="SUB" JOBNAME="NOSCHEDULER" APPL_TYPE="OS" />
<JOB ID="3" APPLICATION="APP" SUBAPP="SUB" JOBNAME="NOSCHEDULER_CONMONHDAYS" APPL_TYPE="OS" />
</FOLDER>
</DEFTABLE>
Do you know how could I get that?
Thanks and sorry for my English!
I have the following XML:
<?xml version="1.0" encoding="UTF-8"?>
<data>
<columns>
<Leftover index="5">Leftover</Leftover>
<NODE5 index="6"></NODE5>
<NODE6 index="7"></NODE6>
<NODE8 index="9"></NODE8>
<Nomenk__Nr_ index="2">Nomenk.
Nr.</Nomenk__Nr_>
<Year index="8">2020</Year>
<Name index="1">Name</Name>
<Value_code index="3">Value code</Value_code>
</columns>
<records>
<record index="1">
<Leftover>Leftover</Leftover>
<NODE5>Test1</NODE5>
<NODE6>Test2</NODE6>
<NODE8>Test3</NODE8>
<Nomenk__Nr_></Nomenk__Nr_>
<Name></Name>
<Value_code></Value_code>
</record>
... (it repeats itself with different values and the index value increments)
My code is:
import lxml
import lxml.etree as et
# Read the document as bytes; the raw string keeps the backslash in the
# Windows path literal.
with open(r'C:\outputfile.xml', 'rb') as xml:
    xml_content = xml.read()
tree = et.fromstring(xml_content)
# Select every NODE5, NODE6 and NODE8 element in the document. XPath has no
# attribute-value wildcard, so a predicate such as [@index='*'] only matches
# the literal string "*".
for bad in tree.xpath("//NODE5 | //NODE6 | //NODE8"):
    bad.getparent().remove(bad)  # removal has to go through the parent element
# Request UTF-8 explicitly; without an encoding argument the output is ASCII.
result = et.tostring(tree, pretty_print=True, xml_declaration=True, encoding='UTF-8')
# result is bytes: write it as-is in binary mode instead of str()-ing it,
# which would store the b'...' representation in the file.
with open('outputxml.xml', 'wb') as f:
    f.write(result)
What I need to do is remove NODE5, NODE6 and NODE8. I tried using a wildcard and then specifying one of the nodes (see line 6), but that does not seem to have worked... I'm also getting a syntax error right after the loop, on the first character, but the code executes.
Another problem is that lxml sets the encoding to ASCII when the file is "exported".
UPDATE
I am getting this error on line 8:
return = ...
^
SyntaxError: invalid syntax
I took some code from https://stackoverflow.com/a/7981894/1987598
What I need to do is to remove the NODE5, NODE6, NODE8.
below
import xml.etree.ElementTree as ET
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<data>
<columns>
<Leftover index="5">Leftover</Leftover>
<NODE5 index="6" />
<NODE6 index="7" />
<NODE8 index="9" />
<Nomenk__Nr_ index="2">Nomenk.
Nr.</Nomenk__Nr_>
<Year index="8">2020</Year>
<Name index="1">Name</Name>
<Value_code index="3">Value code</Value_code>
</columns>
<records>
<record index="1">
<Leftover>Leftover</Leftover>
<NODE5>Test1</NODE5>
<NODE6>Test2</NODE6>
<NODE8>Test3</NODE8>
<Nomenk__Nr_ />
<Name />
<Value_code />
</record>
<record index="21">
<Leftover>Leftover</Leftover>
<NODE5>Test11</NODE5>
<NODE6>Test21</NODE6>
<NODE8>Test39</NODE8>
<Nomenk__Nr_ />
<Name />
<Value_code />
</record>
</records>
</data>'''
root = ET.fromstring(xml)
# Tag names to strip from <columns> and from every <record>.
unwanted = ['NODE{}'.format(x) for x in ['5','6','8']]
col = root.find('./columns')
records = root.find('./records')
records_lst = records.findall('./record')
# The same removal applies to <columns> and to each <record>, so handle them
# in one loop.
for parent in [col] + records_lst:
    for tag in unwanted:
        # findall() returns a list, so removing children while looping over
        # it does not disturb the iteration.
        for node in parent.findall('./' + tag):
            parent.remove(node)
ET.dump(root)
output
<data>
<columns>
<Leftover index="5">Leftover</Leftover>
<Nomenk__Nr_ index="2">Nomenk.
Nr.</Nomenk__Nr_>
<Year index="8">2020</Year>
<Name index="1">Name</Name>
<Value_code index="3">Value code</Value_code>
</columns>
<records>
<record index="1">
<Leftover>Leftover</Leftover>
<Nomenk__Nr_ />
<Name />
<Value_code />
</record>
<record index="21">
<Leftover>Leftover</Leftover>
<Nomenk__Nr_ />
<Name />
<Value_code />
</record>
</records>
</data>
I have a huge xml file (thousands of lines) and I need to change some attribute parameters.
Xml looks like this:
<person id="name" name="pers_name">
<group id="Common">
<emotion id="smile">
<texture texture="smile" x="-131" y="-17" />
<effect name="name1" x="51" y="438" />
<effect name="name2" x="61" y="419" />
<effect name="name3" x="55" y="312" />
</emotion>
</group>
</person>
After I did it and wrote it with tree.write(path, encoding='utf-8', xml_declaration=True) I lost whitespaces before closing tag.
How can I preserve it?
<person id="name" name="pers_name">
<group id="Common">
<emotion id="smile">
<texture texture="smile" x="-131" y="-17"/>
<effect name="name1" x="51" y="438"/>
<effect name="name2" x="61" y="419"/>
<effect name="name3" x="55" y="312"/>
</emotion>
</group>
</person>
Code
from lxml import etree
# Offsets
x_offset = -10
y_offset = -20
tree = etree.parse(path)
XML = tree.getroot()
# NOTE(review): the sample XML above shows x / y attributes on <effect>, while
# this code reads texture_offset_x / texture_offset_y -- presumably the real
# file carries those attributes; confirm before running.
for effect in XML.iter('effect'):
    # Attribute values are strings: convert, shift, and store back as strings.
    texture_offset_x = int(effect.get('texture_offset_x')) + x_offset
    texture_offset_y = int(effect.get('texture_offset_y')) + y_offset
    effect.set('texture_offset_x', str(texture_offset_x))
    effect.set('texture_offset_y', str(texture_offset_y))
# Write once, after all elements have been updated.
tree.write(path, encoding='utf-8', xml_declaration=True)
I'm trying to edit xml files in a batch / python script
this is my xml file:
<?xml version="1.0" encoding="UTF-8"?>
<task name="analyse">
<taskInfo taskId="21a09311-ade3-4e9a-af21-d13be8b7ba45" runAt="2015-05-20 13:48:50" runTime="5 minutes, 53 seconds">
<project name="13955 - HMI Volvo Truck PA15" number="e20d51c0-71dc-4572-8f9b-4c150bf35222" />
<language lcid="1031" name="German (Germany)" />
<tm name="ENG-DEU_en-GB_de-DE.sdltm" />
<settings reportInternalFuzzyLeverage="yes" reportLockedSegments="no" reportCrossFileRepetitions="yes" minimumMatchScore="70" searchMode="bestWins" missingFormattingPenalty="1" differentFormattingPenalty="1" multipleTranslationsPenalty="1" autoLocalizationPenalty="0" textReplacementPenalty="0" />
</taskInfo>
<file name="VT MAIN TRACK_PA15_Default_DE-DE_20150520_102527.xlf.sdlxliff" guid="111f9ba6-82f6-45fb-ac49-8bf6cf57c169">
<analyse>
<perfect segments="0" words="0" characters="0" placeables="0" tags="0" />
<inContextExact segments="60" words="55" characters="755" placeables="3" tags="0" />
' Replace the Value word="55" with "0"
<exact segments="114" words="334" characters="1687" placeables="14" tags="3" />
<locked segments="0" words="0" characters="0" placeables="0" tags="0" />
<crossFileRepeated segments="2" words="20" characters="0" placeables="0" tags="0" />
'Cut the value words="20" replace with 0
<repeated segments="17" words="34" characters="293" placeables="2" tags="0" />
'add the value to current value 20 to 34 so the new value is words="54"
<total segments="449" words="1462" characters="7630" placeables="66" tags="24" />
<new segments="126" words="434" characters="2384" placeables="18" tags="5" />
<fuzzy min="75" max="84" segments="25" words="108" characters="528" placeables="6" tags="3" />
<fuzzy min="85" max="94" segments="23" words="92" characters="454" placeables="7" tags="4" />
<fuzzy min="95" max="99" segments="77" words="260" characters="1318" placeables="13" tags="6" />
<internalFuzzy min="75" max="84" segments="3" words="16" characters="100" placeables="2" tags="2" />
<internalFuzzy min="85" max="94" segments="4" words="25" characters="111" placeables="1" tags="1" />
<internalFuzzy min="95" max="99" segments="0" words="0" characters="0" placeables="0" tags="0" />
</analyse>
</file>
<file name="VT MAIN TRACK_PA15_Default_DE-DE_20150523_254796.xlf.sdlxliff" guid="111f9ba6-82f6-45fb-ac49-8bf6cf57c169">
<analyse>
<perfect segments="0" words="0" characters="0" placeables="0" tags="0" />
<inContextExact segments="60" words="67" characters="755" placeables="3" tags="0" />
' Replace the Value word="67" with "0"
<exact segments="114" words="334" characters="1687" placeables="14" tags="3" />
<locked segments="0" words="0" characters="0" placeables="0" tags="0" />
<crossFileRepeated segments="2" words="35" characters="0" placeables="0" tags="0" />
'Cut the value words="35" replace with 0
<repeated segments="17" words="54" characters="293" placeables="2" tags="0" />
'add the value to current value 35 to 54 so the new value is words="89"
<total segments="449" words="1462" characters="7630" placeables="66" tags="24" />
<new segments="126" words="434" characters="2384" placeables="18" tags="5" />
<fuzzy min="75" max="84" segments="25" words="108" characters="528" placeables="6" tags="3" />
<fuzzy min="85" max="94" segments="23" words="92" characters="454" placeables="7" tags="4" />
<fuzzy min="95" max="99" segments="77" words="260" characters="1318" placeables="13" tags="6" />
<internalFuzzy min="75" max="84" segments="3" words="16" characters="100" placeables="2" tags="2" />
<internalFuzzy min="85" max="94" segments="4" words="25" characters="111" placeables="1" tags="1" />
<internalFuzzy min="95" max="99" segments="0" words="0" characters="0" placeables="0" tags="0" />
</analyse>
</file>
<batchTotal>
<analyse>
<perfect segments="0" words="0" characters="0" placeables="0" tags="0" />
<inContextExact segments="60" words="139" characters="755" placeables="3" tags="0" />
<exact segments="114" words="334" characters="1687" placeables="14" tags="3" />
<locked segments="0" words="0" characters="0" placeables="0" tags="0" />
<crossFileRepeated segments="0" words="0" characters="0" placeables="0" tags="0" />
<repeated segments="17" words="54" characters="293" placeables="2" tags="0" />
<total segments="449" words="1462" characters="7630" placeables="66" tags="24" />
<new segments="126" words="434" characters="2384" placeables="18" tags="5" />
<fuzzy min="75" max="84" segments="25" words="108" characters="528" placeables="6" tags="3" />
<fuzzy min="85" max="94" segments="23" words="92" characters="454" placeables="7" tags="4" />
<fuzzy min="95" max="99" segments="77" words="260" characters="1318" placeables="13" tags="6" />
<internalFuzzy min="75" max="84" segments="3" words="16" characters="100" placeables="2" tags="2" />
<internalFuzzy min="85" max="94" segments="4" words="25" characters="111" placeables="1" tags="1" />
<internalFuzzy min="95" max="99" segments="0" words="0" characters="0" placeables="0" tags="0" />
</analyse>
</batchTotal>
</task>
general notes:
the <task> is the root element (end element </task>)
the important here is to modify a few tags in a section called file <file> and endtag </file>
there can be X occurrences of <file>*</file>
What i need,
for each <file> element, i would like to:
In <inContextExact>, Set the value of the attribute words with 0
<inContextExact ... words="55" ... /> => <inContextExact ... words="0" ... />
In <crossFileRepeated>, Set the value of the attribute words with 0
<crossFileRepeated ... words="20" ... /> => <crossFileRepeated ... words="0" ... />
In <total>, Set the value of the words attribute to be calculated by my own logic
<total ... words="1462" ... /> => <total ... words="??" ... />
I could really appreciate an example of processing XML files in batch / python
Let's use Python!
It's extremely easy to do this in Python, and since you said a Python solution is fine, check the script below.
Here's how you can iterate over a directory containing XML files, process each one as requested, and save the changes back to the file.
from xml.etree import ElementTree
import os
def edit_xml_file(data):
    """Zero selected word counts in every ``<file>`` section of a report.

    For each ``<file>`` child of the root, the ``words`` attribute of
    ``<inContextExact>`` and ``<crossFileRepeated>`` (inside ``<analyse>``) is
    set to ``"0"``, and ``<total>``'s ``words`` attribute is set to the sum of
    the two values that were removed. Sections outside ``<file>`` (such as
    ``<batchTotal>``) are left untouched.

    Args:
        data: The XML document as ``bytes`` or ``str``.

    Returns:
        The modified document as UTF-8 encoded ``bytes``, including the XML
        declaration.

    Raises:
        xml.etree.ElementTree.ParseError: If ``data`` is not well-formed XML.
        ValueError: If a ``words`` attribute is not an integer.
    """
    root = ElementTree.fromstring(data)
    for file_element in root.findall('file'):
        analyse_element = file_element.find('analyse')
        if analyse_element is None:
            # Nothing to adjust in a <file> without an <analyse> section.
            continue
        removed_words = 0
        for tag in ('inContextExact', 'crossFileRepeated'):
            element = analyse_element.find(tag)
            if element is None:
                continue
            removed_words += int(element.get('words', '0'))
            element.set('words', '0')
        total_element = analyse_element.find('total')
        if total_element is not None:
            total_element.set('words', str(removed_words))
    # Serialise as UTF-8 with the declaration so the file keeps its prolog and
    # non-ASCII text is not turned into character references.
    return ElementTree.tostring(root, encoding='utf-8', xml_declaration=True)
def main():
    """Apply ``edit_xml_file`` in place to every ``.xml`` file in ``xmlfiles``."""
    source_directory = 'xmlfiles'
    for filename in os.listdir(source_directory):
        if not filename.endswith('.xml'):
            continue
        xml_file_path = os.path.join(source_directory, filename)
        # 'r+b' lets the same handle be read and then overwritten.
        with open(xml_file_path, 'r+b') as f:
            data = f.read()
            fixed_data = edit_xml_file(data)
            # Rewind, overwrite, then cut off any leftover bytes in case the
            # new content is shorter than the old.
            f.seek(0)
            f.write(fixed_data)
            f.truncate()


if __name__ == '__main__':
    main()
In this solution, I've used the built-in ElementTree module.
Necessary tools
Here are the necessary tools you will need to create a script in Excel VBA or VBscript:
Looping text files in a directory: link
Reading text files: link
Writing text files: link
Replacing using RegExp: link
Example Regex to get you going:
<exact segments="114" words="334" characters="1687" placeables="14" tags="3" />
->
<exact segments="114" words="0" characters="1687" placeables="14" tags="3" />
Use this regex:
(words="[0-9]+?") or words="([0-9]+?)" even better
Below an example of processing a single row:
Dim re As RegExp
Set re = New RegExp
' Quotes inside a string literal are written doubled ("").
re.Pattern = "words=""([0-9]+?)"""
' The pattern matches the whole attribute, so the replacement has to spell the
' attribute out again; replacing with a bare 0 would drop the attribute name.
newTextRow = re.Replace(textRow, "words=""0""") 'Replace word value with 0
The approach
Loop through your XML files using the Dir function
Read the contents of the file using the link above on how to read text files in VBA
Loop through all rows and use the RegExp function to replace the necessary word params
Save the output back to the XML file using the link above on how to write text files in VBA