Filling values under child through element tree - python

I have an XML file and a text file. I have written a script that parses the text file into a dictionary of keys and values. Now I need to go into the XML file and fill in the values for the children of the testgroup child element; the values include the test case title, ident, etc.
Also, based on the length of aa in the script, I need to create child elements under testgroup. I have minimal exposure to ElementTree, so any recommendation would be highly helpful.
xml = """<?xml version="1.0" encoding="UTF-8"?>
<testmodule title="hello" version="version 2">
<description> 'world' </description>
<engineer>
<info>
<name>Test </name>
<description> 'test' </description>
</info>
</engineer>
<preparation>
<initialize title="Set">
</initialize>
</preparation>
<variants>
<variant name="A">Test </variant>
<variant name="B">test</variant>
<variant name="C">Test test</variant>
</variants>
<testgroup title="Testing" ident="Testing" >
<testcase title="Check" ident= "3_1" name="Number" variants="A">
<param name="Testcase" type="string">Checking of Correct SW and Part identifiers </param>
<param name="TestcaseRequirements" type="string"></param>
<param name="Test" type="string">TS_Automation=Manual;TS_Method=Bench_Test;TS_Priority=1;TS_Tested_By=rjrjjn;TS_Written_By=SUN;TS_Review_done=No;TS_Regression=No;</param>
</testcase>
"""
ee=''
with open('C:\\Users\\rjrn8w\\Desktop\\Test.txt', "r") as f:
for i in f:
ee+=i
import re
aa=re.findall(r'<TC_HEADER_START>([\s\S]*)</TC_HEADER_END>',ee)
for j in aa:
k=j.strip()
new_dict={}
ak=dict(re.findall(r'(\S+)=(.*)', j.strip()))
print ak
import xml.etree.ElementTree as ET
tree = ET.parse('C:\\Users\\rjrn8w\\Documents\\My Received Files\\new.xml')
root = tree.getroot()
for child in root:
if child.tag=='testgroup':
for element in child:
for elem in element:
import pdb;pdb.set_trace()
print elem.tag
ak={'TS_Regression': 'No', 'ident': '1 ', 'TestcaseRequirements': '36978', 'name': '"T01">', 'title': '"DHCP " ', 'TS_Review_done': 'Yes;', 'TestcaseTestType': 'Test', 'TS_Priority': '1;', 'TS_Tested_By': 'qz9ghv;', 'TS_Techniques': 'Full Testing;', 'variants': '"A C" ', 'StakeholderRequirements': '1236\t\t\t\t', 'TS_Implemented': 'Yes;', 'TS_Automation': 'Automated;', 'TestcaseDescription': ' This test verifies DHCP discovery is halted after tester is connected'}

You can read the Python documentation; it explains how to create an Element, add attributes, set values, and append the new element to an existing element as a child.
child=xml.etree.ElementTree.Element(tag, attrib={}, **extra)
existedelement.append(child)

Related

Add tag with content to existing XML (resx) using python

I have an XML with a number of strings:
<?xml version="1.0" encoding="UTF-8"?>
<Strings>
<String id="TEST_STRING_FROM_XML">
<en>Test string from XML</en>
<de>Testzeichenfolge aus XML</de>
<es>Cadena de prueba de XML</es>
<fr>Tester la chaîne à partir de XML</fr>
<it>Stringa di test da XML</it>
<ja>XMLからのテスト文字列</ja>
<ko>XML에서 테스트 문자열</ko>
<nl>Testreeks van XML</nl>
<pl>Łańcuch testowy z XML</pl>
<pt>Cadeia de teste de XML</pt>
<ru>Тестовая строка из XML</ru>
<sv>Teststräng från XML</sv>
<zh-CHS>从XML测试字符串</zh-CHS>
<zh-CHT>從XML測試字符串</zh-CHT>
<Comment>A test string that comes from a shared XML file.</Comment>
</String>
<String id="TEST_STRING_FROM_XML_2">
<en>Another test string from XML.</en>
<de></de>
<es></es>
<fr></fr>
<it></it>
<ja></ja>
<ko></ko>
<nl></nl>
<pl></pl>
<pt></pt>
<ru></ru>
<sv></sv>
<zh-CHS></zh-CHS>
<zh-CHT></zh-CHT>
<Comment>Another test string that comes from a shared XML file.</Comment>
</String>
</Strings>
And I would like to append these strings to a resx file with a long list of strings in the following format:
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
**a bunch of schema and header stuff...**
-->
<data name="STRING_NAME_1" xml:space="preserve">
<value>This is a value 1</value>
<comment>This is a comment 1</comment>
</data>
<data name="STRING_NAME_2" xml:space="preserve">
<value>This is a value 2</value>
<comment>This is a comment 2</comment>
</data>
</root>
But using the following snippet of python code:
import sys, os, os.path, re
import xml.etree.ElementTree as ET
from xml.dom import minidom
existingStrings = []
newStrings = {}
languages = []
resx = '*path to resx file*'
def LoadAllNewStrings():
    """Read ``Strings.xml`` and record every ``<String>`` entry in ``newStrings``.

    Each entry is keyed by the element's ``id`` attribute. The stored value is
    a two-item list ``[english_text, comment_text]`` taken from the ``<en>``
    and ``<Comment>`` children; ``findtext`` yields ``None`` when a child is
    missing.
    """
    src_root = ET.parse('Strings.xml').getroot()
    for src_string in src_root.findall('String'):
        src_id = src_string.get('id')
        newStrings[src_id] = [
            src_string.findtext("en"),
            src_string.findtext("Comment"),
        ]
def ExscludeExistingStrings():
    """Drop from ``newStrings`` every id that the resx file already contains.

    Existing resx entries are ``<data name="...">`` elements directly under
    the root, so their ``name`` attribute is what gets compared against the
    keys of ``newStrings``.
    """
    dest_root = ET.parse(resx).getroot()
    for data in dest_root.findall('data'):
        # pop() with a default is a no-op for names that were never queued,
        # and avoids mutating the dict while iterating over it.
        newStrings.pop(data.get('name'), None)
def PrettifyXML(element):
    """Return *element* serialized as indented XML text.

    The element is serialized to UTF-8 bytes with ElementTree, re-parsed with
    minidom, and pretty-printed with a one-space indent. Because minidom
    serializes a whole document, the returned text begins with its own
    ``<?xml ...?>`` declaration.
    """
    rough_bytes = ET.tostring(element, 'utf-8')
    return minidom.parseString(rough_bytes).toprettyxml(indent=" ")
# Write every entry of newStrings to the resx file as a <data> element that
# holds a <value> child and a <comment> child.
def AddMissingStringsToLocalResource():
# Filter out ids that are already present before writing anything.
ExscludeExistingStrings()
# NOTE(review): mode "a" appends raw text at the end of the file, i.e. after
# the closing </root> tag; this matches the misplaced output shown below.
with open(resx, "a") as output:
# NOTE(review): root is parsed here but never used in this function.
root = ET.parse(resx).getroot()
for newString in newStrings:
data = ET.Element("data", name=newString)
# Each stored value is a [value_text, comment_text] pair.
newStringContent = newStrings[newString]
newStringValue = newStringContent[0]
newStringComment = newStringContent[1]
ET.SubElement(data, "value").text = newStringValue
ET.SubElement(data, "comment").text = newStringComment
# PrettifyXML output begins with its own <?xml ...?> declaration, so one
# declaration is written per appended string.
output.write(PrettifyXML(data))
if __name__ == "__main__":
LoadAllNewStrings()
AddMissingStringsToLocalResource()
I get the following XML appended to the end of the resx file:
<data name="STRING_NAME_2" xml:space="preserve">
<value>This is a value 1</value>
<comment>This is a comment 1</comment>
</data>
</root><?xml version="1.0" ?>
<data name="TEST_STRING_FROM_XML">
<value>Test string from XML</value>
<comment>A test string that comes from a shared XML file.</comment>
</data>
<?xml version="1.0" ?>
<data name="TEST_STRING_FROM_XML_2">
<value>Another test string from XML.</value>
<comment>Another test string that comes from a shared XML file.</comment>
</data>
I.e. the root ends and then my new strings are added after. Any ideas on how to add the data tags to the existing root properly?
with open(resx, "a") as output:
No. Don't open XML files as text files. Not for reading, not for writing, not for appending. Never.
The typical life cycle of an XML file is:
parsing (with an XML parser)
reading or Modification (with a DOM API)
if there were changes: Serialization (also with a DOM API)
At no point should you ever call open() on an XML file. XML files are not supposed to be treated as if they were plain text. They are not.
# parsing
resx = ET.parse(resx_path)
root = resx.getroot()
# modification
for newString in newStrings:
newStringContent = newStrings[newString]
# create node
data = ET.Element("data", name=newString)
ET.SubElement(data, "value").text = newStringContent[0]
ET.SubElement(data, "comment").text = newStringContent[1]
# append node, e.g. to the top level element
root.append(data)
# serialization
resx.write(resx_path, encoding='utf8')

Parsing XML with namespace into dictionary

I'm having a hard time following the xml.etree.ElementTree documentation with regard to parsing an XML document with a namespace and nested tags.
To begin, the xml tree I am trying to parse looks like:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ROOT-MAIN xmlns="http://fakeurl.com/page">
<Alarm> <--- I dont care about these types of objects
<Node>
<location>Texas></location>
<name>John</name>
</Node>
</Alarm>
<Alarm> <--- I care about these types of objects
<CreateTime>01/01/2011</CreateTime>
<Story>
<Node>
<Name>Ethan</name
<Address category="residential>
<address>1421 Morning SE</address>
</address>
</Node>
</Story>
<Build>
<Action category="build_value_1">Build was successful</Action>
</Build>
<OtherData type="string" meaning="favoriteTVShow">Purple</OtherData>
<OtherData type="string" meaning="favoriteColor">Seinfeld</OtherData>
</Alarm>
</ROOT-MAIN>
I am trying to build an array of dictionaries that have a similar structure to the second < Alarm > object. When parsing this XML file, I do the following:
import xml.etree.ElementTree as ET
tree = ET.parse('data/'+filename)
root = tree.getroot()
namespace= '{http://fakeurl.com/page}'
for alarm in tree.findall(namespace+'Alarm'):
for elem in alarm.iter():
try:
creation_time = elem.find(namespace+'CreateTime')
for story in elem.findall(namespace+'Story'):
for node in story.findall(namespace+'Node'):
for Address in node.findall(namespace+'Address'):
address = Address.find(namespace+'address').text
for build in elem.findall(namespace+'Build'):
category= build.find(namespace+'Action').attrib
action = build.find(namespace+'Action').text
for otherdata in elem.findall(namespace+'OtherData'):
#not sure how to get the 'meaning' attribute value as well as the text value for these <OtherData> tags
except:
pass
Right now I'm just trying to get values for:
< address >
< Action > (attribute value and text value)
< OtherData > (attribute value and text value)
I'm sort of able to do this with for loops within for-loops but I was hoping for a cleaner, xpath solution which I haven't figured out how to do with a namespace.
Any suggestions would be much appreciated.
Here (collecting a subset of the elements you mentioned -- add more code to collect rest of elements)
import xml.etree.ElementTree as ET
import re

xmlstring = '''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<root xmlns="http://fakeurl.com/page">
<Alarm>
<Node>
<location>Texas></location>
<name>John</name>
</Node>
</Alarm>
<Alarm>
<CreateTime>01/01/2011</CreateTime>
<Story>
<Node>
<Name>Ethan</Name>
<Address category="residential">
<address>1421 Morning SE</address>
</Address>
</Node>
</Story>
<Build>
<Action category="build_value_1">Build was successful</Action>
</Build>
<OtherData type="string" meaning="favoriteTVShow">Purple</OtherData>
<OtherData type="string" meaning="favoriteColor">Seinfeld</OtherData>
</Alarm>
</root>'''

# Strip the default namespace declaration so the plain tag names used in the
# find()/findall() paths below match without a '{uri}' prefix.
xmlstring = re.sub(' xmlns="[^"]+"', '', xmlstring, count=1)
root = ET.fromstring(xmlstring)
alarms = root.findall('Alarm')

# One dict per <Alarm> that carries a <CreateTime>; alarms without one are
# skipped, so 'entry' is never read for an alarm that did not define it.
alarms_list = []
for alarm in alarms:
    create_time = alarm.find('CreateTime')
    if create_time is None:
        continue
    entry = {'create_time': create_time.text}
    actions = alarm.findall('Build/Action')
    if actions:
        # Keep both the 'category' attribute and the text of each <Action>.
        entry['builds'] = [
            {'category': action.attrib['category'], 'status': action.text}
            for action in actions
        ]
    alarms_list.append(entry)
print(alarms_list)

How to properly use xmlfile api from lxml

I have a large (5+ gigs) XML file which I need to parse, do some operation & write a new XML file.
dummy.xml
<?xml version="1.0" encoding="UTF-8"?>
<catalog xmlns="http://www.namespace.com" catalog-id="test-catalog">
<header>
<name>Product Catalog</name>
</header>
<product product-id="1234">
<available-flag>false</available-flag>
<name>product1</name>
</product>
<product product-id="5678">
<available-flag>false</available-flag>
<name>product1</name>
</product>
<product product-id="9999">
<available-flag>false</available-flag>
<name>product1</name>
</product>
</catalog>
As you see the above XML has 3 product tags & I need to filter some product-ids on basis of a pre-defined list of ids.
I am using lxml iterparse to parse the XML iteratively & want to use xmlfile API to create a new XML incrementally to keep the memory footprint low. So, my motive is to filter out the product tags which don't meet the criteria & copy the rest of the XML tags as it is.
from lxml import etree
f = './dummy.xml'
f1 = './test.xml'
context = etree.iterparse(f, events=('start',))
productsToExport = ['1234']
with etree.xmlfile(f1, encoding='utf-8') as xf:
xf.write_declaration()
with xf.element('catalog xmlns="http://www.namespace.com" catalog-id="test-catalog"'):
for event, element in context:
tagName = etree.QName(element.tag).localname
if (tagName == 'product'):
pid = element.get('product-id')
if (pid in productsToExport):
xf.write(element)
elif (tagName == 'header'):
xf.write(element) # copy existing header tag as it is
Above code works ok & generates a XML as below
<?xml version='1.0' encoding='utf-8'?>
<catalog xmlns="http://www.namespace.com" catalog-id="test-catalog">
<header xmlns="http://www.namespace.com">
<name>Product Catalog</name>
</header>
<product xmlns="http://www.namespace.com" product-id="1234">
<available-flag>false</available-flag>
<name>product1</name>
</product>
</catalog xmlns="http://www.namespace.com" catalog-id="test-catalog">
If you observe the above XML it has few issues:
Closing <catalog> tag has xmlns & catalog-id present in it
All tags like header, product have xmlns attribute present in it
I checked xmlfile api documentation but couldn't find a way to fix above issues.
EDIT:
I managed to resolve the 1st issue by using below
attribs = {'xmlns' : 'http://www.namespace.com', 'catalog-id' : 'test-catalog'}
with xf.element('catalog', attribs):
# previous logic
So, now am left with removing the namespace from every element.
Consider simply rebuilding the XML tree with lxml.etree methods instead of the xmlfile API, still in the context of your iterparse:
from lxml import etree
f = './dummy.xml'
f1 = './test.xml'
productsToExport = ['1234']
# ROOT ELEMENT WITH DEFUALT NAMESPACE
my_nmsp = {None: 'http://www.namespace.com'}
# INITIALIZE ITERATOR
context = etree.iterparse(f, events=('start',))
for event, element in context:
tagName = etree.QName(element.tag).localname
for prod in productsToExport:
root = etree.Element('catalog', nsmap=my_nmsp)
root.text = '\n\t'
root.attrib['catalog-id'] = "test-catalog"
# PRODUCT ELEMENT
if tagName == 'product':
pid = element.get('product-id')
if pid == prod:
root.append(element)
# HEADER ELEMENT
elif (tagName == 'header'):
root.append(element)
# OUTPUT TREE TO FILE
with open(f1, 'wb') as f:
f.write(etree.tostring(root, pretty_print=True))
Output
<catalog xmlns="http://www.namespace.com" catalog-id="test-catalog">
<header>
<name>Product Catalog</name>
</header>
<product product-id="1234">
<available-flag>false</available-flag>
<name>product1</name>
</product>
</catalog>

Minidom element insertion into xml

I am having some problems inserting a data structure into an XML document, so far without much success. I have a file, e.g.:
<?xml version="1.0" ?>
<marl version="2.1" xmlns="xxxx.xsd">
<mcdata id="2" scope="all" type="plan">
<header>
<log action="created"/>
</header>
<mObject class="foo" distName="a-1">
<p name="Ethernet">false</p>
<list name="pass"/>
</mObject>
<mObject class="bar" distName="a-1/b-2">
<p name="Voltage">false</p>
</mObject>
</mcdata>
</marl>
A basic version of my code goes like this, but it seems to have no effect because output.xml is the same as mini.xml.
from xml.dom.minidom import *
document = parse('mini.xml')
mo = document.getElementsByTagName("mObject")
element = document.createElement("mObject")
mo.append(element)
with open('output.xml', 'wb') as out:
document.writexml(out)
out.close()
Create a new node and decorate it as needed :
#create node <mObject>
element = document.createElement("mObject")
#add text content to the node
element.appendChild(document.createTextNode("content"))
#add attribute id to the node
element.setAttribute("id" , "foo")
#result: <mObject id="foo">content</mObject>
Add the newly created node to a parent node :
#select a parent node
mc = document.getElementsByTagName("mcdata")[0]
#append the new node as child of the parent
mc.appendChild(element)

how can I select all descendants of a certain element with ElementTree in Python 3.3?

This is the sample data.
input.xml
<root>
<entry id="1">
<headword>go</headword>
<example>I <hw>go</hw> to school.</example>
</entry>
</root>
I'd like to put the <example> node and its descendants into an <examplegrp> node. That is,
output.xml
<root>
<entry id="1">
<headword>go</headword>
<examplegrp>
<example>I <hw>go</hw> to school.</example>
</examplegrp>
</entry>
</root>
My poor and incomplete script is:
import codecs
import xml.etree.ElementTree as ET
fin = codecs.open(r'input.xml', 'rb', encoding='utf-8')
data = ET.parse(fin)
root = data.getroot()
example = root.find('.//example')
for elem in example.iter():
---and then I don't know what to do---
Here's an example of how it can be done:
text = """
<root>
<entry id="1">
<headword>go</headword>
<example>I <hw>go</hw> to school.</example>
</entry>
</root>
"""
import lxml.etree
import StringIO
data = lxml.etree.parse(StringIO.StringIO(text))
root = data.getroot()
for entry in root.xpath('//example/ancestor::entry[1]'):
examplegrp = lxml.etree.SubElement(entry,"examplegrp")
nodes = [node for node in entry.xpath('./example')]
for node in nodes:
entry.remove(node)
examplegrp.append(node)
print lxml.etree.tostring(root,pretty_print=True)
which will output:
<root>
<entry id="1">
<headword>go</headword>
<examplegrp><example>I <hw>go</hw> to school.</example>
</examplegrp></entry>
</root>
http://docs.python.org/3/library/xml.dom.html?highlight=xml#node-objects
http://docs.python.org/3/library/xml.dom.html?highlight=xml#document-objects
You probably want to follow some paradigm of creating a Document element and appending each result to it.
# Sketch only: 'Document', 'tagName' and 'founds' are placeholders for a
# document object, the wrapper tag name, and the nodes located earlier.
group = Document.createElement(tagName)
for found in founds:
    # DOM nodes are attached with appendChild(); there is no appendNode().
    group.appendChild(found)
Or something like this

Categories