Python print to file as XML - python

In the following function, I want to display the items of a nested dictionary as an XML tree and print it to a file.
def printToFile(self):
# Builds one XML element per entry of self.wordCount, prints it to the
# console and writes it to output.xml.
from lxml import etree as ET
for k,v in self.wordCount.items():
# k is used as the element tag; v is iterated as a mapping below.
root = ET.Element(k)
tree = ET.ElementTree(root)
for k1,v1 in v.items():
# Every inner (k1, v1) pair becomes a DocID / Occurences pair of children.
DocID = ET.SubElement(root, 'DocID')
DocID.text = str(k1)
Occurences = ET.SubElement(root, 'Occurences')
Occurences.text = str(v1)
print ET.tostring(root, pretty_print=True, xml_declaration=False)
# NOTE(review): every write targets the same fixed file name; per the report
# below only the last element ends up in the file -- presumably each write
# replaces the previous contents. Confirm against the lxml documentation.
tree.write('output.xml', pretty_print=True, xml_declaration=False)
When I run the code, all the items are shown in the console, but the problem is that only the last item is written to the file.
In the console, I got this:
<weather>
<DocID>1</DocID>
<Occurences>1</Occurences>
</weather>
<london>
<DocID>1</DocID>
<Occurences>1</Occurences>
<DocID>2</DocID>
<Occurences>2</Occurences>
<DocID>3</DocID>
<Occurences>1</Occurences>
</london>
<expens>
<DocID>2</DocID>
<Occurences>1</Occurences>
</expens>
<nice>
<DocID>3</DocID>
<Occurences>1</Occurences>
</nice>
but when I open the file, I only got this:
<nice>
<DocID>3</DocID>
<Occurences>1</Occurences>
</nice>
Can someone help me solve this issue? Thanks.

Based on the previous comments, I changed my function as follows and it worked:
def printToFile(self):
    """Append every entry of ``self.wordCount`` to output.xml as XML.

    ``self.wordCount`` is iterated as a mapping whose values are themselves
    mappings. Each outer key becomes one element, and each inner key/value
    pair is added to it as a ``DocID`` / ``Occurences`` pair of children.
    """
    from lxml import etree as ET

    # Open the file once, outside the loop, so that every element is written
    # to the same handle instead of each one replacing the previous output.
    # Binary mode is used because etree.tostring() returns bytes by default.
    # Append mode means repeated calls keep adding to the existing file.
    with open('output.xml', 'ab') as out:
        for word, postings in self.wordCount.items():
            root = ET.Element(word)
            for doc_id, count in postings.items():
                ET.SubElement(root, 'DocID').text = str(doc_id)
                ET.SubElement(root, 'Occurences').text = str(count)
            out.write(ET.tostring(root, pretty_print=True, xml_declaration=False))

Related

How to use ElementTree for reading XML files in Python?

I've got an XML file which looks like this:
<?xml version="1.0"?>
-<Object>
<ID>Object_01</ID>
<Location>Manchester</Location>
<Date>01-01-2020</Date>
<Time>15u59m05s</Time>
-<Max_25Hz>
<25Hz>0.916631065043311</25Hz>
<25Hz>0.797958008447961</25Hz>
</Max_25Hz>
-<Max_75Hz>
<75Hz>1.96599232706463</75Hz>
<75Hz>1.48317837078523</75Hz>
</Max_75Hz>
</Object>
I still don't really understand the difference between attributes and text. With the code below I tried to retrieve all the values using text.
import xml.etree.ElementTree as ET
root = r'c:\data\FF\Desktop\My_files\XML-files\Object_01.xml'
tree = ET.parse(root)
root = tree.getroot()
for elem in root:
for subelem in elem:
print(subelem.text)
Expected output:
Object_01
Manchester
01-01-2020
15u59m05s
0.916631065043311
0.797958008447961
1.96599232706463
1.48317837078523
Received output:
0.916631065043311
0.797958008447961
1.96599232706463
1.48317837078523
I tried to do the same with .attrib in the hope of receiving all the 'column' names, but then I received:
{}
{}
{}
{}
You can access them directly in the outer loop, above the inner for-loop.
Ex:
tree = ET.ElementTree(ET.fromstring(X))
root = tree.getroot()
for elem in root:
print(elem.text) #! Access them Here
for subelem in elem:
print(subelem.text)
Output:
Object_01
Manchester
01-01-2020
15u59m05s
0.916631065043311
0.797958008447961
1.96599232706463
1.48317837078523
You could give https://github.com/martinblech/xmltodict a try.
It works much like the json module: it reads an XML document into a Python dict, which greatly simplifies accessing the XML content.
Something like:
import xmltodict

root = r'c:\data\FF\Desktop\My_files\XML-files\Object_01.xml'
with open(root) as xml_file:
    xmlStr = xml_file.read()
# xmltodict.parse() takes the XML text and returns nested dicts keyed by tag.
xmldict = xmltodict.parse(xmlStr)
# Keys are case-sensitive and mirror the tag names, so <ID> is read as 'ID'.
print(xmldict['Object']['ID'])

Problem on using lxml with tostring and pretty_print

I have read some of the answers to related questions, but none of them is directly related to lxml tostring and pretty_print.
I am using lxml and trying to create an XML file on Python 3.6.
The problem I found is that elements are not wrapped and ordered by parent element, and I believe it is related to the "pretty_print" option.
What I need to achieve is:
<root>
<element1></element1>
<element2></element2>
<child1></child1>
<child2></child2>
</root>
The result I get is:
<root><element1></element1><element2></element2><child1></child1><child2></child2></root>
Part of the code I am using:
from lxml import etree as et
CompanyID = "Company Identification"
TaxRegistrationNumber = "Company Reg. Number"
TaxAccountingBasis = "File Tipe"
CompanyName = "Company Name"
BusinessName = "Business Name"
root = et.Element("root")
header = et.SubElement(root, 'Header')
header.tail = '\n'
data = (
('CompanyID', str(CompanyID)),
('TaxRegistrationNumber', str(TaxRegistrationNumber)),
('TaxAccountingBasis', str(TaxAccountingBasis)),
('CompanyName', str(CompanyName)),
('BusinessName', str(BusinessName)),
)
for tag, value in data:
if value is None :
continue
et.SubElement(header, tag).text=value
xml_txt = et.tostring(root, pretty_print=True, encoding="UTF-8")
print(xml_txt)
If I print the elements with no data into it, it works fine and the "pretty_print" works fine.
If I add data to each of the elements (using the above variables), the "pretty_print" does not work and the structure gets messed up.
What could be wrong?
I found it.
I have removed the "header.tail = '\n'" from the code and it's working now.
root = et.Element("root")
header = et.SubElement(root, 'Header')
#header.tail = '\n'
Thank you all

prettify adding extra lines in xml

I'm using prettify to make my XML file readable. I am adding some new info to an existing XML file, but when I save it to a file I get extra blank lines between the lines. Is there a way of removing these lines? Below is the code I'm using:
import xml.etree.ElementTree as xml
import xml.dom.minidom as minidom
from lxml import etree
def prettify(elem):
    """Return *elem* serialized as a tab-indented XML string.

    The element is serialized to UTF-8 with ElementTree, re-parsed with
    minidom, and rendered through ``toprettyxml``.
    """
    serialized = xml.tostring(elem, 'utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="\t")
cid = "[123,123,123,123,123]"
doc = xml.parse('test.xml')
root = doc.getroot()
root.getchildren().index(root.find('card'))
e = xml.Element('card')
e.set('id', cid)
n = xml.SubElement(e, "name")
n.text = "FOLDER"
r = xml.SubElement(e, "red")
r.text = "FILE.AVI"
g = xml.SubElement(e, "green")
g.text = "FILE.AVI"
b = xml.SubElement(e, "blue")
b.text = "FILE.AVI"
root.insert(0, e)
doc2 = prettify(root)
with open("testnew.xml", "w") as f:
f.write(doc2)
Below is what i get in the file
<data>
<card id="[123,123,123,123,123]">
<name>FOLDER</name>
<red>FILE.AVI</red>
<green>FILE.AVI</green>
<blue>FILE.AVI</blue>
</card>
<card id="[000,000,000,000,000]">
<name>Colours</name>
<red>/media/usb/cow.avi</red>
<green>/media/usb/pig.avi</green>
<blue>/media/usb/cat.avi</blue>
</card>
</data>
input file "test.xml" looks like
<data>
<card id="[000,000,000,000,000]">
<name>Colours</name>
<red>/media/usb/cow.avi</red>
<green>/media/usb/pig.avi</green>
<blue>/media/usb/cat.avi</blue>
</card>
</data>
The newly added content is printed fine. Removing any existing "prettification" (indentation whitespace) from the existing text solves the issue.
Add
# Strip the formatting whitespace already stored in the parsed tree so that
# prettify() does not add new indentation on top of the old one.
for elem in root.iter('*'):
    if elem is e:
        # The freshly built node carries no formatting whitespace of its own.
        print("Added XML node does not need to be stripped")
        continue
    if elem.text is not None:
        elem.text = elem.text.strip()
    if elem.tail is not None:
        elem.tail = elem.tail.strip()
before calling
doc2 = prettify(root)
Related answer: Python how to strip white-spaces from xml text nodes

How to change node value using lxml [duplicate]

I want to update xml file with new information by using lxml library.
For example, I have this code:
>>> from lxml import etree
>>>
>>> tree = etree.parse('books.xml')
where 'books.xml' file, has this content: http://www.w3schools.com/dom/books.xml
I want to update this file with new book:
>>> new_entry = etree.fromstring('''<book category="web" cover="paperback">
... <title lang="en">Learning XML 2</title>
... <author>Erik Ray</author>
... <year>2006</year>
... <price>49.95</price>
... </book>''')
My question is, how can I update tree element tree with new_entry tree and save the file.
Here you go, get the root of the tree, append your new element, save the tree as a string to a file:
from lxml import etree

tree = etree.parse('books.xml')
new_entry = etree.fromstring('''<book category="web" cover="paperback">
<title lang="en">Learning XML 2</title>
<author>Erik Ray</author>
<year>2006</year>
<price>49.95</price>
</book>''')
root = tree.getroot()
root.append(new_entry)
# etree.tostring() returns bytes, hence binary mode. The with-block closes
# the file even if serialization or the write raises.
with open('books-mod.xml', 'wb') as f:
    f.write(etree.tostring(root, pretty_print=True))
I don't have enough reputation to comment, therefore I'll write an answer...
The simplest change to make Guillaume's code work is to change the line
f = open('books-mod.xml', 'w')
to
f = open('books-mod.xml', 'wb')

Generating XML files Using CSV

I have a question about formatting XML files after generating them. Here is my code:
import csv
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
from xml.etree.ElementTree import ElementTree
import xml.etree.ElementTree as etree
root = Element('Solution')
root.set('version','1.0')
tree = ElementTree(root)
head = SubElement(root, 'DrillHoles')
head.set('total_holes', '238')
description = SubElement(head,'description')
with open ('1250_12.csv', 'r') as data:
current_group = None
reader = csv.reader(data)
i = 0
for row in reader:
if i > 0:
x1,y1,z1,x2,y2,z2,cost = row
if current_group is None or i != current_group.text:
current_group = SubElement(description, 'hole',{'hole_id':"%s"%i})
information = SubElement (current_group, 'hole',{'collar':', '.join((x1,y1,z1)),
'toe':', '.join((x2,y2,z2)),
'cost': cost})
i+=1
Which produces the following xml file:
<?xml version="1.0"?>
-<Solution version="1.0">
-<DrillHoles total_holes="238">
-<description>
-<hole hole_id="1">
<hole toe="5797.82, 3061.01, 2576.29" cost="102.12" collar="5720.44, 3070.94, 2642.19"/></hole>
That is just a part of the XML file, but it is enough to serve this purpose.
There are many things I would like to change. First, I would like the toe, cost, and collar to be on different lines, like so:
<collar>0,-150,0</collar>
<toe>69.9891,-18.731,-19.2345</toe>
<cost>15</cost>
I would also like them to be in the order shown above: collar, then toe, then cost.
Furthermore, the XML file displays: "hole toe ="5797.82, 3061.01, 2576.29" – how do I get rid of the "hole"? That's about it. I am really new to Python, so go easy on me. Haha.

Categories