How to convert xml variable to xml file in python? - python

I converted my dictionary to XML, but I can't save it to an XML file.
from dict2xml import dict2xml
xml = dict2xml(my_dictionary)
print(xml)

The following assigns an xml string to your variable xml.
xml = dict2xml(my_dictionary)
You can write the string to a file by doing the following:
from dict2xml import dict2xml

# dict2xml() hands back the XML as a string, so it can be written like any
# other text.
xml = dict2xml(my_dictionary)

# An explicit encoding keeps non-ASCII characters from depending on the
# platform's default locale encoding.
with open("my_data.xml", "w", encoding="utf-8") as f:
    f.write(xml)
This will write your xml data to the file my_data.xml

You need to build an XML string and then write it to a file:
from xml.dom.minidom import parseString

# Re-parse the XML string held in `xml` and render it with indentation.
# NOTE(review): parseString() needs a well-formed document with a single
# root element -- confirm the string in `xml` has one.
xml_str = parseString(xml).toprettyxml()

save_path_file = "myfile.xml"
# Explicit encoding so the output does not depend on the locale default.
with open(save_path_file, "w", encoding="utf-8") as f:
    f.write(xml_str)

Related

Encoding with ZipFile Python

I'm trying to generate an .xml file using the library ZipFile in python 3.9.
The content i'm trying to add is the following : "Invoice n°123456789"
import xml.etree.ElementTree as ET
from zipfile import ZipFile
def gen_zip():
    """Build a small XML document and store it as ``file.xml`` in ``myzip.zip``.

    The archive is created (or overwritten) in the current working
    directory. Always returns ``None``.
    """
    # Imported locally because the snippet's import header does not bring
    # ``datetime`` into scope.
    from datetime import datetime

    # Init xml element
    root = ET.Element("MyXml")
    # Fill SubElement
    ET.SubElement(
        root,
        "Batch",
        Id="1",
        DateTime=datetime.now().strftime("%Y%m%d%H%M%S"),
        Value="Invoice n°123456789",
    )
    # Write xml to zip. ET.tostring() is called without an ``encoding``
    # argument here, so it serialises to US-ASCII bytes and the "°" character
    # comes out as the character reference "&#176;" -- the behaviour this
    # question is about. The context manager closes the archive even if
    # writestr() raises.
    with ZipFile("myzip.zip", "w") as zf:
        zf.writestr("file.xml", ET.tostring(root))
    return None
The problem I have is that the content of my file file.xml in myzip.zip is the following:
<MyXml>
<Batch Id="1" DateTime="20221207093739" Value="Invoice n&#176;123456789"/>
</MyXml>
I don't know why the ° symbol becomes &#176;. As per the doc the writestr function does utf-8 encoding and not unicode. (https://docs.python.org/3/library/zipfile.html)
I'd like the content of my .xml to be Invoice n°123456789 instead of Invoice n&#176;123456789
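ET.tostring() defaults to encoding="us-ascii", so any non-ASCII character is written as a numeric character reference such as &#176;. Pass the encoding explicitly when you call tostring; with encoding='unicode' it returns a str, which writestr then encodes as UTF-8.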
zf.writestr("file.xml", ET.tostring(root, encoding='unicode'))

Converting xml to json for Mongo db

I am currently trying to convert an xml document with approx 2k records to json to upload to Mongo DB.
I have written a python script for the conversion but when I upload it to Mongo db the collection is reading this as one document with 2k sub arrays (objects) but I am trying to get 2k documents instead. My thoughts are it could be the python code? Can anyone help.
# Program to convert an xml
# file to json file
# import json module and xmltodict
# module provided by python
import json
import xmltodict
# open the input xml file and read
# data in form of python dictionary
# using xmltodict module
with open("test.xml") as xml_file:
data_dict = xmltodict.parse(xml_file.read())
# xml_file.close()
# generate the object using json.dumps()
# corresponding to json data
json_data = json.dumps(data_dict)
# Write the json data to output
# json file
with open("data.json", "w") as json_file:
json_file.write(json_data)
# json_file.close()
I am not sure why you would expect an XML-to-JSON converter to automatically split the XML at "record" boundaries. After all, XML doesn't have a built-in concept of "records" - that's something in the semantics of your vocabulary, not in the syntax of XML.
The easiest way to split an XML file into multiple files is with a simple XSLT 2.0+ stylesheet. If you use XSLT 3.0 then you can invoke the JSON conversion at the same time.
Here is my solution.
import xmltodict
import json
import pprint

# Open the XML file in binary mode and hand the file object straight to
# xmltodict.
with open(r"test.xml", "rb") as xml_file:
    dict_data = xmltodict.parse(xml_file)

# Drill down to the records so the JSON file holds them directly rather than
# one wrapping object.
# NOTE(review): "root" / "course_listing" presumably match the element names
# in the asker's XML -- adjust them to your own document.
output_data = dict_data["root"]["course_listing"]
json_data = json.dumps(output_data, indent=2)
print(json_data)

with open("datanew.json", "w", encoding="utf-8") as json_file:
    json_file.write(json_data)

Write edited xml that replaced hyphen with underscore

So I am trying to write a new xml file that I edited from the original by replacing the hyphen with an underscore and then start working on that xml file for the rest of the code.
This is my code:
import xml.etree.ElementTree as ET
from lxml import etree
#attaching xml file
xmlfile = "hook_zap.xml"
tree = ET.parse(xmlfile)
root = tree.getroot()
#replace hypen with underscore within the xml
doc = etree.parse(xmlfile)
for e in doc.xpath('//*[contains(local-name(),"-")]'):
e.tag = e.tag.replace('-','_')
refracted = etree.tostring(doc, method='xml')
#create a new xml file with refracted file
refracted.write('base.xml')
#print (refracted)
And I keep getting this error:
AttributeError: 'bytes' object has no attribute 'write'
etree.tostring() returns a bytes object, not a tree, so it has no write() method. Write refracted into a file like any other kind of data:
# `refracted` is a bytes object (see the AttributeError above), so open the
# file in binary mode and write the bytes unchanged -- no decode/re-encode
# round trip that could disagree with the document's own encoding.
with open('base.xml', 'wb') as f:
    f.write(refracted)

Parsing an xml file and creating another from the parsed object

I am trying to parse an xml file(containing bad characters) using lxml module in recover = True mode.
Below is the code snippet
from lxml import etree
f=open('test.xml')
data=f.read()
f.close()
parser = etree.XMLParser(recover=True)
x = etree.fromstring(data, parser=parser)
Now I want to create another xml file (test1.xml) from the above object (x)
Could anyone please help in this matter.
Thanks
I think this is what you are searching for
from lxml import etree

# Read the source file as raw bytes: the parser then works out the encoding
# itself, and lxml rejects str input that carries an XML encoding
# declaration.
with open('test.xml', 'rb') as f:
    data = f.read()

# recover=True asks the parser to keep going past malformed input.
parser = etree.XMLParser(recover=True)
# fromstring() parses XML from a string directly into an Element
x = etree.fromstring(data, parser=parser)

# tostring() returns bytes, so write them out in binary mode.
y = etree.tostring(x, pretty_print=True)
with open('test1.xml', 'wb') as f:
    f.write(y)

Generate output files from template file and csv data in python

I need to generate xml files populated with data from a csv file in python
I have two input files:
one CSV file named data.csv containing data like this:
ID YEAR PASS LOGIN HEX_LOGIN
14Z 2013 (3e?k<.P#H}l hex0914Z F303935303031345A
14Z 2014 EAeW+ZM..--r hex0914Z F303935303031345A
.......
One Template file named template.xml
<?xml version="1.0"?>
<SecurityProfile xmlns="security_profile_v1">
<year></year>
<security>
<ID></ID>
<login></login>
<hex_login></hex_login>
<pass></pass>
</security>
</SecurityProfile>
I want to get as many output files as lines in the csv data file, each output file named YEAR_ID, with the data from the csv file in the xml fields:
Output files contents:
Content of output file #1 named 2013_0950014z:
<?xml version="1.0"?>
<SecurityProfile xmlns="security_profile_v1">
<year>2013</year>
<security>
<ID>14Z</ID>
<login>hex0914</login>
<hex_login>F303935303031345A</hex_login>
<pass>(3e?k<.P#H}l</pass>
</security>
</SecurityProfile>
Content of output file #2 named 2014_0950014z:
<?xml version="1.0"?>
<SecurityProfile xmlns="security_profile_v1">
<year>2014</year>
<security>
<ID>14Z</ID>
<login>hex0914</login>
<hex_login>F303935303031345A</hex_login>
<pass>EAeW+ZM..--r</pass>
</security>
</SecurityProfile>
Thank you for your suggestions.
Can you make changes to the template? If so, I would do the following to make this a bit simpler:
<?xml version="1.0"?>
<SecurityProfile xmlns="security_profile_v1">
<year>{year}</year>
<security>
<ID>{id}</ID>
<login>{login}</login>
<hex_login>{hex_login}</hex_login>
<pass>{pass}</pass>
</security>
</SecurityProfile>
Then, something like this would work:
import csv
from xml.sax.saxutils import escape

input_file_name = "some_file.csv"  # name/path of your csv file
template_file_name = "some_file.xml"  # name/path of your xml template
output_file_name = "{}_09500{}.xml"


def _xml_text(value):
    """Return *value* as text that is safe inside an XML element."""
    # The sample passwords contain "<", which would break the document if
    # written unescaped.
    return escape("" if value is None else str(value))


with open(template_file_name, "r") as template_file:
    template = template_file.read()

# NOTE(review): DictReader splits on commas by default; pass delimiter="\t"
# (or whatever the file really uses) if the columns are not comma-separated.
with open(input_file_name, "r", newline="") as csv_file:
    my_reader = csv.DictReader(csv_file)
    for row in my_reader:
        # "pass" is a Python keyword and cannot be a keyword argument to
        # str.format(), so the values go through a dict and format_map().
        fields = {
            "year": _xml_text(row["YEAR"]),
            "id": _xml_text(row["ID"]),
            "login": _xml_text(row["LOGIN"]),
            "hex_login": _xml_text(row["HEX_LOGIN"]),
            "pass": _xml_text(row["PASS"]),
        }
        out_name = output_file_name.format(row["YEAR"], row["ID"])
        with open(out_name, "w") as current_out:
            current_out.write(template.format_map(fields))
If you can't modify the template, or want to process it as XML instead of basic string manipulation, then it's a bit more involved.
EDIT:
Modified answer to use csv.DictReader rather than csv.reader.
Fixed variable names opening input CSV file and writing the output. Removed 'binary' mode file operations.
import csv
from collections import defaultdict
from xml.sax.saxutils import escape

header = '<?xml version="1.0"?><SecurityProfile xmlns="security_profile_v1">\n'
footer = '\n</SecurityProfile>'
entry = '''<security>
<ID>{0[ID]}</ID>
<login>{0[LOGIN]}</login>
<hex_login>{0[HEX_LOGIN]}</hex_login>
<pass>{0[PASS]}</pass>
</security>'''

# Group the CSV rows by their YEAR column.
rows = defaultdict(list)
with open('infile.csv', newline='') as f:
    reader = csv.DictReader(f, delimiter='\t')
    for item in reader:
        # Index the current row, not the reader object.
        rows[item['YEAR']].append(item)

# Write one XML file per year, holding every record for that year.
for year, data in rows.items():
    with open('{}.xml'.format(year), 'w') as f:
        f.write(header)
        f.write('<year>{}</year>\n'.format(year))
        for record in data:
            # Escape &, < and > so values such as passwords containing "<"
            # do not produce malformed XML.
            safe = {
                key: escape(value) if isinstance(value, str) else value
                for key, value in record.items()
            }
            f.write(entry.format(safe))
            f.write('\n')
        f.write(footer)

Categories