python script to convert csv to xml

python script to convert csv to xml - python

Please help to correct the python script to get the required output
I have written below code to convert csv to xml.
In input file have column from 1 to 278. In output file need to have tag from A1 to A278,
Code :
#!/usr/bin/python
import sys
import os
import csv
if len(sys.argv) != 2:
os._exit(1)
path=sys.argv[1] # get folder as a command line argument
os.chdir(path)
csvFiles = [f for f in os.listdir('.') if f.endswith('.csv') or f.endswith('.CSV')]
for csvFile in csvFiles:
xmlFile = csvFile[:-4] + '.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<TariffRecords>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = Tariff
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write('<Tariff>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Tariff>' + "\n")
rowNum +=1
xmlData.write('</TariffRecords>' + "\n")
xmlData.close()
Getting below error from script:
Traceback (most recent call last):
File "ctox.py", line 20, in ?
tags = Tariff
NameError: name 'Tariff' is not defined
Sample Input file.(this is a sample record in actual input file will contain 278 columns).
If input file has two or three records, same needs to be appended in one XML file.
name,Tariff Summary,Record ID No.,Operator Name,Circle (Service Area),list
Prepaid Plan Voucher,test_All calls 2p/s,TT07PMPV0188,Ta Te,Gu,
Prepaid Plan Voucher,test_All calls 3p/s,TT07PMPV0189,Ta Te,HR,
Sample output file
The above two TariffRecords, tariff will be hard coded at the beginning and end of xml file.
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>

First off you need to replace
tags = Tariff
with
tags = row
Secondly you want to replace the write line to not write tags name but write A1, A2 etc..
Complete code:
import sys
import os
import csv
if len(sys.argv) != 2:
os._exit(1)
path=sys.argv[1] # get folder as a command line argument
os.chdir(path)
csvFiles = [f for f in os.listdir('.') if f.endswith('.csv') or f.endswith('.CSV')]
for csvFile in csvFiles:
xmlFile = csvFile[:-4] + '.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<TariffRecords>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write('<Tariff>' + "\n")
for i, index in enumerate(range(len(tags))):
xmlData.write(' ' + '<' + 'A%s' % (index+1) + '>' \
+ row[i] + '</' + 'A%s' % (index+1) + '>' + "\n")
xmlData.write('</Tariff>' + "\n")
rowNum +=1
xmlData.write('</TariffRecords>' + "\n")
xmlData.close()
Output:
<?xml version="1.0"?>
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>

import pandas as pd
from xml.etree import ElementTree as xml
df = pd.read_csv("file_path")
csv_data = df.values
root = xml.Element("TariffRecords")
tariff = xml.subelement("Tariff", root)
for index, data in enumarate(csv_data):
row = xml.Element("A"+str(index), tariff)
row.set(str(data))

Related

Python for loop incomplete

I am trying to convert all csv files in a folder to XML but having issues, code as follows:
import glob
import csv
fileCount = 0
path = "csvs\*.csv"
for fname in glob.glob(path):
print(fname)
for fname in glob.glob(path):
csvFile = fname
xmlFile = "csvs\myData" + str(fileCount) + ".xml"
print (xmlFile)
print (csvFile)
print (fileCount)
fileCount +=1
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="UTF-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<userforms>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', ' ')
else:
xmlData.write('<userform>' + "\n")
for i in range(len(tags)):
xmlData.write('<response>' + "\n" + ' <field>' + tags[i] + '</field>' + "\n" + ' <value>' + row[i] + '</value>'+ "\n" + '</response>' + "\n")
xmlData.write('</userform>' + "\n")
rowNum +=1
#print (fileCount)
#print (xmlFile)
#print (csvFile)
xmlData.write('</userforms>' + "\n")
#xmlData.write('</csv_data>' + "\n")
xmlData.close()
There are 4 csv files in the original folder, the content of each is the same but the names are 1.csv, 2.csv, 3.csv and 4.csv. This code does generate 4 xml files but the first three are incomplete with just the xml header created.
Is there abyway to add a delay/check to a for loop to ensure it completes?
Console output is clean with only print info available:
csvs\myData0.xml
csvs\1.csv
0
csvs\myData1.xml
csvs\2.csv
1
csvs\myData2.xml
csvs\3.csv
2
csvs\myData3.xml
csvs\4.csv
3

Index out of range while converting file from csv to xml

While running the following code for converting csv to xml I'm getting index out of range error.
I used the code below a small subset of file with 16 columns it works fine but when I try it on more than 30 its giving following error
Traceback (most recent call last):
File "csv2xml.py", line 40, in <module>
+ rowData[i] + '</' + tags[i] + '>' + "\n")
IndexError: list index out of range
#!/usr/bin/python
import sys
import os
import glob
delimiter = "," # "\t" "|" # delimiter used in the CSV file(s)
# the optional command-line argument maybe a CSV file or a folder
if len(sys.argv) == 2:
arg = sys.argv[1].lower()
if arg.endswith('.csv'): # if a CSV file then convert only that file
csvFiles = [arg]
else: # if a folder path then convert all CSV files in the that folder
os.chdir(arg)
csvFiles = glob.glob('*.csv')
# if no command-line argument then convert all CSV files in the current folder
elif len(sys.argv) == 1:
csvFiles = glob.glob('*.csv')
else:
os._exit(1)
for csvFileName in csvFiles:
xmlFile = csvFileName[:-4] + '.xml'
# read the CSV file as binary data in case there are non-ASCII characters
csvFile = open(csvFileName, 'rb')
csvData = csvFile.readlines()
csvFile.close()
tags = csvData.pop(0).strip().replace(' ', '_').split(delimiter)
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="UTF-8" ?>' + "\n")
# there must be only one top-level tag
xmlData.write('<CTS>' + "\n")
for row in csvData:
rowData = row.strip().split(delimiter)
xmlData.write('<Product>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>'
+ rowData[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Product>' + "\n")
xmlData.write('</CTS>' + "\n")
xmlData.close()

It sounds like your for loop over the lines of data should check the length of rowData like this:
tags_length = len(tags)
for row in csvData:
rowData = row.strip().split(delimiter)
xmlData.write('<Product>' + "\n")
if len(rowData) >= tags_length:
for i in range(tags_length):
xmlData.write(' ' + '<' + tags[i] + '>'
+ rowData[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Product>' + "\n")

Why does my Python XML parser break after the first file?

I am working on a Python (3) XML parser that should extract the text content of specific nodes from every xml file within a folder. Then, the script should write the collected data into a tab-separated text file. So far, all the functions seem to be working. The script returns all the information that I want from the first file, but it always breaks, I believe, when it starts to parse the second file.
When it breaks, it returns "TypeError: 'str' object is not callable." I've checked the second file and found that the functions work just as well on that as the first file when I remove the first file from the folder. I'm very new to Python/XML. Any advice, help, or useful links would be greatly appreciated. Thanks!
import xml.etree.ElementTree as ET
import re
import glob
import csv
import sys
content_file = open('WWP Project/WWP_texts.txt','wt')
quotes_file = open('WWP Project/WWP_quotes.txt', 'wt')
list_of_files = glob.glob("../../../Documents/WWPtextbase/distribution/*.xml")
ns = {'wwp':'http://www.wwp.northeastern.edu/ns/textbase'}
def content(tree):
lines = ''.join(ET.tostring(tree.getroot(),encoding='unicode',method='text')).replace('\n',' ').replace('\t',' ').strip()
clean_lines = re.sub(' +',' ', lines)
return clean_lines.lower()
def quotes(tree):
quotes_list = []
for node in tree.findall('.//wwp:quote', namespaces=ns):
quote = ET.tostring(node,encoding='unicode',method='text')
clean_quote = re.sub(' +',' ', quote)
quotes_list.append(clean_quote)
return ' '.join(str(v) for v in quotes_list).replace('\t','').replace('\n','').lower()
def pid(tree):
for node in tree.findall('.//wwp:sourceDesc//wwp:author/wwp:persName[1]', namespaces=ns):
pid = node.attrib.get('ref')
return pid.replace('personography.xml#','') # will need to replace 'p:'
def trid(tree): # this function will eventually need to call OT (.//wwp:publicationStmt//wwp:idno)
for node in tree.findall('.//wwp:sourceDesc',namespaces=ns):
trid = node.attrib.get('n')
return trid
content_file.write('pid' + '\t' + 'trid' + '\t' +'text' + '\n')
quotes_file.write('pid' + '\t' + 'trid' + '\t' + 'quotes' + '\n')
for file_name in list_of_files:
file = open(file_name, 'rt')
tree = ET.parse(file)
file.close()
pid = pid(tree)
trid = trid(tree)
content = content(tree)
quotes = quotes(tree)
content_file.write(pid + '\t' + trid + '\t' + content + '\n')
quotes_file.write(pid + '\t' + trid + '\t' + quotes + '\n')
content_file.close()
quotes_file.close()

You are overwriting your function calls with the values they returned. changing the function names should fix it.
import xml.etree.ElementTree as ET
import re
import glob
import csv
import sys
content_file = open('WWP Project/WWP_texts.txt','wt')
quotes_file = open('WWP Project/WWP_quotes.txt', 'wt')
list_of_files = glob.glob("../../../Documents/WWPtextbase/distribution/*.xml")
ns = {'wwp':'http://www.wwp.northeastern.edu/ns/textbase'}
def get_content(tree):
lines = ''.join(ET.tostring(tree.getroot(),encoding='unicode',method='text')).replace('\n',' ').replace('\t',' ').strip()
clean_lines = re.sub(' +',' ', lines)
return clean_lines.lower()
def get_quotes(tree):
quotes_list = []
for node in tree.findall('.//wwp:quote', namespaces=ns):
quote = ET.tostring(node,encoding='unicode',method='text')
clean_quote = re.sub(' +',' ', quote)
quotes_list.append(clean_quote)
return ' '.join(str(v) for v in quotes_list).replace('\t','').replace('\n','').lower()
def get_pid(tree):
for node in tree.findall('.//wwp:sourceDesc//wwp:author/wwp:persName[1]', namespaces=ns):
pid = node.attrib.get('ref')
return pid.replace('personography.xml#','') # will need to replace 'p:'
def get_trid(tree): # this function will eventually need to call OT (.//wwp:publicationStmt//wwp:idno)
for node in tree.findall('.//wwp:sourceDesc',namespaces=ns):
trid = node.attrib.get('n')
return trid
content_file.write('pid' + '\t' + 'trid' + '\t' +'text' + '\n')
quotes_file.write('pid' + '\t' + 'trid' + '\t' + 'quotes' + '\n')
for file_name in list_of_files:
file = open(file_name, 'rt')
tree = ET.parse(file)
file.close()
pid = get_pid(tree)
trid = get_trid(tree)
content = get_content(tree)
quotes = get_quotes(tree)
content_file.write(pid + '\t' + trid + '\t' + content + '\n')
quotes_file.write(pid + '\t' + trid + '\t' + quotes + '\n')
content_file.close()
quotes_file.close()

Python's csv reader keep reading the same file

i'm having a problem with the python's csv reader. The problem is that i want to open and read different csv files, but he keeps on reading always the same one.
from csv import reader
alphabet = ["a", "b", "c"]
for letter in alphabet:
csv_file = open('/home/desktop/csv/' + letter + '.csv', 'r')
csv_data = reader(csv_file)
The problem is that he seems to open the other files, but he keep on reading always the first file.
Is there a way to "clean" the reader or make him read another file? I even tried to close the csv file, but it didn't work.
The full code is this
from csv import reader
#Orario
orario_csv_file = '/home/andrea/Scrivania/orario.csv'
orario_csv = open(orario_csv_file)
orario_data = reader(orario_csv)
orario = []
#Corsi
corsi = ["EDILIZIA", "EDILE-ARCHIT", "ELETTRONICA", "TECNOLOGIE_DI_INTERNET", "INFORMATICA", "GESTIONALE", "ENERGETICA", "MECCANICA", "CIVILE_ED_AMBIENTALE", "MEDICA", "ENGINEERING_SCIENCES"]
giorni = ["Lun", "Mar", "Mer", "Gio", "Ven"]
for row in orario_data:
orario.append(row)
for corso in corsi:
nome_corso_file = '/home/andrea/Scrivania/xml/' + corso + '.xml'
nome_corso_xml = open(nome_corso_file, 'wt')
nome_corso_xml.write('<?xml version="1.0"?>' + "\n")
nome_corso_xml.write('<orario>' + "\n")
nome_csv = corso + '_csv'
nome_csv = '/home/andrea/Scrivania/csv/' + corso + '.csv'
nome_corso_csv = open(nome_csv, 'rt')
corso_data = reader(nome_corso_csv)
nome_corso_xml.write(' <corso name="' + corso + '">' + "\n")
for a in range(0, 3):
nome_corso_xml.write(' <anno num="' + str(a+1) + '">' + "\n")
for j in range(1, 6):
nome_corso_xml.write(' <giorno name="' + orario[2][j] + '">' + "\n")
for i in range(3, 12):
lez = orario[i + a*12][j]
if lez == "":
nome_corso_xml.write(' <lezione>' + "-" + '</lezione>' + "\n")
else:
for riga in corso_data:
if riga[0] == lez:
if riga[2] == "":
nome_corso_xml.write(' <lezione name="' + lez + '">' + riga[1] + '</lezione>' + "\n")
else:
for g in range(0, len(riga)):
if riga[g].lower() == orario[2][j].lower():
nome_corso_xml.write(' <lezione name="' + lez + '">' + riga[g+1] + '</lezione>' + "\n")
nome_corso_csv.seek(0)
nome_corso_xml.write(' </giorno>' + "\n")
nome_corso_xml.write(' </anno>' + "\n")
nome_corso_xml.write(' </corso>' + "\n")
nome_corso_xml.write('</orario>' + "\n")
nome_corso_xml.close()
He open the "EDILIZIA.csv" and compile the "EDILIZIA.xml", then he should open the "EDILE-ARCHIT.csv" and compile its xml, but when he read, he keeps on reading from "EDILIZIA.csv"
Here's the .csv files that you need.
http://pastebin.com/kJhL8HpK
If you try to make it read first EDILIZIA.csv and then EDILE-ARCHIT.csv he'll keep on using always the EDILIZIA.csv to compile the xml, but he should firt open EDILIZIA.csv, compile the EDILIZIA.xml, then read the EDILE-ARCHIT.csv and compile the EDILE-ARCHIT.xml.
If you take a look at the final xmls, you'll see that the EDILE-ARCHIT.xml will only display the common subjects of EDILIZIA.csv and EDILE-ARCHIT.csv

It took a long time to figure out what you are doing here. To tell the truth your code is a mess - there are many unused variables and lines that make no sense at all. Anyway, your code reads the appropriate csv file each time, thus the error is not where you thought it was.
If I am right, orario.csv contains the timetable of each course (stored in corsi list) for three semesters or years, and the corso.csv files contain the room where subjects are held. So you want to merge the information into an XML file.
You only forgot one thing: to proceed in orario.csv. Your code wants to merge the very first three anno with the current corso. To fix it, you have to make two changes.
First in this for loop header:
for corso in corsi:
Modify to:
for num, corso in enumerate(corsi):
And when you assign lez:
lez = orario[i + a*12][j]
Modify to:
lez = orario[i + a*12*(num+1)][j]
Now it should work.
This code produces exactly the same result, but it uses Python's XML module to build the output file:
from csv import reader
import xml.etree.cElementTree as ET
import xml.dom.minidom as DOM
corsi = ["EDILIZIA", "EDILE-ARCHIT", "ELETTRONICA", "TECNOLOGIE_DI_INTERNET", "INFORMATICA", "GESTIONALE", "ENERGETICA", "MECCANICA", "CIVILE_ED_AMBIENTALE", "MEDICA", "ENGINEERING_SCIENCES"]
with open('orario.csv', 'r') as orario_csv:
orario = reader(orario_csv)
orario_data = [ row for row in orario ]
for num, corso in enumerate(corsi):
with open(corso + '.csv', 'r') as corso_csv:
corso_raw = reader(corso_csv)
corso_data = [ row for row in corso_raw ]
root_elem = ET.Element('orario')
corso_elem = ET.SubElement(root_elem, 'corso')
corso_elem.set('name', corso)
for anno in range(0, 3):
anno_elem = ET.SubElement(corso_elem, 'anno')
anno_elem.set('num', str(anno + 1))
for giorno in range(1, 6):
giorno_elem = ET.SubElement(anno_elem, 'giorno')
giorno_elem.set('name', orario_data[2][giorno])
for lezione in range(3, 12):
lez = orario_data[lezione + anno * 12 * (num + 1)][giorno]
if lez == '':
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.text = '-'
else:
for riga in corso_data:
if riga[0] == lez:
if riga[2] == '':
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.set('name', lez)
lezione_elem.text = riga[1]
else:
for g in range(0, len(riga)):
if riga[g].lower() == orario_data[2][giorno].lower():
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.set('name', lez)
lezione_elem.text = riga[g + 1]
with open(corso + '_new.xml', 'w') as corso_xml:
xml_data = DOM.parseString(ET.tostring(root_elem, method = 'xml')).toprettyxml(indent = ' ')
corso_xml.write(xml_data)
Cheers.

I think I may have spotted the cause of your problem.
The second item in your corsi list ends with a full stop. This means that you will be looking for the file "EDILE-ARCHIT..csv", which is almost does not exist. When you try and open the file, the open() call will throw an exception, and your program will terminate.
Try removing the trailing full stop, and running it again.

Converting CSV to XML

I'm currently trying to make the input file for a hydrologic model (HBV-light) compatible with external calibration software (PEST). HBV-light requires that it's input files be in XML format, while PEST can only read text files. My issue relates to writing a script that will automatically convert a parameter set written by PEST (in CSV format) to an XML file that can be read by HBV-light.
Here's a short example of a text file that can be written by PEST:
W,X,Y,Z
1,2,3,4
and this is how I'm attempting to organize the XML file:
<Parameters>
<GroupA>
<W>1</W>
<X>2</X>
</GroupA>
<GroupB>
<Y>3</Y>
<Z>4</Z>
</GroupB>
</Parameters>
I don't have very much programming experience whatsoever, but here is a python code that I wrote so far:
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(3, 5):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(3, 5):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
I can get the Group A (or CathmentParameters specifically) to be written, but the Group B section is NOT being written. Not sure what to do!

I think that the loop is wrong.
Try if this works for you
#! /usr/bin/env python
# coding= utf-8
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
rowNum +=1
xmlData.write('</Catchment>' + "\n")
xmlData.close()

I think the issue is in your range definition in the second part... range(3, 5) means elements 4 and 5, what you want is probably range(2,4) meaning elements 3 and 4.

The problem is that you iterate over the contents of the csv file twice - it appears that you need to "rewind" after your first loop. There is also a minor indexing issue, with the second range needing to be range(2,4) and not range(3,5) as was already pointed out.
I created a piece of code that appears to work. It can probably be improved upon by people who understand Python properly. Note - I added a couple of print statements to convince myself I understood what is happening. If you don't open the csvFile a second time (at "starting the second for loop"), then no rows get printed. That's your clue that this is the problem.
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
print "row is ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
print "starting the second for loop"
csvData = csv.reader(open(csvFile))
for row in csvData:
print "row is now ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(2, 4):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
Using the above with the little test file you had given resulted in the following XML file:
<?xml version="1.0" encoding="utf-8"?>
<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<CatchmentParamters>
<W>1</W>
<X>2</X>
</CatchmentParameters>
<VegetationZone>
<VegetationZoneParameters>
<Y>3</Y>
<Z>4</Z>
</VegetationZoneParameters>
</VegetationZone>
</Catchment>
Problem solved?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

python script to convert csv to xml - python

import pandas as pd from xml.etree import ElementTree as xml df = pd.read_csv("file_path") csv_data = df.values root = xml.Element("TariffRecords") tariff = xml.subelement("Tariff", root) for index, data in enumarate(csv_data): row = xml.Element("A"+str(index), tariff) row.set(str(data))

Related

Python for loop incomplete

Index out of range while converting file from csv to xml

Why does my Python XML parser break after the first file?

Python's csv reader keep reading the same file

Converting CSV to XML

Categories

Resources