Python for loop incomplete - python

I am trying to convert all csv files in a folder to XML but having issues, code as follows:
import glob
import csv
fileCount = 0
path = "csvs\*.csv"
for fname in glob.glob(path):
print(fname)
for fname in glob.glob(path):
csvFile = fname
xmlFile = "csvs\myData" + str(fileCount) + ".xml"
print (xmlFile)
print (csvFile)
print (fileCount)
fileCount +=1
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="UTF-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<userforms>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', ' ')
else:
xmlData.write('<userform>' + "\n")
for i in range(len(tags)):
xmlData.write('<response>' + "\n" + ' <field>' + tags[i] + '</field>' + "\n" + ' <value>' + row[i] + '</value>'+ "\n" + '</response>' + "\n")
xmlData.write('</userform>' + "\n")
rowNum +=1
#print (fileCount)
#print (xmlFile)
#print (csvFile)
xmlData.write('</userforms>' + "\n")
#xmlData.write('</csv_data>' + "\n")
xmlData.close()
There are 4 csv files in the original folder, the content of each is the same but the names are 1.csv, 2.csv, 3.csv and 4.csv. This code does generate 4 xml files but the first three are incomplete with just the xml header created.
Is there abyway to add a delay/check to a for loop to ensure it completes?
Console output is clean with only print info available:
csvs\myData0.xml
csvs\1.csv
0
csvs\myData1.xml
csvs\2.csv
1
csvs\myData2.xml
csvs\3.csv
2
csvs\myData3.xml
csvs\4.csv
3

Related

Index out of range while converting file from csv to xml

While running the following code for converting csv to xml I'm getting index out of range error.
I used the code below a small subset of file with 16 columns it works fine but when I try it on more than 30 its giving following error
Traceback (most recent call last):
File "csv2xml.py", line 40, in <module>
+ rowData[i] + '</' + tags[i] + '>' + "\n")
IndexError: list index out of range
#!/usr/bin/python
import sys
import os
import glob
delimiter = "," # "\t" "|" # delimiter used in the CSV file(s)
# the optional command-line argument maybe a CSV file or a folder
if len(sys.argv) == 2:
arg = sys.argv[1].lower()
if arg.endswith('.csv'): # if a CSV file then convert only that file
csvFiles = [arg]
else: # if a folder path then convert all CSV files in the that folder
os.chdir(arg)
csvFiles = glob.glob('*.csv')
# if no command-line argument then convert all CSV files in the current folder
elif len(sys.argv) == 1:
csvFiles = glob.glob('*.csv')
else:
os._exit(1)
for csvFileName in csvFiles:
xmlFile = csvFileName[:-4] + '.xml'
# read the CSV file as binary data in case there are non-ASCII characters
csvFile = open(csvFileName, 'rb')
csvData = csvFile.readlines()
csvFile.close()
tags = csvData.pop(0).strip().replace(' ', '_').split(delimiter)
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="UTF-8" ?>' + "\n")
# there must be only one top-level tag
xmlData.write('<CTS>' + "\n")
for row in csvData:
rowData = row.strip().split(delimiter)
xmlData.write('<Product>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>'
+ rowData[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Product>' + "\n")
xmlData.write('</CTS>' + "\n")
xmlData.close()
It sounds like your for loop over the lines of data should check the length of rowData like this:
tags_length = len(tags)
for row in csvData:
rowData = row.strip().split(delimiter)
xmlData.write('<Product>' + "\n")
if len(rowData) >= tags_length:
for i in range(tags_length):
xmlData.write(' ' + '<' + tags[i] + '>'
+ rowData[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Product>' + "\n")

python script to convert csv to xml

Please help to correct the python script to get the required output
I have written below code to convert csv to xml.
In input file have column from 1 to 278. In output file need to have tag from A1 to A278,
Code :
#!/usr/bin/python
import sys
import os
import csv
if len(sys.argv) != 2:
os._exit(1)
path=sys.argv[1] # get folder as a command line argument
os.chdir(path)
csvFiles = [f for f in os.listdir('.') if f.endswith('.csv') or f.endswith('.CSV')]
for csvFile in csvFiles:
xmlFile = csvFile[:-4] + '.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<TariffRecords>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = Tariff
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write('<Tariff>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Tariff>' + "\n")
rowNum +=1
xmlData.write('</TariffRecords>' + "\n")
xmlData.close()
Getting below error from script:
Traceback (most recent call last):
File "ctox.py", line 20, in ?
tags = Tariff
NameError: name 'Tariff' is not defined
Sample Input file.(this is a sample record in actual input file will contain 278 columns).
If input file has two or three records, same needs to be appended in one XML file.
name,Tariff Summary,Record ID No.,Operator Name,Circle (Service Area),list
Prepaid Plan Voucher,test_All calls 2p/s,TT07PMPV0188,Ta Te,Gu,
Prepaid Plan Voucher,test_All calls 3p/s,TT07PMPV0189,Ta Te,HR,
Sample output file
The above two TariffRecords, tariff will be hard coded at the beginning and end of xml file.
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>
First off you need to replace
tags = Tariff
with
tags = row
Secondly you want to replace the write line to not write tags name but write A1, A2 etc..
Complete code:
import sys
import os
import csv
if len(sys.argv) != 2:
os._exit(1)
path=sys.argv[1] # get folder as a command line argument
os.chdir(path)
csvFiles = [f for f in os.listdir('.') if f.endswith('.csv') or f.endswith('.CSV')]
for csvFile in csvFiles:
xmlFile = csvFile[:-4] + '.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<TariffRecords>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write('<Tariff>' + "\n")
for i, index in enumerate(range(len(tags))):
xmlData.write(' ' + '<' + 'A%s' % (index+1) + '>' \
+ row[i] + '</' + 'A%s' % (index+1) + '>' + "\n")
xmlData.write('</Tariff>' + "\n")
rowNum +=1
xmlData.write('</TariffRecords>' + "\n")
xmlData.close()
Output:
<?xml version="1.0"?>
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>
import pandas as pd
from xml.etree import ElementTree as xml
df = pd.read_csv("file_path")
csv_data = df.values
root = xml.Element("TariffRecords")
tariff = xml.subelement("Tariff", root)
for index, data in enumarate(csv_data):
row = xml.Element("A"+str(index), tariff)
row.set(str(data))

os.walk() filename scope inside inner loop

Writing a script to help with data migration in renaming images. It seems as though when I try to access the variable filename from within the inner-for-loop, it's just printing .DS_Store
See commented lines for example:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
# Walk the tree.
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
# Join the two strings in order to form the full filepath.
filePath = os.path.join(root, filename)
#print(filePath) - this results in the actual file path
for row in reader:
#print(filePath) - this results in .DS_Store
oldFilePath = row[1].strip()
displayName = row[0].strip()
colour = row[2].strip()
if " " in colour:
colour = colour.replace(" ", "-")
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number)
os.rename(newFileName)
COUNT = COUNT+1
print(COUNT)
Why would this be?
After changing my code as per the comments, to store the results in a list, now the for root, directories, files in os.walk(FILE_PATH): is not being executed.
I verified that the FILE_PATH exists and printed it to console, also that it has contents.
My new code is as follows:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
productInfo = []
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
productInfo.append(row)
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
for info in productInfo:
displayName = info[0]
oldFilePath = info[1]
colour = info[2]
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if " " in colour:
colour = colour.replace(" ", "-")
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number) + ".jpg"
os.rename(newFileName)
COUNT = COUNT + 1
print(COUNT)

Python: Csv to kml with for loop

I am writing a script to take a csv file and write each line of the csv into a placemark in kml. The script works up until the second for loop and it fails to write the placemarks in the kml file. I know the script works fine up to writing the LookAt and then appears to jump over the for loop.
A little background... The csv contains 7 columns and the last column looks like this ";latitude;longitude;radius". That is why I have the first for loop to split out those three bits of information.
Here is what I have so far.
import csv
from Tkinter import *
from tkFileDialog import *
root = Tk()
root.fileName = askopenfilename( filetypes = ( ("Comma Separated Values", "*.csv"), ("All files", "*.*") ) )
print (root.fileName)
#close tkinter gui window
root.destroy()
#open the csv file
data = csv.reader(open (root.fileName), delimiter = ',')
#skip header row
next(data)
for line in data:
raw_coordinates = line[6]
#print raw_coordinates
lat_long_radius = raw_coordinates.split(';')
#Open the file to be written.
f = open('TPS_Report.kml', 'w')
#Writing the kml file.
f.write("<?xml version='1.0' encoding='UTF-8'?>\n")
f.write("<kml xmlns='http://earth.google.com/kml/2.1'>\n")
f.write("<Document>\n")
f.write(" <name>" + str("TPS_Report") + '.kml' +"</name>\n") # display name of kml file in Google Earth
#Create LookAt for start location at 39,-98 4200km eye alt, 0 tilt
print ("Creating LookAt")
f.write(" <LookAt>\n")
f.write(" <longitude>" + str("-98") + "</longitude>\n")
f.write(" <latitude>" + str("39") + "</latitude>\n")
f.write(" <range>" + str("4200000") + "</range>\n")
f.write(" <tilt>" + str("0") + "</tilt>\n")
f.write(" </LookAt>\n")
for row in data:
f.write(" <Placemark>\n")
f.write(" <name>" + lat_long_radius[3] + " meters" + "</name>\n")
f.write(" <description>" + str(row[0]) + "\n")
f.write(" " + str(row[1]) + "\n")
f.write(" " + str(row[2]) + "\n")
f.write(" " + str(row[3]) + "\n")
f.write(" " + str(row[4]) + "\n")
f.write(" " + str(row[5]) + "\n")
f.write(" " + str(row[6]) + "</description>\n")
f.write(" <gx:balloonVisibility>" + str("1") + "</gx:balloonVisibility>\n")
f.write(" <Point>\n")
f.write(" <coordinates>" + lat_long_radius[2] + "," + lat_long_radius[1] + "," + str(0) + "</coordinates>\n")
f.write(" </Point>\n")
f.write(" </Placemark>\n")
f.write("</Document>\n")
f.write("</kml>\n")
f.close()
print ("File Created. ")
print ("Press ENTER to exit. ")
raw_input()
Any help would be appreciated
Your data variable is not a list, it is an iterator that consumes your file. Once you iterate over it, you have reached the end of the file, and any attempt to iterate over it again will fail (as there is nothing more to read).
If you really need to iterate over your file twice, save the contents somewhere (in a list, for example). Or restructure your program so you do everything you need in only one pass.
Your code has also other problems, such as lat_long_radius, which is the same value for all points.

Converting CSV to XML

I'm currently trying to make the input file for a hydrologic model (HBV-light) compatible with external calibration software (PEST). HBV-light requires that it's input files be in XML format, while PEST can only read text files. My issue relates to writing a script that will automatically convert a parameter set written by PEST (in CSV format) to an XML file that can be read by HBV-light.
Here's a short example of a text file that can be written by PEST:
W,X,Y,Z
1,2,3,4
and this is how I'm attempting to organize the XML file:
<Parameters>
<GroupA>
<W>1</W>
<X>2</X>
</GroupA>
<GroupB>
<Y>3</Y>
<Z>4</Z>
</GroupB>
</Parameters>
I don't have very much programming experience whatsoever, but here is a python code that I wrote so far:
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(3, 5):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(3, 5):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
I can get the Group A (or CathmentParameters specifically) to be written, but the Group B section is NOT being written. Not sure what to do!
I think that the loop is wrong.
Try if this works for you
#! /usr/bin/env python
# coding= utf-8
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
rowNum +=1
xmlData.write('</Catchment>' + "\n")
xmlData.close()
I think the issue is in your range definition in the second part... range(3, 5) means elements 4 and 5, what you want is probably range(2,4) meaning elements 3 and 4.
The problem is that you iterate over the contents of the csv file twice - it appears that you need to "rewind" after your first loop. There is also a minor indexing issue, with the second range needing to be range(2,4) and not range(3,5) as was already pointed out.
I created a piece of code that appears to work. It can probably be improved upon by people who understand Python properly. Note - I added a couple of print statements to convince myself I understood what is happening. If you don't open the csvFile a second time (at "starting the second for loop"), then no rows get printed. That's your clue that this is the problem.
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
print "row is ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
print "starting the second for loop"
csvData = csv.reader(open(csvFile))
for row in csvData:
print "row is now ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(2, 4):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
Using the above with the little test file you had given resulted in the following XML file:
<?xml version="1.0" encoding="utf-8"?>
<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<CatchmentParamters>
<W>1</W>
<X>2</X>
</CatchmentParameters>
<VegetationZone>
<VegetationZoneParameters>
<Y>3</Y>
<Z>4</Z>
</VegetationZoneParameters>
</VegetationZone>
</Catchment>
Problem solved?

Categories