I am trying to do a relatively simple parse of a csv file, and I don't understand why the csv module is not working. Here is my code:
import csv

def getFromCSV(fileName):
    with open(fileName, 'r') as f:
        reader = csv.reader(f)
        data = list(reader)
    return data


def append_row(fileName, my_list):
    with open(fileName, 'a') as output:
        writer = csv.writer(output)
        writer.writerow(my_list)

data = getFromCSV('dh_internal_all.csv')
for row in data:
    if '25252' not in row:
        print(row)
        append_row('parsed.csv',[row])
This returns:
dh-dfbhv2l:Documents jwr38$ python3 remove_bad_data.py
Traceback (most recent call last):
File "remove_bad_data.py", line 13, in <module>
data = getFromCSV('dh_internal_all.csv')
File "remove_bad_data.py", line 3, in getFromCSV
reader = csv.reader(f)
NameError: name 'csv' is not defined
Thank you in advance for any tips.
EDIT: when I run python3 in the terminal, then import csv, and then type csv, it recognizes the module and returns:
<module 'csv' from '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/csv.py'>
You pasted the wrong code. In your traceback the failing line is 3, but in the code you pasted it's line 5 - the two missing lines are probably the "import csv" line and the blank line after it, which means the file you are actually running never imports csv.
I am in the process of doing a conversion of JSON to XML using Python.
I'm giving a presentation on how, starting with one CSV file, you can convert it through multiple formats in a chain: CSV to JSON, that JSON to XML, XML to the next file type in the chain, and so on, back to CSV.
I obtained a public domain CSV file from Kaggle (https://www.kaggle.com/canggih/anime-data-score-staff-synopsis-and-genre), then converted it to JSON.
From JSON, I am trying to convert to XML and write to an outfile.
I converted the CSV to JSON using this (no formatting, just a straight conversion):
#This should convert CSV to JSON
import json, os
import pandas as pd
import csv
df = pd.read_csv('dataanime.csv')
df.to_json(r'sassyg_data_Anime.json')
Then, I created my JSON to XML file:
#With help from instructor and CodeSpeedy
#https://www.codespeedy.com/how-to-convert-json-to-xml-using-python/
#Import libraries
import json as j
import xml.etree.ElementTree as et
#load in the json file
with open("sassyg_data_Anime.json") as json_file_format:
    d = j.load(json_file_format)
#create the main container element for the entire XML file
r = et.Element("Work")
#creates the subelements for each part of the json file
et.SubElement(r,"Title").text = d["Title"]
et.SubElement(r,"Type").text = d["Type"]
et.SubElement(r,"Episodes").text = d["Episodes"]
et.SubElement(r,"Status").text = d["Status"]
et.SubElement(r,"Start airing").text = str(d["Start airing"])
et.SubElement(r,"End airing").text = str(d["End airing"])
et.SubElement(r,"Starting season").text = d["Starting season"]
et.SubElement(r,"Broadcast time").text = d["Broadcast time"]
et.SubElement(r,"Producers").text = d["Producers"]
et.SubElement(r,"Licensors").text = d["Licensors"]
et.SubElement(r,"Studios").text = d["Studios"]
et.SubElement(r,"Sources").text = d["Sources"]
et.SubElement(r,"Genres").text = d["Genres"]
et.SubElement(r,"Duration").text = str(d["Duration"])
et.SubElement(r,"Rating").text = d["Rating"]
et.SubElement(r,"Score").text = str(d["Score"])
et.SubElement(r,"Scored by").text = str(d["Scored by"])
et.SubElement(r,"Members").text = str(d["Members"])
et.SubElement(r,"Favorites").text = str(d["Favorites"])
et.SubElement(r,"Description").text = d["Description"]
#create the element tree/info for the write file
a = et.ElementTree(r)
#ERROR ERROR
#structure the output for xml via tostring rather than str
#Cannot write an ElementTree to file, errors out
#This was one solution I came up with, still errors out
a_xml_str = et.tostring(a)
print(a_xml_str)
#This might error out as well, I can't get the program to get to this point
#write file it should go to
outfile = open("json_to_xml.xml", 'w', encoding='utf-8')
outfile.write(a_xml_str)
outfile.close()
The error I get is:
Traceback (most recent call last):
File "F:\Data_Int_Final\Gardner_json_to_xml\convert_json_to_xml.py", line 44, in <module>
a_xml_str = et.tostring(a)
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 1109, in tostring
ElementTree(element).write(stream, encoding,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 748, in write
serialize(write, self._root, qnames, namespaces,
File "C:\Users\user\AppData\Local\Programs\Python\Python39\lib\xml\etree\ElementTree.py", line 873, in _serialize_xml
tag = elem.tag
AttributeError: 'ElementTree' object has no attribute 'tag'
This is the latest version of the code I've tried. Can anyone see a solution?
Update:
I have two other ways to convert to the starting JSON file, would one of these be a better approach?
import json
import csv

def make_json(csvFilePath, jsonFilePath):
    data = {}
    with open(csvFilePath, encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        for rows in csvReader:
            key = rows['Title']
            data[key] = rows
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))

csvFilePath = r'dataanime.csv'
jsonFilePath = r'dataAnime.json'

make_json(csvFilePath, jsonFilePath)
which errors out my XML conversion when I use this JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
KeyError: 'Title'
or:
import csv
import json
import time

def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []

    #read csv file
    with open(csvFilePath, encoding='utf-8') as csvf:
        #load csv file data using csv library's dictionary reader
        csvReader = csv.DictReader(csvf)
        #convert each csv row into python dict
        for row in csvReader:
            #add this python dict to json array
            jsonArray.append(row)

    #convert python jsonArray to JSON String and write to file
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)

csvFilePath = r'dataanime.csv'
jsonFilePath = r'g_d_anime.json'

start = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
finish = time.perf_counter()

print(f"Conversion of all rows completed successfully in {finish - start:0.4f} seconds")
which errors out my XML conversion when I use this created JSON file with it:
Traceback (most recent call last):
File "F:\Data_Int_Final\convert_json_to_xml.py", line 16, in <module>
et.SubElement(r,"Title").text = d["Title"]
TypeError: list indices must be integers or slices, not str
It's simpler to work with the CSV file and generate an XML file from that directly.
Try something like this:
import csv
import xml.etree.ElementTree as et

root = et.Element('WorksXML')
tree = et.ElementTree(root)

with open("dataanime.csv", "r", encoding="utf-8") as fin:
    reader = csv.DictReader(fin)
    for row in reader:
        r = et.SubElement(root, "Work")
        # iterate over each of the fields and add to the XML element
        for field in reader.fieldnames:
            et.SubElement(r, field.replace(' ', '_')).text = row[field]

with open("csv_to_xml.xml", 'wb') as fout:
    tree.write(fout, xml_declaration=True, encoding='utf-8')
This generates an XML file with each "work" as a separate sub-element under the root element.
<?xml version="1.0" encoding="utf-8"?>
<WorksXML>
  <Work>
    <Title>Fullmetal Alchemist: Brotherhood</Title>
    <Type>TV</Type>
    <Episodes>64</Episodes>
    <Status>Finished Airing</Status>
    <Start_airing>4/5/2009</Start_airing>
    <End_airing>7/4/2010</End_airing>
    <Starting_season>Spring</Starting_season>
    ...
For the CSV to JSON conversion, the first approach creates a dictionary with titles as keys and the second approach creates an array with each item an object with all the attributes.
If any of the works have a duplicate title, then the first approach will overwrite the duplicate entries. If not, then it's just a matter of how you want to access the data in the JSON file: as a dictionary or as a list. If you want to generate XML from the JSON file, then the second approach with an array is the better option.
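If you do want to keep the dictionary-keyed JSON from the first approach, here is a minimal sketch that iterates over the dictionary's values instead of indexing the top level by field name, which is what caused the KeyError: 'Title' above. (The input name matches the question; the output filename is just a placeholder.)

import json
import xml.etree.ElementTree as ET

# Sketch: convert the dict-keyed JSON ({title: {field: value, ...}, ...}) to XML
with open('dataAnime.json', encoding='utf-8') as fin:
    d = json.load(fin)

root = ET.Element('WorksXML')
for obj in d.values():  # iterate the per-title row dicts, not the top-level keys
    work = ET.SubElement(root, 'Work')
    for key, value in obj.items():
        ET.SubElement(work, key.replace(' ', '_')).text = value

ET.ElementTree(root).write('dict_based.xml', xml_declaration=True, encoding='utf-8')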
To convert the array-based JSON file to XML, this will do the job.
import json
import xml.etree.ElementTree as ET

def json_to_xml(jsonFilePath, xmlFilePath):
    root = ET.Element('WorksXML')
    tree = ET.ElementTree(root)
    with open(jsonFilePath, "r", encoding="utf-8") as fin:
        jdata = json.load(fin)
        for obj in jdata:
            r = ET.SubElement(root, "Work")
            for key, value in obj.items():
                ET.SubElement(r, key.replace(' ', '_')).text = value
    with open(xmlFilePath, 'wb') as fout:
        tree.write(fout, xml_declaration=True, encoding='utf-8')

jsonFilePath = 'g_d_anime.json'
xmlFilePath = 'g_d_anime.xml'
json_to_xml(jsonFilePath, xmlFilePath)
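One small caveat about the snippet above: because the JSON came from the DictReader conversion, every value is already a string. If the JSON ever contains numbers or nulls instead, ElementTree will raise a TypeError while serializing the .text, so a defensive cast (a hedged tweak, not required for this particular data) keeps it robust:

# Defensive variant of the assignment inside the inner loop (sketch only)
ET.SubElement(r, key.replace(' ', '_')).text = '' if value is None else str(value)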
I am trying to run through an Excel file line by line, create a new list, and then append every cell value on that line to the list. I don't think my code is correct, but I just want to know why it cannot find the file; the error message is below.
def createPersonList(fileName):
    open(fileName)
    i = 0.0
    for line in fileName:
        i += 1
        Person = []
        for cell in line:
            Person.append(cell)
    return Person
error message:
createPersonList(personData.csv)
Traceback (most recent call last):
  File "<ipython-input-36-207031458d64>", line 1, in <module>
    createPersonList(personData.csv)
NameError: name 'personData' is not defined
I don't fully understand what you want, and I also don't know the structure of your file.
But here is something similar to what you want:
import csv

def createPersonList(fileName):
    personList = []
    with open(fileName, 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='\t')
        next(csv_reader, None)
        for row in csv_reader:
            for column in row:
                personList.append(column)
    return personList
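As a usage note, the NameError in the question comes from calling createPersonList(personData.csv) without quotes: Python reads that as an attribute lookup on a variable named personData, which doesn't exist. The filename has to be passed as a string, for example:

# The path must be a string literal (or a variable holding one)
personList = createPersonList('personData.csv')
print(personList)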
I'm trying to read in a CSV of geohashes, to be converted to lat/longs. I'm using Python's Geohash module. Here's my code:
import Geohash
import csv

with open('C:\Users\Data\myData.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        geoHash = row[0]
        print(str(geoHash))  #This prints successfully
        geoStr = str(geoHash)
        Geohash.decode(geoStr)  #This fails
The code only fails on the very last line, geoStr outputs each geohash successfully. The error I'm getting is:
Traceback (most recent call last):
  File "C:/Users/PycharmProjects/GeoHash/GeoHash_to_LatLong.py", line 11, in <module>
Geohash.decode(geoStr)
File "C:\Users\PycharmProjects\GeoHash\venv\lib\site-packages\Geohash\geohash.py", line 68, in decode
lat, lon, lat_err, lon_err = decode_exactly(geohash)
File "C:\Users\PycharmProjects\GeoHash\venv\lib\site-packages\Geohash\geohash.py", line 44, in decode_exactly
cd = __decodemap[c]
KeyError: 'o'
I've checked the CSV and there doesn't appear to be any odd characters or anything that would cause this to fail. Any thoughts?
Hi, I'm trying to open a simple CSV file with the header read from an external file.
I have a file named name.csv with the following content:
Leo,Days,Ju
Tomas,Lee,Bruce
Max,Perez,Smith
If I code:
import csv

sep = ','
with open('name.csv') as csvfile:
    fieldnames = ['name', 'paterno', 'materno']
    reader = csv.DictReader(csvfile,fieldnames)
    for row in reader:
        list = (row['name'], \
                row['materno'])
        print (sep.join(list))
The result is as desired:
Leo,Ju
Tomas,Bruce
Max,Smith
But then I have an extra file with the headers, named hdr_name.txt, containing:
['name', 'paterno', 'materno']
With this new code:
import csv

sep = ','
fieldnames = open('hdr_name.txt', 'r').read()
with open('name.csv') as csvfile:
    print(fieldnames)
    reader = csv.DictReader(csvfile,fieldnames)
    for row in reader:
        list = (row['name'], \
                row['materno'])
        print (sep.join(list))
I get this result:
Traceback (most recent call last):
File "<stdin>", line 5, in <module>
KeyError: 'name'
But if I test for 'name' in fieldnames, it is there!
>>> 'name' in fieldnames
True
>>>
What am I doing wrong with opening the header from an external file?
fieldnames is a string that looks like this:
"['name', 'paterno', 'materno']"
Naturally, a membership test will return True (it's a substring match), but that does not imply fieldnames is a list. Remember, file.read() returns a string - you still need to parse it into a list.
This doesn't look like JSON, so I'd recommend ast:
import ast

with open('hdr_name.txt', 'r') as f:
    fieldnames = ast.literal_eval(f.read().strip())
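With the header parsed into an actual list, the DictReader code from the question should work unchanged. A minimal end-to-end sketch, assuming hdr_name.txt really contains the bracketed list shown above:

import ast
import csv

with open('hdr_name.txt', 'r') as f:
    fieldnames = ast.literal_eval(f.read().strip())  # ['name', 'paterno', 'materno']

with open('name.csv') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        print(','.join((row['name'], row['materno'])))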
I'm a QA tester who is new to Python, trying to create a script that creates multiple XML files from a CSV file containing various fields. I feel I am close. Unfortunately, I have been getting the following error when adding the code to advance to the next line in the CSV file (line = next(reader)). If I don't add the line to advance, the program will run, but multiple XML files are created with information from only the first line of the CSV file. I can't figure out why, or how to fix it.
Error Message:
Traceback (most recent call last):
File "C:\Users\xxxxxxxxx\Desktop\defxmlImportcsv.py", line 22, in <module>
line = next(reader)
ValueError: I/O operation on closed file.
Here is my code:
import xml.etree.ElementTree as etree
import csv

with open('datanames.csv') as csvfile:
    reader = csv.reader(csvfile)
    x=0
    line = next(reader)
    line = next(reader)

while x<2:
    filename = "Output"+str(x)+".xml"
    [firstName,lastName] = line
    print(line)
    tree = etree.parse('WB5655(BR-09).xml')
    root = tree.getroot()
    registration_id=tree.find('primaryApplicant/ssn')
    registration_id.text = str(53)
    first_name = tree.find('primaryApplicant/firstName')
    first_name.text = (line[0])
    last_name = tree.find('primaryApplicant/lastName')
    last_name.text =(line[1])
    line = next(reader)
    tree.write(filename)
    print(x)
    x=x+1
Any help would be greatly appreciated. Thanks in advance.
csvfile is automatically closed when you exit your with block, which means that reader, in turn, can no longer read from it, causing your line = next(reader) line to fail.
The easiest (and likely most correct) fix is to add indentation to your code so that your while loop is inside the with block.
You exited the with statement:
with open('datanames.csv') as csvfile:
    reader = csv.reader(csvfile)
    x=0
    line = next(reader)
    line = next(reader)

while x<2:
    # ...
The moment the while line is reached, the csvfile file object is closed, because that block is logically outside of the with statement (its indentation does not match).
The solution is to indent the whole while loop to be within the with block:
with open('datanames.csv') as csvfile:
    reader = csv.reader(csvfile)
    x=0
    line = next(reader)
    line = next(reader)

    while x<2:
        # ...
Rather than use while, use itertools.islice() to loop just twice:
import csv
import xml.etree.ElementTree as etree
from itertools import islice

tree = etree.parse('WB5655(BR-09).xml')
registration_id = tree.find('primaryApplicant/ssn')
registration_id.text = '53'

with open('datanames.csv') as csvfile:
    reader = csv.reader(csvfile)
    # skip two lines
    next(islice(reader, 2, 2), None)
    for x, row in enumerate(islice(reader, 2)):
        filename = "Output{}.xml".format(x)
        first_name = tree.find('primaryApplicant/firstName')
        last_name = tree.find('primaryApplicant/lastName')
        first_name.text, last_name.text = row
        tree.write(filename)
I simplified your XML handling as well; you don't have to read the input XML tree twice, for example.