I need to add some additional values to an existing column for my CSV file.
So this is what I have so far:
Sample input CSV file:
Alfa,Beta,Charlie,Delta,Echo,Foxtrot,Golf,Hotel,India,Juliett,Kilo
A1,B1,C1,D1,E1,F1,G1,H1,I1,J1,K1
A2,B2,C2,D2,E2,F2,G2,H2,I2,J2,K2
A3,B3,C3,D3,E3,F3,G3,H3,I3,J3,K3
A4,B4,C4,D4,E4,F4,G4,H4,I4,J4,K4
A5,B5,C5,D5,E1,F5,G5,H5,I5,J5,K5
A6,B6,C6,D6,E6,F6,G6,H6,I6,J6,K6
A7,B7,C7,D7,E7,F7,G7,H7,I7,J7,K7
A8,B8,C8,D8,E8,F8,G8,H8,I8,J8,K8
A9,B9,C9,D9,E9,F9,G9,H9,I9,J9,K9
This is what I have so far, I am thinking of converting row_out into a list and then inputing the values under India_New.
import csv
fieldnames_dict = {
'Beta': 'Beta_New',
'Echo': 'Echo_New',
'Foxtrot': 'Foxtrot_New_ALL',
'Hotel': 'Hotel_New',
'India': 'India_New',
'Charlie': 'Charlie_New'
}
with open("book1.csv", "r", encoding="utf-8", errors='ignore') as csv_in:
with open("xtest_file.csv", "w", encoding="utf-8", errors='ignore') as csv_out:
reader = csv.DictReader(csv_in, delimiter=',', quotechar='"')
writer = csv.DictWriter(csv_out, delimiter=',', quotechar='"',
fieldnames=list(fieldnames_dict.values()))
writer.writeheader()
additional_values = [';I_1;I_2']
new_row_out = []
for row_in in reader:
row_out = {new: row_in[old] for old, new in fieldnames_dict.items()}
row_out.items()
for row in row_out.items():
new_row_out.append(row_out.items())
writer.writerow(new_row_out)
What the output CSV should look like:
Beta_New,Echo_New,Foxtrot_New_ALL,Hotel_New,India_New,Charlie_New
B1,E1,F1,H1,I1;I_1;I_2,C1
B2,E2,F2,H2,I2;I_1;I_2,C2
B3,E3,F3,H3,I3;I_1;I_2,C3
B4,E4,F4,H4,I4;I_1;I_2,C4
B5,E5,F5,H5,I5;I_1;I_2,C5
B6,E6,F6,H6,I6;I_1;I_2,C6
B7,E7,F7,H7,I7;I_1;I_2,C7
B8,E8,F8,H8,I8;I_1;I_2,C8
B9,E9,F9,H9,I9;I_1;I_2,C9
Since writer is a DictWriter, the argument to writer.writerow() must be a dictionary, not a list like new_row_out.
Just append the string to the India_New item in the row_out dictionary before writing it.
with open("book1.csv", "r", encoding="utf-8", errors='ignore') as csv_in, open("xtest_file.csv", "w", encoding="utf-8", errors='ignore') as csv_out:
reader = csv.DictReader(csv_in, delimiter=',', quotechar='"')
writer = csv.DictWriter(csv_out, delimiter=',', quotechar='"',
fieldnames=list(fieldnames_dict.values()))
writer.writeheader()
additional_india_new = ';I_1;I_2'
for row_in in reader:
row_out = {new: row_in[old] for old, new in fieldnames_dict.items()}
row_out['India_New'] += additional_india_new
writer.writerow(row_out)
There are two CSV files. I need to convert to JSON. Code is below
import csv
import json
import os
import glob
os.chdir(r'C:\Users\user\Desktop\test' )
result = glob.glob( '*.csv' )
print (result)
def make_json(csvFile, jsonFile):
csvFile, jsonFile = '',''
for i in result:
data = {}
with open(csvFile, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
key = rows['id']
data[key] = rows
with open(jsonFile, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
csvFilePath =f"{i}"
jsonFilePath =f"{i.split('.')[-2]}.json"
make_json(csvFile, jsonFile)
I got error > csvFile is not mentioned. But the third line from the end mentions the CSV file.
Disclaimer. Please find the error in the code. I already know of the working code which is in pandas
Below is the correct code, but I would recommend you learn to use the python debugger so you can resolve any logic flaws in your code next time. Documentation on the python debugger can be found here:
https://docs.python.org/3/library/pdb.html
Your code was structured in a way that meant for each csv file, you were not setting the file name until after you attempted to open it. The immediate error you saw was caused because you tried to call make_json() before you defined the values for csvFile and jsonFile.
I would recommend changing the code to:
import csv
import json
import glob
def make_json(csvList):
for csvFile in csvList:
data = {}
with open(csvFile, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
key = rows['id']
data[key] = rows
jsonFile =f"{csvFile.split('.')[-2]}.json"
with open(jsonFile, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
make_json(glob.glob('*.csv'))
You should try this
import csv, json, os, glob
os.chdir(r'C:\Users\user\Desktop\test' )
result = glob.glob( '*.csv' )
print(result)
def make_json():
for i in result:
with open(i, encoding='utf-8') as csvf:
data = [row for row in csv.DictReader(csvf)]
with open(f"{i.split('.')[-2]}.json", 'w', encoding='utf-8') as jsonf:
json.dump(data, jsonf)
make_json()
You did not initialize both the arguments of make_json() - (csvFilePath & jsonFilePath)
I've seen an older answer for the post, Inline CSV File Editing with Python, about how to modify a csv file and save it. It uses the tempfile module. I have used the same code in Python 3.6 but I get an error because I read the file in binary mode and I cannot iterate over it.
The code below creates a simple CSV file
import csv
with open('proglanguages.csv', mode='w', newline='') as csv_file:
fieldnames = ['lang', 'value']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writerow({'lang': 'Java', 'value': '90'})
writer.writerow({'lang': 'c', 'value': '80'})
writer.writerow({'lang': 'Perl', 'value': '78'})
writer.writerow({'lang': 'C++', 'value': '92'})
writer.writerow({'lang': 'Python', 'value': '0'})
writer.writerow({'lang': 'Fortran', 'value': '70'})
The code below modifies the previous generated CSV file,
from tempfile import NamedTemporaryFile
import shutil
import csv
filename = 'proglanguages.csv'
tempfile = NamedTemporaryFile(delete=False)
with open(filename, mode='rb') as csvFile, tempfile:
reader = csv.reader(csvFile, delimiter=',', quotechar='"')
writer = csv.writer(tempfile, delimiter=',', quotechar='"')
for row in reader:
if row[0] == 'Python':
row[1] = '100'
writer.writerow(row)
else:
writer.writerow(row)
shutil.move(tempfile.name, filename)
In which way, could I use the for-loop for iteration, modify the item and write in the tempfile
You are not lucky here. Answers from Martijn Pieters are always high quality ones. This one was but was targetted at Python 2 while you use Python 3. And the csv semantics have changed... You should no longer use binary mode in Python 3 and do what you did for the initial csv file:
...
tempfile = NamedTemporaryFile(delete=False, newline='', mode='w')
with open(filename, mode='r', newline='') as csvFile, tempfile:
reader = csv.reader(csvFile)
writer = csv.writer(tempfile)
...
I have to download this .txt file: Link
Then I have to parse it to .csv and remove all header.
I tried to do this, but it doesn't work for me, this is my code:
import urllib
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
out_csv.writerows(in_txt)
In this code I didn't do anything with header because even converting is not working yet.
You might use this code for Python 2:
import urllib
import csv
import urllib2
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
#urllib.request.urlretrieve(url_of_file, outfilename)
response = urllib2.urlopen(url_of_file)
output = open(outfilename,'wb')
output.write(response.read())
output.close()
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
i = 0
for row in in_txt:
i +=1
if i>3:
out_csv.writerow(row)
Python 3:
import urllib.request
import csv
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
with open(outfilename, encoding='utf-8') as infile, open(csvfile, 'w', newline='') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
i = 0
for row in in_txt:
i +=1
if i>3:
out_csv.writerow(row)
There is no request namespace in urllib.
Replace this line
urllib.request.urlretrieve(url_of_file, outfilename)
With this
urllib.urlretrieve(url_of_file, outfilename)
UPDATED:
You need to import urllib.request, not just urllib.
Also, you need to open the files in text mode, not binary mode (which is 'rb', or 'wb').
import urllib.request
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
with open(outfilename, "r") as infile, open(csvfile, 'w') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
out_csv.writerows(in_txt)