Python 3 Encoding Issues when converting to CSV from JSON - python

Python3
I have looked at the other solutions, but they don't seem to cover my situation. I have been tasked with writing a script that takes JSON and converts it to a CSV file.
I have a good chunk of this done but have encountered an issue when I write the data. The data I received does not match what was written. Below is an example. I am lost on how I can get this to preserve the encoding.
I should mention that the default encoding is UTF-8
Input: necesitará
Output: necesitarÃ¡
import csv
import json
import sys
import sys
print(sys.getdefaultencoding())
stuff = open('data.json')
jsonStuff = json.loads(stuff.read(), encoding="utf-8")
with open('output.csv', 'w', newline='\n', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile, delimiter=",",quotechar='"',quoting=csv.QUOTE_MINIMAL)
for element in jsonStuff:
row = ""
key = element['key']
values = element['valuesRow']
row = element['key']
# values[0]['value'], values[1]['value'], values[2]['value'], values[3]['value'],
writer.writerow([element['key'], values[3]['value']])

Removing encoding='utf-8' from open('output.csv', 'w', newline='\n', encoding='utf-8') should fix it.
data.json (utf-8): {"first": "necesitará", "second": "bodø"}
The following ...
import csv
import json
with open('data.json') as stuff, open('output.csv', 'w', newline='\n', encoding='utf-8') as csvfile:
jsonStuff = json.loads(stuff.read(), encoding="utf-8")
writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
first = jsonStuff['first']
second = jsonStuff['second']
writer.writerow([first, second])
... gives output.csv: necesitarÃ¡,bodÃ¸
However ...
import csv
import json
with open('data.json') as stuff, open('output.csv', 'w', newline='\n') as csvfile:
jsonStuff = json.loads(stuff.read(), encoding="utf-8")
writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
first = jsonStuff['first']
second = jsonStuff['second']
writer.writerow([first, second])
... produces output.csv: necesitará,bodø
That said, there is no reason to use json.loads() when you have json.load(), and most of what you've defined are the defaults. I'd simply do ...
import csv
import json

# Name the encoding on both files instead of inheriting the platform default:
# open() falls back to the locale encoding, and decoding UTF-8 JSON with a
# legacy code page is what turns "á" into "Ã¡" once it is written back out.
# newline='' hands line-ending handling to the csv module.
with open('data.json', encoding='utf-8') as jsonfile, \
        open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
    json_data = json.load(jsonfile)
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
    first = json_data['first']
    second = json_data['second']
    writer.writerow([first, second])

Related

CSV adding values to an existing column and calling from a dictionary (no pandas)

I need to add some additional values to an existing column for my CSV file.
So this is what I have so far:
Sample input CSV file:
Alfa,Beta,Charlie,Delta,Echo,Foxtrot,Golf,Hotel,India,Juliett,Kilo
A1,B1,C1,D1,E1,F1,G1,H1,I1,J1,K1
A2,B2,C2,D2,E2,F2,G2,H2,I2,J2,K2
A3,B3,C3,D3,E3,F3,G3,H3,I3,J3,K3
A4,B4,C4,D4,E4,F4,G4,H4,I4,J4,K4
A5,B5,C5,D5,E5,F5,G5,H5,I5,J5,K5
A6,B6,C6,D6,E6,F6,G6,H6,I6,J6,K6
A7,B7,C7,D7,E7,F7,G7,H7,I7,J7,K7
A8,B8,C8,D8,E8,F8,G8,H8,I8,J8,K8
A9,B9,C9,D9,E9,F9,G9,H9,I9,J9,K9
This is what I have so far. I am thinking of converting row_out into a list and then inserting the values under India_New.
import csv
fieldnames_dict = {
'Beta': 'Beta_New',
'Echo': 'Echo_New',
'Foxtrot': 'Foxtrot_New_ALL',
'Hotel': 'Hotel_New',
'India': 'India_New',
'Charlie': 'Charlie_New'
}
with open("book1.csv", "r", encoding="utf-8", errors='ignore') as csv_in:
with open("xtest_file.csv", "w", encoding="utf-8", errors='ignore') as csv_out:
reader = csv.DictReader(csv_in, delimiter=',', quotechar='"')
writer = csv.DictWriter(csv_out, delimiter=',', quotechar='"',
fieldnames=list(fieldnames_dict.values()))
writer.writeheader()
additional_values = [';I_1;I_2']
new_row_out = []
for row_in in reader:
row_out = {new: row_in[old] for old, new in fieldnames_dict.items()}
row_out.items()
for row in row_out.items():
new_row_out.append(row_out.items())
writer.writerow(new_row_out)
What the output CSV should look like:
Beta_New,Echo_New,Foxtrot_New_ALL,Hotel_New,India_New,Charlie_New
B1,E1,F1,H1,I1;I_1;I_2,C1
B2,E2,F2,H2,I2;I_1;I_2,C2
B3,E3,F3,H3,I3;I_1;I_2,C3
B4,E4,F4,H4,I4;I_1;I_2,C4
B5,E5,F5,H5,I5;I_1;I_2,C5
B6,E6,F6,H6,I6;I_1;I_2,C6
B7,E7,F7,H7,I7;I_1;I_2,C7
B8,E8,F8,H8,I8;I_1;I_2,C8
B9,E9,F9,H9,I9;I_1;I_2,C9
Since writer is a DictWriter, the argument to writer.writerow() must be a dictionary, not a list like new_row_out.
Just append the string to the India_New item in the row_out dictionary before writing it.
# newline='' on both files lets the csv module manage line endings itself, as
# its documentation asks; without it, rows written on Windows end in \r\r\n.
with open("book1.csv", "r", encoding="utf-8", errors='ignore', newline='') as csv_in, \
        open("xtest_file.csv", "w", encoding="utf-8", errors='ignore', newline='') as csv_out:
    reader = csv.DictReader(csv_in, delimiter=',', quotechar='"')
    writer = csv.DictWriter(csv_out, delimiter=',', quotechar='"',
                            fieldnames=list(fieldnames_dict.values()))
    writer.writeheader()
    additional_india_new = ';I_1;I_2'
    for row_in in reader:
        # Keep only the mapped columns, renamed to their new headers.
        row_out = {new: row_in[old] for old, new in fieldnames_dict.items()}
        # DictWriter.writerow() needs a dict, so extend the one field in place.
        row_out['India_New'] += additional_india_new
        writer.writerow(row_out)

Why is my code not working while converting bulk csv to json?

There are two CSV files. I need to convert to JSON. Code is below
import csv
import json
import os
import glob
os.chdir(r'C:\Users\user\Desktop\test' )
result = glob.glob( '*.csv' )
print (result)
def make_json(csvFile, jsonFile):
csvFile, jsonFile = '',''
for i in result:
data = {}
with open(csvFile, encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
key = rows['id']
data[key] = rows
with open(jsonFile, 'w', encoding='utf-8') as jsonf:
jsonf.write(json.dumps(data, indent=4))
csvFilePath =f"{i}"
jsonFilePath =f"{i.split('.')[-2]}.json"
make_json(csvFile, jsonFile)
I get an error saying csvFile is not defined, even though the third line from the end sets the CSV file path.
Disclaimer. Please find the error in the code. I already know of the working code which is in pandas
Below is the correct code, but I would recommend you learn to use the python debugger so you can resolve any logic flaws in your code next time. Documentation on the python debugger can be found here:
https://docs.python.org/3/library/pdb.html
Your code was structured in a way that meant for each csv file, you were not setting the file name until after you attempted to open it. The immediate error you saw was caused because you tried to call make_json() before you defined the values for csvFile and jsonFile.
I would recommend changing the code to:
import csv
import json
import glob
def make_json(csvList):
    """Convert each CSV file in *csvList* to a JSON file next to it.

    Each row is stored under the value of its ``id`` column, so a later row
    that repeats an id replaces the earlier one. The output path is the input
    path with its final extension replaced by ``.json``.

    Args:
        csvList: iterable of paths to CSV files that have an ``id`` column.

    Raises:
        KeyError: if a row has no ``id`` column.
    """
    import os  # local import: the surrounding snippet does not import os

    for csvFile in csvList:
        # newline='' lets the csv module handle quoted line breaks itself.
        with open(csvFile, encoding='utf-8', newline='') as csvf:
            data = {row['id']: row for row in csv.DictReader(csvf)}
        # splitext removes only the final extension; split('.')[-2] picked the
        # wrong piece for "./a.csv" or "report.v1.csv" and raised IndexError
        # for names without a dot.
        jsonFile = os.path.splitext(csvFile)[0] + '.json'
        with open(jsonFile, 'w', encoding='utf-8') as jsonf:
            json.dump(data, jsonf, indent=4)
make_json(glob.glob('*.csv'))
You should try this
import csv, json, os, glob
os.chdir(r'C:\Users\user\Desktop\test' )
result = glob.glob( '*.csv' )
print(result)
def make_json(paths=None):
    """Write a JSON copy of every CSV file in *paths*.

    Each CSV becomes a JSON array with one object per row, saved beside the
    source file with the final extension replaced by ``.json``.

    Args:
        paths: iterable of CSV file paths. When omitted, the module-level
            ``result`` list is used, which keeps the original zero-argument
            call working.
    """
    if paths is None:
        paths = result
    for csv_path in paths:
        # newline='' lets the csv module handle quoted line breaks itself.
        with open(csv_path, encoding='utf-8', newline='') as csvf:
            data = list(csv.DictReader(csvf))
        # splitext removes only the final extension; split('.')[-2] picked the
        # wrong piece for multi-dot names and failed on names without a dot.
        json_path = os.path.splitext(csv_path)[0] + '.json'
        with open(json_path, 'w', encoding='utf-8') as jsonf:
            json.dump(data, jsonf)
make_json()
You did not initialize both the arguments of make_json() - (csvFilePath & jsonFilePath)

Open a csv file in 'rb' mode and iterate over it

I've seen an older answer for the post, Inline CSV File Editing with Python, about how to modify a csv file and save it. It uses the tempfile module. I have used the same code in Python 3.6 but I get an error because I read the file in binary mode and I cannot iterate over it.
The code below creates a simple CSV file
import csv
with open('proglanguages.csv', mode='w', newline='') as csv_file:
fieldnames = ['lang', 'value']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writerow({'lang': 'Java', 'value': '90'})
writer.writerow({'lang': 'c', 'value': '80'})
writer.writerow({'lang': 'Perl', 'value': '78'})
writer.writerow({'lang': 'C++', 'value': '92'})
writer.writerow({'lang': 'Python', 'value': '0'})
writer.writerow({'lang': 'Fortran', 'value': '70'})
The code below modifies the previous generated CSV file,
from tempfile import NamedTemporaryFile
import shutil
import csv
filename = 'proglanguages.csv'
tempfile = NamedTemporaryFile(delete=False)
with open(filename, mode='rb') as csvFile, tempfile:
reader = csv.reader(csvFile, delimiter=',', quotechar='"')
writer = csv.writer(tempfile, delimiter=',', quotechar='"')
for row in reader:
if row[0] == 'Python':
row[1] = '100'
writer.writerow(row)
else:
writer.writerow(row)
shutil.move(tempfile.name, filename)
How can I iterate with the for loop, modify the item, and write it to the temp file?
You are out of luck here. Answers from Martijn Pieters are always high quality, and this one is too, but it was targeted at Python 2 while you are using Python 3, and the csv semantics have changed. In Python 3 you should no longer use binary mode; instead, open the files the same way you did for the initial csv file:
...
tempfile = NamedTemporaryFile(delete=False, newline='', mode='w')
with open(filename, mode='r', newline='') as csvFile, tempfile:
reader = csv.reader(csvFile)
writer = csv.writer(tempfile)
...

Python csv register_dialect delimiter is not working

I have written the code below to read in a large csv file with many variables and then print just one variable for every row to the outfile. It works, except that the delimiter is not being picked up.
import csv
fieldnames = ['tag']
outfile = open('ActiveTags.txt', 'w')
csv.register_dialect('me', delimiter=',', quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='')
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect='me')
with open('ActiveList_16.csv', 'r', newline='') as f:
reader = csv.DictReader(f)
for row in reader:
Tag = row['Tag']
writer.writerow({'tag': Tag})
outfile.close()
What am I missing here? I do not understand why the delimiter is not working on the outfile.

Download txt with ';' delimiter and convert to .CSV with python

I have to download this .txt file: Link
Then I have to convert it to .csv and remove all the header lines.
I tried to do this, but it doesn't work for me, this is my code:
import urllib
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
out_csv.writerows(in_txt)
In this code I haven't done anything about the header yet, because even the conversion is not working.
You might use this code for Python 2:
import urllib
import csv
import urllib2
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
#urllib.request.urlretrieve(url_of_file, outfilename)
response = urllib2.urlopen(url_of_file)
output = open(outfilename,'wb')
output.write(response.read())
output.close()
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
in_txt = csv.reader(infile, delimiter = ';')
out_csv = csv.writer(outfile)
i = 0
for row in in_txt:
i +=1
if i>3:
out_csv.writerow(row)
Python 3:
import urllib.request
import csv
outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
# Both files are opened in text mode with newline='' so the csv module controls
# line endings, and the output encoding is named so it matches the input
# instead of depending on the platform's locale default.
with open(outfilename, encoding='utf-8', newline='') as infile, \
        open(csvfile, 'w', encoding='utf-8', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    # Drop the first three rows (treated here as the header) and copy the rest.
    out_csv.writerows(row for row_index, row in enumerate(in_txt) if row_index >= 3)
There is no request namespace in urllib.
Replace this line
urllib.request.urlretrieve(url_of_file, outfilename)
With this
urllib.urlretrieve(url_of_file, outfilename)
UPDATED:
You need to import urllib.request, not just urllib.
Also, you need to open the files in text mode, not binary mode (which is 'rb', or 'wb').
import urllib.request
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
urllib.request.urlretrieve(url_of_file, outfilename)
# Text mode is required in Python 3; newline='' additionally lets the csv
# module manage line endings, avoiding \r\r\n row endings on Windows.
with open(outfilename, "r", newline='') as infile, open(csvfile, 'w', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    out_csv.writerows(in_txt)

Categories