Python: convert csv file to json file

I am trying to convert a csv file to a json file by reading the content of the csv file and writing it to a new json file. I am encountering an error at the point where I try to turn a column of the csv file into dictionary keys. How can I resolve this error?
My code for reference:
import csv
import json

def jsonformat(infile, outfile):
    contents = {}
    csvfile = open(infile, 'r')
    reader = csvfile.read()
    for m in reader:
        key = m['Order ID']
        contents[key] = m
    jsonfile = open(outfile, 'w')
    json_contents = json.dumps(contents, indent=4)
    jsonfile.write(json_contents)
    csvfile.close()
    jsonfile.close()
    return json_contents

infile = 'orders.csv'
outfile = 'orders.json'
output = jsonformat(infile, outfile)
print(output)
error message:
TypeError                                 Traceback (most recent call last)
<ipython-input> in <module>
     28 outfile = 'orders.json'
     29
---> 30 output = jsonformat(infile,outfile)
     31
     32 print(output)

<ipython-input> in jsonformat(infile, outfile)
     12
     13     for m in reader:
---> 14         key = m['Order ID']
     15         contents[key] = m
     16

TypeError: string indices must be integers

You aren't reading the CSV file the correct way. Use csv.DictReader to read each row as a dictionary. Then, you'll be able to use for m in reader: key = m['Order ID'].
Change reader = csvfile.read() to reader = csv.DictReader(csvfile)
As of now, reader is a string that contains all the contents of your file. for m in reader makes m each character in this string, and you cannot access the "Order ID" key on a character.
After you make the change, reader will be a DictReader object, and iterating over it will return each row as a dictionary.
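Putting the pieces together, a minimal sketch of the corrected function (assuming the CSV really has an 'Order ID' header column, as in the question):

import csv
import json

def jsonformat(infile, outfile):
    contents = {}
    with open(infile, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)       # each row is a dict keyed by the header row
        for row in reader:
            contents[row['Order ID']] = row    # index the rows by their Order ID
    json_contents = json.dumps(contents, indent=4)
    with open(outfile, 'w') as jsonfile:
        jsonfile.write(json_contents)
    return json_contents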

You can use csv.DictReader.
reader = csv.DictReader(csvfile)
for line in reader:
    key = line['Order ID']
    contents[key] = line

Related

CSV data matching: 'dict_keys' object is not subscriptable

I have put together code to compare and match data between two CSVs and collate that data into a new CSV. The CSVs have similar data but the column names and their positions are different.
When I Run and Debug, it throws the following error at line 42:
Exception has occurred: TypeError
'dict_keys' object is not subscriptable
File "D:\Documents\Python\iris\accountmanagement2.py", line 42, in <module>
file1_columns[0]: file2_columns[1],
TypeError: 'dict_keys' object is not subscriptable
This is the code I have put together for this task. I tried using type(data1[0].keys()) but that just says 'type' object is not subscriptable.
Really need some advice or ideas as I am completely stumped :)
import csv

# specify the file paths for the two CSV files
file1 = "D:\Documents\Python\iris\esr.csv"
file2 = "D:\Documents\Python\iris\iris.csv"

def define_columns(file_path):
    with open(file_path, "r") as f:
        reader = csv.reader(f)
        # get the first row (column headers)
        columns = next(reader)
    return columns

# Define the keys of the two files
file1_keys = define_columns(file1)
file2_keys = define_columns(file2)

# read the data from the first CSV file
data1 = []
with open(file1, "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Convert data to ASCII
        for key in file1_keys:
            row[key] = row[key].encode("ascii", "ignore").decode()
        data1.append(row)

# read the data from the second CSV file
data2 = []
with open(file2, "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Convert data to ASCII
        for key in file2_keys:
            row[key] = row[key].encode("ascii", "ignore").decode()
        data2.append(row)

# Define the columns name of the two files
file1_columns = data1[0].keys()
file2_columns = data2[0].keys()

col_map = {
    file1_columns[0]: file2_columns[1],
    file1_columns[2]: file2_columns[5],
    file1_columns[1]: file2_columns[6],
    file1_columns[4]: file2_columns[7]
}

# match the data from the two files based on a specific column (e.g. "ID")
matching_data = []
non_matching_data1 = []
non_matching_data2 = []

for row1 in data1:
    matched = False
    for row2 in data2:
        if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
            matching_data.append({**row1, **row2})
            matched = True
            break
    if not matched:
        non_matching_data1.append(row1)

for row2 in data2:
    matched = False
    for row1 in data1:
        if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
            matched = True
            break
    if not matched:
        non_matching_data2.append(row2)

# create a new CSV file with the matched data
with open("matched_data.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=list(col_map.keys()) + list(col_map.values()))
    writer.writeheader()
    for row in matching_data:
        writer.writerow(row)

# create a new CSV file with the non-matching data from file1
with open("non_matching_data1.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=list(data1[0].keys()))
    writer.writeheader()
    for row in non_matching_data1:
        writer.writerow(row)

# create a new CSV file with the non-matching data from file2
with open("non_matching_data2.csv", "w") as f:
    writer = csv.DictWriter(f, fieldnames=list(data2[0].keys()))
    writer.writeheader()
    for row in non_matching_data2:
        writer.writerow(row)
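The TypeError here comes from indexing the dict_keys view returned by data1[0].keys(): a keys view is not subscriptable, but a list made from it is. A minimal illustration with a hypothetical row (not taken from the question's files):

row = {"ID": "1", "Forename": "Leo", "Surname": "Days"}   # hypothetical row

keys_view = row.keys()
# keys_view[0]               # TypeError: 'dict_keys' object is not subscriptable

columns = list(keys_view)    # ['ID', 'Forename', 'Surname']
print(columns[0])            # ID

So changing file1_columns = data1[0].keys() to file1_columns = list(data1[0].keys()) (and likewise for file2_columns) should get past this particular error.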

Python script using json.load to compare two files and replace strings

I have a JSON file like this: [{"ID": "12345", "Name":"John"}, {"ID":"45321", "Name":"Max"}...] called myclass.json. I used json.load to get the "ID" and "Name" values.
I have another .txt file with the content below. File name is list.txt:
Student,12345,Age 14
Student,45321,Age 15
.
.
.
I'm trying to create a script in Python that compares the two files line by line and replaces the student ID with the student's name in the list.txt file, so the new file would be:
Student,John,Age 14
Student,Max,Age 15
.
.
Any ideas?
My code so far:
import json

with open('/myclass.json') as f:
    data = json.load(f)
    for key in data:
        x = key['Name']
        z = key['ID']

with open('/myclass.json', 'r') as file1:
    with open('/list.txt', 'r+') as file2:
        for line in file2:
            x = z
try this:
import json
import csv

with open('myclass.json') as f:
    data = json.load(f)

with open('list.txt', 'r') as f:
    reader = csv.reader(f)
    rows = list(reader)

def get_name(id_):
    for item in data:
        if item['ID'] == id_:
            return item["Name"]

with open('list.txt', 'w') as f:
    writer = csv.writer(f)
    for row in rows:
        name = get_name(id_=row[1])
        if name:
            row[1] = name
    writer.writerows(rows)
Keep in mind that this script technically does not replace the items in the list.txt file one by one; instead it reads the entire file in and then overwrites list.txt entirely, rebuilding it from scratch. I suggest making a backup of list.txt or naming the new txt file something different, in case the program crashes on some unexpected input.
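For example, one way to keep an untouched copy before the overwrite (the backup file name here is just illustrative):

import shutil

# keep a copy of the original file before rewriting it in place
shutil.copyfile('list.txt', 'list_backup.txt')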
Another option is to open each file individually for each mode, appending to a list the lines whose ID values match between the two files:
import json

with open('myclass.json', 'r') as f_in:
    data = json.load(f_in)

j = 0
lis = []
with open('list.txt', 'r') as f_in:
    for line in f_in:
        if data[j]['ID'] == line.split(',')[1]:
            s = line.replace(line.split(',')[1], data[j]['Name'])
            lis.append(s)
        j += 1

with open('list.txt', 'w') as f_out:
    for i in lis:
        f_out.write(i)

How to resolve KeyError: <variable> in Python?

Hi, I'm trying to open a simple csv file with the header taken from an external file.
I've got a file named name.csv with the following content:
Leo,Days,Ju
Tomas,Lee,Bruce
Max,Perez,Smith
If I code:
import csv

sep = ','
with open('name.csv') as csvfile:
    fieldnames = ['name', 'paterno', 'materno']
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        list = (row['name'], \
                row['materno'])
        print(sep.join(list))
The result is as desired:
Leo,Ju
Tomas,Bruce
Max,Smith
But I've got an extra file with the headers, named hdr_name.txt, containing:
['name', 'paterno', 'materno']
With this new code:
import csv

sep = ','
fieldnames = open('hdr_name.txt', 'r').read()
with open('name.csv') as csvfile:
    print(fieldnames)
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        list = (row['name'], \
                row['materno'])
        print(sep.join(list))
I get this result:
Traceback (most recent call last):
File "<stdin>", line 5, in <module>
KeyError: 'name'
But if I ask whether 'name' is in fieldnames, it is there!
>>> 'name' in fieldnames
True
>>>
What am I doing wrong when loading the header from the external file?
fieldnames is a string that looks like this:
"['name', 'paterno', 'materno']"
Naturally, a membership test will return true, but that does not imply fieldnames is a list. Remember, file.read returns a string - you still need to cast it to a list.
This doesn't look like JSON, so I'd recommend ast:
import ast

with open('hdr_name.txt', 'r') as f:
    fieldnames = ast.literal_eval(f.read().strip())
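With that change, fieldnames is a real list rather than a string, and the original loop should work as expected; a short sketch using the question's files:

import ast
import csv

sep = ','
with open('hdr_name.txt', 'r') as f:
    fieldnames = ast.literal_eval(f.read().strip())   # now a list, not a string

with open('name.csv') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        print(sep.join((row['name'], row['materno'])))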

Python file matching and appending

This is one file result.csv:
M11251TH1230
M11543TH4292
M11435TDS144
This is another file sample.csv:
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
Can I write a Python program that compares both files and, if a line in result.csv matches the first word of a line in sample.csv, appends 1, else appends 0, to every line in sample.csv?
import pandas as pd

d1 = pd.read_csv("1.csv", names=["Type"])
d2 = pd.read_csv("2.csv", names=["Type", "Col2", "Col3"])
d2["Index"] = 0
for x in d1["Type"]:
    # use .loc to avoid chained-assignment pitfalls
    d2.loc[d2["Type"] == x, "Index"] = 1
d2.to_csv("3.csv", header=False)
Considering "1.csv" and "2.csv" are your csv input files and "3.csv" is the result you needed
The solution using csv.reader and csv.writer (csv module):
import csv

newLines = []

# change the file path to the actual one
with open('./data/result.csv', newline='\n') as csvfile:
    data = csv.reader(csvfile)
    items = [''.join(line) for line in data]

with open('./data/sample.csv', newline='\n') as csvfile:
    data = list(csv.reader(csvfile))
    for line in data:
        line.append(1 if line[0] in items else 0)
        newLines.append(line)

with open('./data/sample.csv', 'w', newline='\n') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(newLines)
The sample.csv contents:
M11435TDS144,STB#1,Router#1,1
M11543TH4292,STB#2,Router#1,1
M11509TD9937,STB#3,Router#1,0
M11543TH4258,STB#4,Router#1,0
With only one column, I wonder why you made result.csv a csv at all. If it is not going to have any more columns, a simple file read operation would suffice. Converting the data from result.csv to a dictionary will also help it run quickly.
import csv

result_file = "result.csv"
sample_file = "sample.csv"

with open(result_file) as fp:
    result_data = fp.read()
result_dict = dict.fromkeys(result_data.split("\n"))
"""
You can change the above logic, in case you have very few fields on csv like this:
result_data = fp.readlines()
result_dict = {}
for result in result_data:
    key, other_field = result.split(",", 1)
    result_dict[key] = other_field.strip()
"""

# Since sample.csv is a real csv, using csv reader and writer
output_data = []
with open(sample_file, "rb") as fp:
    sample_data = csv.reader(fp)
    for data in sample_data:
        data.append(int(data[0] in result_dict))   # 1 if the key is in result.csv, else 0
        output_data.append(data)

with open(sample_file, "wb") as fp:
    data_writer = csv.writer(fp)
    data_writer.writerows(output_data)
The following snippet of code will work for you
import csv

with open('result.csv', 'rb') as f:
    reader = csv.reader(f)
    result_list = []
    for row in reader:
        result_list.extend(row)

with open('sample.csv', 'rb') as f:
    reader = csv.reader(f)
    sample_list = []
    for row in reader:
        if row[0] in result_list:
            sample_list.append(row + [1])
        else:
            sample_list.append(row + [0])

with open('sample.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerows(sample_list)

How to not just add a new first column to a csv but also alter the header names

I would like to do the following: read a csv file, add a new first column, then rename some of the columns, then load the records from the csv file. Ultimately, I would like the first column to be populated with the file name.
I'm fairly new to Python and I've kind of worked out how to change the fieldnames; however, loading the data is a problem as it's looking for the original fieldnames, which no longer match.
Code snippet
import csv
import os

inputFileName = "manifest1.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"

with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
    r = csv.DictReader(inFile)
    fieldnames = ['MapSvcName', 'ClientHostName', 'Databasetype', 'ID_A', 'KeepExistingData', 'KeepExistingMapCache', 'Name', 'OnPremisePath', 'Resourcestype']
    w = csv.DictWriter(outfile, fieldnames)
    w.writeheader()

    # *** Here is where I start to go wrong
    # copy the rest
    for node, row in enumerate(r, 1):
        w.writerow(dict(row))
Error
File "D:\Apps\Python27\ArcGIS10.3\lib\csv.py", line 148, in _dict_to_list
+ ", ".join([repr(x) for x in wrong_fields]))
ValueError: dict contains fields not in fieldnames: 'Databases [xsi:type]', 'Resources [xsi:type]', 'ID'
Would like some assistance to not just learn but truly understand what I need to do.
Cheers and thanks
Peter
Update..
I think I've worked it out
import csv
import os

inputFileName = "manifest1.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"

with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
    r = csv.reader(inFile)
    w = csv.writer(outfile)

    header = next(r)
    header.insert(0, 'MapSvcName')
    #w.writerow(header)

    next(r, None)  # skip the first row from the reader, the old header

    # write new header
    w.writerow(['MapSvcName', 'ClientHostName', 'Databasetype', 'ID_A', 'KeepExistingData', 'KeepExistingMapCache', 'Name', 'OnPremisePath', 'Resourcestype'])

    prevRow = next(r)
    prevRow.insert(0, '0')
    w.writerow(prevRow)

    for row in r:
        if prevRow[-1] == row[-1]:
            val = '0'
        else:
            val = prevRow[-1]
        row.insert(0, val)
        prevRow = row
        w.writerow(row)
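For the original DictReader/DictWriter approach, the ValueError appears because the rows read from the file still carry the old header names ('ID', 'Databases [xsi:type]', 'Resources [xsi:type]'), which are not in the new fieldnames list. A sketch of one way to handle that, remapping the keys and filling the new first column with the file name; the old-to-new mapping below is an assumption based on the error message and the desired header, and the binary file modes match the question's Python 2.7 environment:

import csv
import os

inputFileName = "manifest1.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"

# Assumed mapping from the original header names to the new ones; adjust to the real headers
rename = {
    'ID': 'ID_A',
    'Databases [xsi:type]': 'Databasetype',
    'Resources [xsi:type]': 'Resourcestype',
}

with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
    r = csv.DictReader(inFile)
    fieldnames = ['MapSvcName'] + [rename.get(name, name) for name in r.fieldnames]
    w = csv.DictWriter(outfile, fieldnames)
    w.writeheader()
    for row in r:
        newRow = {rename.get(key, key): value for key, value in row.items()}  # rename keys
        newRow['MapSvcName'] = inputFileName   # first column holds the file name
        w.writerow(newRow)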
