I have two CSV files that I need to convert to JSON. My code is below:
import csv
import json
import os
import glob

os.chdir(r'C:\Users\user\Desktop\test')
result = glob.glob('*.csv')
print(result)

def make_json(csvFile, jsonFile):
    csvFile, jsonFile = '', ''
    for i in result:
        data = {}
        with open(csvFile, encoding='utf-8') as csvf:
            csvReader = csv.DictReader(csvf)
            for rows in csvReader:
                key = rows['id']
                data[key] = rows
        with open(jsonFile, 'w', encoding='utf-8') as jsonf:
            jsonf.write(json.dumps(data, indent=4))
        csvFilePath = f"{i}"
        jsonFilePath = f"{i.split('.')[-2]}.json"

make_json(csvFile, jsonFile)
I got an error that csvFile is not defined, but csvFilePath is set near the end of the function.
Disclaimer: please find the error in this code. I already know a working version that uses pandas.
Below is the correct code, but I would recommend learning to use the Python debugger so you can resolve logic flaws in your code yourself next time. Documentation on the Python debugger can be found here:
https://docs.python.org/3/library/pdb.html
Your code was structured so that, for each CSV file, you did not set the file name until after you had already attempted to open it. The immediate error you saw was raised because you called make_json() before defining values for csvFile and jsonFile.
I would recommend changing the code to:
import csv
import json
import glob

def make_json(csvList):
    for csvFile in csvList:
        data = {}
        with open(csvFile, encoding='utf-8') as csvf:
            csvReader = csv.DictReader(csvf)
            for rows in csvReader:
                key = rows['id']
                data[key] = rows
        jsonFile = f"{csvFile.split('.')[-2]}.json"
        with open(jsonFile, 'w', encoding='utf-8') as jsonf:
            jsonf.write(json.dumps(data, indent=4))

make_json(glob.glob('*.csv'))
You should try this:
import csv, json, os, glob

os.chdir(r'C:\Users\user\Desktop\test')
result = glob.glob('*.csv')
print(result)

def make_json():
    for i in result:
        with open(i, encoding='utf-8') as csvf:
            data = [row for row in csv.DictReader(csvf)]
        with open(f"{i.split('.')[-2]}.json", 'w', encoding='utf-8') as jsonf:
            json.dump(data, jsonf)

make_json()
You did not define either of the arguments you pass to make_json(); the paths you do build (csvFilePath and jsonFilePath) are only assigned inside the function, after the files have already been opened.
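For example, you could build each pair of paths before the call. A minimal sketch that keeps the question's naming (it assumes every CSV has an id column, as the original code does):

import csv
import glob
import json

def make_json(csvFile, jsonFile):
    data = {}
    with open(csvFile, encoding='utf-8') as csvf:
        for rows in csv.DictReader(csvf):
            data[rows['id']] = rows  # assumes an 'id' column exists
    with open(jsonFile, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))

# Define both arguments before calling the function, one pair per file.
for i in glob.glob('*.csv'):
    make_json(i, f"{i.split('.')[-2]}.json")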
I have a folder of images saved under their IDs. I would like to search for those IDs in the rows of a CSV file, then copy the matching rows into a new CSV file. My code is as follows:
import os
import csv

folder = os.listdir("[image folder]")
file_in = "[csv containing info rows]"
file_out = "[new csv to create]"

with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
    writer = csv.writer(f_output)
    reader = csv.reader(f_input)
    writer.writerow(["ImageID", "LabelName", "XMin", "XMax", "YMin", "YMax", "IsGroupOf"])  # writes header of new csv
    for filename in folder:
        idx = os.path.splitext(filename)[0]
        for row in reader:
            if idx == row[0]:
                print(row)
                writer.writerow(row)
It outputs only the first matching ID into the new CSV, instead of all the matches, which number a few thousand. Sorry for the simple task, but I've been stumped for quite a while.
e.g.
Sample CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
00075905539074f2,/m/04yx4,0.020477816,0.32935154,0.0956023,0.665392,0
00075905539074f2,/m/04yx4,0.3208191,0.63993174,0,0.6596558,0
00075905539074f2,/m/04yx4,0.6757679,0.9914676,0.17208412,0.94837475,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0007d6cf88afaa4a,/m/0bt9lr,0.17342657,0.9020979,0.21678321,0.94172496,0
0008e425fb49a2bf,/m/0bt9lr,0.22610295,0.7150735,0.11170213,0.93439716,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0
3 sample images in folder: 0001eeaf4aed83f9.jpg, 0007cebe1b2ba653.jpg, 0009bad4d8539bb4.jpg
Expected output CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0
You could use the following approach. (Your version only copies matches for the first image because the csv.reader is exhausted after the inner loop's first full pass over the file, so later filenames find nothing left to read.) First, use glob.glob() to get only the jpg files; a set can hold all of the filenames found (without their extensions).
Now you can just read the sample CSV file one row at a time and use a simple in check to test whether the ImageID is one of the file names in the set.
For example:
import glob
import csv
import os

files = {os.path.splitext(filename)[0] for filename in glob.glob("*.jpg")}
file_in = "sample.csv"
file_out = "output.csv"

with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
    csv_input = csv.reader(f_input)
    header = next(csv_input)
    csv_output = csv.writer(f_output)
    csv_output.writerow(header)
    for row in csv_input:
        if row[0] in files:  # Is ImageID one of the filenames found?
            print(row)
            csv_output.writerow(row)
This would give you an output.csv file as follows:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0
I am able to change the data to lowercase and remove all the punctuation, but I have trouble saving the corrected data to a CSV file.
import csv
import re
import os

input_file = raw_input("Name of the CSV file:")
output_file = raw_input("Output Name:")
reg_test = input_file
result = ''
with open(input_file, 'r') as csvfile:
    with open(output_file, 'w') as csv_out_file:
        filereader = csv.reader(csvfile)
        filewriter = csv.writer(csv_out_file)
        for row in filereader:
            row = re.sub('[^A-Za-z0-9]+', '', str(row))
            result += row + ','
        lower = (result).lower()
csvfile.close()
csv_out_file.close()
You do not have to close the files; that is done automatically when the context of the with statement ends. You do, however, have to actually write something after you create the csv.writer, e.g. with writerow():
import csv
import re

input_file = 'in.csv'
output_file = 'out.csv'

with open(input_file, 'r') as csvfile, open(output_file, 'w') as csv_out_file:
    filereader = csv.reader(csvfile)
    filewriter = csv.writer(csv_out_file)
    for row in filereader:
        new_row = re.sub('[^A-Za-z0-9]+', '', str(row))  # manipulate the row
        filewriter.writerow([new_row.lower()])  # write the new row to the out file
# the files are closed automatically after the context of the with statement is over
This saves the manipulated content of the first csv file to the second.
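Note that str(row) turns the whole row (a Python list) into one string, so the output loses its column structure. If you want to keep the columns, a variant could clean each field separately (a sketch of my own, not part of the original answer):

import csv
import re

input_file = 'in.csv'
output_file = 'out.csv'

with open(input_file, 'r', newline='') as csvfile, open(output_file, 'w', newline='') as csv_out_file:
    filereader = csv.reader(csvfile)
    filewriter = csv.writer(csv_out_file)
    for row in filereader:
        # Clean and lowercase each cell on its own so the column layout survives.
        filewriter.writerow([re.sub('[^A-Za-z0-9]+', '', cell).lower() for cell in row])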
I have written the code below to read in a large CSV file with many variables and then print just one variable for every row to the outfile. It is working, except that the delimiter is not being picked up.
import csv

fieldnames = ['tag']
outfile = open('ActiveTags.txt', 'w')
csv.register_dialect('me', delimiter=',', quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='')
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect='me')

with open('ActiveList_16.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        Tag = row['Tag']
        writer.writerow({'tag': Tag})
outfile.close()
What am I missing here? I do not understand why the delimiter is not working on the outfile.
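One thing worth knowing while debugging this: the delimiter is only ever written between fields, so a DictWriter with a single fieldname never emits it at all. A small demo of my own (not part of the original post) that makes the dialect's effect visible by using two columns:

import csv

# Same dialect as the question, except lineterminator='\n' so that each
# row lands on its own line (with lineterminator='' all rows run together).
csv.register_dialect('me', delimiter=',', quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='\n')

with open('demo.txt', 'w', newline='') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=['tag', 'count'], dialect='me')
    writer.writeheader()
    writer.writerow({'tag': 'A1', 'count': 3})

# demo.txt now contains:
# 'tag','count'
# 'A1','3'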
I'm new to Python and am trying to modify a CSV file so that I can delete specific rows whose fields match a given list.
In my current code I can find the rows I want to delete, but I can't delete them and save the changes to the same file (replacing it).
import os, sys, glob
import time, csv

# Open a file
path = 'C:\\Users\\tzahi.k\\Desktop\\netzer\\'
dirs = os.listdir(path)
fileslst = []
alertsCode = ("42001", "42003", "42006", "51001", "51002", "61001", "61002", "71001",
              "71002", "71003", "71004", "71005", "71006", "72001", "72002", "72003", "72004",
              "82001", "82002", "82003", "82004", "82005", "82006", "82007", "83001", "84001")

# This would print the unnecessary codes
for file in dirs:
    if "ALERTS" in file.upper():
        fileslst.append(file)
fileslst.sort()

with open(fileslst[-1], 'rb') as csvfile:
    csvReader = csv.reader(csvfile)
    for row in csvReader:
        for alert in alertsCode:
            if any(alert in row[2] for s in alertsCode):
                print row
Any help?
Read all the rows into a list using a list comprehension, excluding the unwanted rows. Then rewrite the rows to the same file in write mode, which overwrites/replaces the content of the file:
with open(fileslst[-1], 'rb') as csvfile:
    csvReader = csv.reader(csvfile)
    clean_rows = [row for row in csvReader if not any(alert in row[2] for alert in alertsCode)]

with open(fileslst[-1], 'wb') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(clean_rows)
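The code above follows the question's Python 2 style ('rb'/'wb' and print row). On Python 3 the csv module expects text-mode files opened with newline=''; a sketch of the same approach under that assumption:

import csv

with open(fileslst[-1], 'r', newline='') as csvfile:
    clean_rows = [row for row in csv.reader(csvfile)
                  if not any(alert in row[2] for alert in alertsCode)]

with open(fileslst[-1], 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(clean_rows)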