Convert CSV file to JSON with python

Convert CSV file to JSON with python - python

I am trying to convert my CSV email list to JSON format to mass email via an API. This is my code so far, but I am having trouble with the output: nothing is printed in my VS Code editor.
import csv
import json
def make_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into a JSON object keyed by the ``No`` column.

    Every CSV row becomes one entry of a single dictionary; the entry's key
    is the row's ``No`` value and its value is the whole row (header -> cell).
    The result is written to ``jsonFilePath``; nothing is printed.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read. Its header row must
            contain a ``No`` column.
        jsonFilePath: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If the CSV has no ``No`` column.
    """
    data = {}
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields that contain newlines are read correctly.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in csv.DictReader(csvf):
            # Rows that share a "No" value overwrite earlier ones.
            data[row['No']] = row
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))
# Source CSV and destination JSON paths.
csvFilePath = r'/data/csv-leads.csv'
jsonFilePath = r'Names.json'
# Run the conversion. make_json only writes the JSON file; it prints nothing
# to the console.
make_json(csvFilePath, jsonFilePath)
Here is my desired JSON format
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"Name": "Youngstown Coffee",
"ConsentToTrack": "Yes"
},
Here's my CSV list
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen & Bakery,catering#zylberschtein.com,Yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,Yes

It looks like you could use a csv.DictReader to make this easier.
If I have data.csv that looks like this:
Name,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
I can convert it into JSON like this:
>>> import csv
>>> import json
>>> fd = open('data.csv')
>>> reader = csv.DictReader(fd)
>>> print(json.dumps(list(reader), indent=2))
[
{
"Name": "Zylberschtein's Delicatessen",
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes"
},
{
"Name": "Youngstown Coffee",
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes"
}
]
Here I've assumed the headers in the CSV can be used verbatim. I'll update this with an example if you need to modify key names (e.g. convert "No" to "Name").
If you need to rename a column, it might look more like this:
import csv
import json
# newline='' is how the csv module expects its input files to be opened.
with open('data.csv', newline='') as fd:
    reader = csv.DictReader(fd)
    data = []
    for row in reader:
        # Rename the "No" column to "Name". pop() removes the old key and the
        # new key is inserted afterwards, so "Name" ends up last in each record.
        row['Name'] = row.pop('No')
        data.append(row)
# The rows are already in memory, so the file can be closed before printing.
print(json.dumps(data, indent=2))
Given this input:
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
This will output:
[
{
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes",
"Name": "Zylberschtein's Delicatessen"
},
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes",
"Name": "Youngstown Coffee"
}
]
And to print in my editor, is it simply print(json.dumps(list(reader), indent=2))?
I'm not really familiar with your editor; print is how you generate console output in Python.

Related

CSV to JSON converter (Grouping by same keys values)

I'm trying to convert CSV to JSON. I've searched online, but I can't find the right way to modify my code to get the desired output.
This is my code in python:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Write every row of a CSV file as one object of a JSON array.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read; the first line is
            used as the header row.
        jsonFilePath: Path of the JSON file to write (overwritten if present).
    """
    # reading csv (encoding is important); newline='' leaves line-ending
    # handling to the csv module
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # DictReader yields one dictionary per data row, keyed by the header;
        # collect them all into the list that becomes the JSON array
        jsonArray = list(csv.DictReader(csvf))
    # conversion
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)
# Input CSV and output JSON file names.
csvFilePath='example.csv'
jsonFilePath='output.json'
# Run the conversion defined above.
csv_to_json(csvFilePath, jsonFilePath)
and this is my csv file format:
My actual JSON Output:
[
{
"Area": "IT",
"Employee": "Carl",
},
{
"Area": "IT",
"Employee": "Walter",
},
{
"Area": "Financial Resources",
"Employee": "Jennifer",
}
]
My desired JSON Output:
[
{
"Area": "IT",
"Employee": ["Carl","Walter"],
},
{
"Area": "Financial Resources",
"Employee": ["Jennifer"],
}
]
Thank you in advance!

Something like this should work.
def csv_to_json(csvFilePath, jsonFilePath):
    """Group the ``Employee`` values of a CSV by ``Area`` and write them as JSON.

    The output is a JSON array with one object per distinct area, in order of
    first appearance: ``{"Area": <area>, "Employee": [<employee>, ...]}``.

    Args:
        csvFilePath: Path of the UTF-8 CSV file to read; it must have
            ``Area`` and ``Employee`` columns.
        jsonFilePath: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If either column is missing from the CSV header.
    """
    areas = {}
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in csv.DictReader(csvf):
            # setdefault creates the employee list the first time an area is
            # seen and returns the existing list afterwards.
            areas.setdefault(row["Area"], []).append(row["Employee"])
    # convert dictionary to desired output format.
    jsonArray = [{"Area": area, "Employee": employees} for area, employees in areas.items()]
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)

Converting excel spreadsheet to json

I want to convert an excel spreadsheet data to a JSON file. Here is the code I currently have:
Data
excel spreadsheet
Code
import xlrd
from collections import OrderedDict
import json
wb = xlrd.open_workbook('./file1.xlsx')
sh = wb.sheet_by_index(0)
data_list = []
# Walks the sheet row by row, starting at row 1 (row 0 is skipped). Each row
# produces one record built from that row's first two cells.
for rownum in range(1, sh.nrows):
    data = OrderedDict()
    row_values = sh.row_values(rownum)
    data['name'] = row_values[0]
    data['description'] = row_values[1]
    data_list.append(data)
# Wrap the records in the top-level "columns" object.
data_list = {'columns': data_list}
j = json.dumps(data_list)
with open('seq1.json', 'w') as f:
    f.write(j)
Output
{"columns": [{"name": "FILEID", "description": "FILETYPE"}]}
Expected output
{
"columns": [
{
"name": "fileid",
"description": "FILEID"
},
{
"name": "filetype",
"description": "FILETYPE"
},
{
"name": "stusab",
"description": "STUSAB"
},
{
"name": "chariter",
"description": "CHARITER"
},
{
"name": "sequence",
"description": "SEQUENCE"
},
{
"name": "logrecno",
"description": "LOGRECNO"
}
],
The "name" column should be displaying the first row while the "description" column should be displaying the second row.
What modification can I do in my function to get the output I am looking for?

You need to iterate over columns, not rows
import xlrd
from collections import OrderedDict
import json
wb = xlrd.open_workbook('./file1.xls')
sh = wb.sheet_by_index(0)
# Row 0 supplies the names and row 1 the descriptions. Read each row once
# instead of re-reading both on every loop iteration.
names = sh.row_values(0)
descriptions = sh.row_values(1)
data_list = []
for colnum in range(0, sh.ncols):
    # A fresh dict per column, so the records never share state.
    data = OrderedDict()
    data['name'] = names[colnum]
    data['description'] = descriptions[colnum]
    data_list.append(data)
# Wrap the records in the top-level "columns" object.
data_list = {'columns': data_list}
j = json.dumps(data_list)
with open('seq1.json', 'w') as f:
    f.write(j)

You should give a try to:
import excel2json
excel2json.convert_from_file('file.xlsx')

You can use pandas
import pandas as pd
# Load the spreadsheet into a DataFrame.
df = pd.read_excel('./file1.xlsx')
# With no path argument, to_json() returns the JSON text, which is written out here.
with open('seq1.json', 'w') as f:
    f.write(df.to_json())

Python CSV to JSON W/ Array Output

I'm trying to take data from a CSV and put it in a top-level array in JSON format.
Currently I am running this code:
import csv
import json
# NOTE: both files are opened without a `with` block and are never closed.
csvfile = open('music.csv', 'r')
jsonfile = open('file.json', 'w')
# NOTE: "Artist" is listed twice, so the fourth column overwrites the second
# and no "Album" key is ever produced.
fieldnames = ("ID","Artist","Song", "Artist")
reader = csv.DictReader( csvfile, fieldnames)
# One json.dump call per row: the file ends up holding one JSON object per
# line rather than a single JSON document.
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')
The CSV file is formatted as so:
| 1 | Empire of the Sun | We Are The People | Walking on a Dream |
| 2 | M83 | Steve McQueen | Hurry Up We're Dreaming |
Where = Column 1: ID | Column 2: Artist | Column 3: Song | Column 4: Album
And getting this output:
{"Song": "Empire of the Sun", "ID": "1", "Artist": "Walking on a Dream"}
{"Song": "M83", "ID": "2", "Artist": "Hurry Up We're Dreaming"}
I'm trying to get it to look like this though:
{
"Music": [
{
"id": 1,
"Artist": "Empire of the Sun",
"Name": "We are the People",
"Album": "Walking on a Dream"
},
{
"id": 2,
"Artist": "M83",
"Name": "Steve McQueen",
"Album": "Hurry Up We're Dreaming"
},
]
}

Pandas solves this really simply. First to read the file
import pandas
df = pandas.read_csv('music.csv', names=("id","Artist","Song", "Album"))
Now you have some options. The quickest way to get a proper json file out of this is simply
df.to_json('file.json', orient='records')
Output:
[{"id":1,"Artist":"Empire of the Sun","Song":"We Are The People","Album":"Walking on a Dream"},{"id":2,"Artist":"M83","Song":"Steve McQueen","Album":"Hurry Up We're Dreaming"}]
This doesn't handle the requirement that you want it all in a "Music" object or the order of the fields, but it does have the benefit of brevity.
To wrap the output in a Music object, we can use to_dict:
import json
# to_dict(orient='records') gives one dict per row; wrap that list in the
# top-level "Music" object and serialize the whole structure in one call.
with open('file.json', 'w') as f:
    json.dump({'Music': df.to_dict(orient='records')}, f, indent=4)
Output:
{
"Music": [
{
"id": 1,
"Album": "Walking on a Dream",
"Artist": "Empire of the Sun",
"Song": "We Are The People"
},
{
"id": 2,
"Album": "Hurry Up We're Dreaming",
"Artist": "M83",
"Song": "Steve McQueen"
}
]
}
I would advise you to reconsider insisting on a particular order for the fields since the JSON specification clearly states "An object is an unordered set of name/value pairs" (emphasis mine).

Alright this is untested, but try the following:
import csv
import json
from collections import OrderedDict
# The fourth column is the album. Repeating "Artist" here would make the last
# column overwrite the artist and leave the output without an "Album" key.
fieldnames = ("ID","Artist","Song", "Album")
entries = []
#the with statement is better since it handles closing your file properly after usage.
with open('music.csv', 'r') as csvfile:
    #before Python 3.7 the standard dict did not guarantee any order,
    #but if you write into an OrderedDict, order of write operations will be kept in output.
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        # Copy the fields in the order given by fieldnames.
        entry = OrderedDict((field, row[field]) for field in fieldnames)
        entries.append(entry)
output = {
    "Music": entries
}
with open('file.json', 'w') as jsonfile:
    # A single dump of the complete structure keeps the file valid JSON.
    json.dump(output, jsonfile)
    jsonfile.write('\n')

Your logic is in the wrong order. json is designed to convert a single object into JSON, recursively. So you should always be thinking in terms of building up a single object before calling dump or dumps.
First collect it into an array:
music = [r for r in reader]
Then put it in a dict:
result = {'Music': music}
Then dump to JSON:
json.dump(result, jsonfile)
Or all in one line:
json.dump({'Music': [r for r in reader]}, jsonfile)
"Ordered" JSON
If you really care about the order of object properties in the JSON (even though you shouldn't), you shouldn't use the DictReader. Instead, use the regular reader and create OrderedDicts yourself:
from collections import OrderedDict
...
# csv.reader (lowercase) is the factory function; the csv module has no
# attribute named "Reader".
reader = csv.reader(csvfile)
# Pair each row's values with the field names, preserving column order.
music = [OrderedDict(zip(fieldnames, r)) for r in reader]
Or in a single line again:
json.dump({'Music': [OrderedDict(zip(fieldnames, r)) for r in reader]}, jsonfile)
Other
Also, use context managers for your files to ensure they're closed properly:
with open('music.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    # Rest of your code inside this block
    ...  # placeholder: a with-block must contain at least one statement

It didn't write to the JSON file in the order I would have liked
The csv.DictReader classes return Python dict objects. Python dictionaries are unordered collections. You have no control over their presentation order.
Python does provide an OrderedDict, which you can use if you avoid using csv.DictReader().
and it skipped the song name altogether.
This is because the file is not really a CSV file. In particular, each line begins and ends with the field separator. We can use .strip("|") to fix this.
I need all this data to be output into an array named "Music"
Then the program needs to create a dict with "Music" as a key.
I need it to have commas after each artist info. In the output I get I get
This problem is because you call json.dumps() multiple times. You should only call it once if you want a valid JSON file.
Try this:
import csv
import json
from collections import OrderedDict
def MyDictReader(fp, fieldnames):
    """Lazily parse a pipe-delimited table into OrderedDicts.

    Each input line is expected to look like ``| a | b | c |``. The leading
    and trailing ``|`` and all padding around the fields are removed, then
    the values are paired positionally with ``fieldnames``. Blank lines are
    skipped instead of producing empty records.

    Args:
        fp: Iterable of text lines, for example an open text file.
        fieldnames: Key names, in column order.

    Returns:
        A generator of ``OrderedDict`` objects, one per non-blank line. It is
        evaluated lazily, so ``fp`` must stay open until it is consumed.
    """
    # Drop surrounding whitespace and the outer '|' of every line so the csv
    # module does not see empty first and last fields.
    lines = (line.strip().strip('|').strip() for line in fp)
    rows = csv.reader(lines, delimiter="|")
    return (
        OrderedDict(zip(fieldnames, [field.strip() for field in row]))
        for row in rows
        if row  # csv.reader returns [] for a blank line
    )
fieldnames = ("ID","Artist","Song", "Album")
# Open both files in one with-statement so they are closed (and the JSON is
# flushed to disk) even if parsing fails part-way through.
with open('music.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    reader = MyDictReader(csvfile, fieldnames)
    # MyDictReader is lazy, so it must be consumed while csvfile is still
    # open. A single dump call keeps the output one valid JSON document.
    json.dump({"Music": list(reader)}, jsonfile, indent=2)

Convert CSV to restructured JSON in Python

I have a CSV File with following contents:
source: data.opennepal.net
District,Zone,Geographical Region,Development Region,Causalities,In Number
Sindhupalchok,Bagmati,Mountain,Central,Total No. of Houses,66688
Sindhupalchok,Bagmati,Mountain,Central,Total Population,287798
Sindhupalchok,Bagmati,Mountain,Central,Dead Male,1497
Sindhupalchok,Bagmati,Mountain,Central,Dead Female,1943
Kathmandu,Bagmati,Hill,Central,Total No. of Houses,436344
Kathmandu,Bagmati,Hill,Central,Total Population,1744240
Kathmandu,Bagmati,Hill,Central,Dead Male,621
Kathmandu,Bagmati,Hill,Central,Dead Female,600
My objective is to generate a JSON object like this from it:
{
"district":{
"Sindhupalchok":{
"Causalities":{
"Total No. of Houses":66688,
"Total Population":287798,
"Dead Male":1497,
"Dead Female":1943
},
"geoInfo":{
"Zone":"Bagmati",
"geography":"Mountain",
"Dev Region":"Central"
}
},
"Kathmandu":{
"Causalities":{
"Total No. of Houses":436344,
"Total Population":1744240,
"Dead Male":621,
"Dead Female":600
},
"geoInfo":{
"Zone":"Bagmati",
"geography":"Hill",
"Dev Region":"Central"
}
}
}
}
I've tried using csv.DictReader(csvfile, fieldnames) but it generates redundant nodes in JSON which is difficult to parse and unnecessarily lengthy.
I am using python 2.x
This is my attempt so far:
>>> csvData = open('data.csv','rb')
>>> fieldnames = ("district", "zone", "geographicalRegion", "developmentRegion", "causalities", "injuredNumber")
>>> reader = csv.DictReader(csvData, fieldnames)
>>> rawJson = json.dumps([ row for row in reader ])
rawJson isn't the one I've been seeking. It just maps the fieldnames with individual datasets.
So the question is: How can I create this JSON object without redundant nodes?

As glibdud mentions in the comments you need to loop over the data a bit more manually, so that you can create the desired JSON structure.
We read each line of the CSV data as a dict, and check if we've encountered a new district, and if so we create a new data dict for it, and insert a geoInfo dict into data. Then we can gather the casualty data from that line and the subsequent lines for that district. And once we've gathered all that data we can insert the data dict into the main all_data dict.
To test the code I put your .csv data into a file called 'qdata.csv'
import csv
import json
filename = 'qdata.csv'
# Names applied to the six CSV columns, in order. The file's own header row
# is skipped below.
fieldnames = ('district', 'Zone', 'geography',
              'Dev Region', 'casualties', 'injured')
# Columns copied into each district's "geoInfo" object.
geo_keys = ('Zone', 'geography', 'Dev Region')
all_data = {}
# Python 2: the csv module expects the file to be opened in binary mode.
with open(filename, 'rb') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames)
    # skip header (the None default avoids StopIteration on an empty file)
    next(reader, None)
    for row in reader:
        district = row['district']
        # Create the district's entry the first time it is seen. Testing
        # membership in all_data (rather than comparing with the previous
        # row) keeps the data intact if a district's rows are not contiguous.
        if district not in all_data:
            all_data[district] = {
                'Casualties': {},
                'geoInfo': dict((k, row[k]) for k in geo_keys),
            }
        # Values stay strings, exactly as read from the CSV.
        all_data[district]['Casualties'][row['casualties']] = row['injured']
# Parenthesized so the statement parses on both Python 2 and Python 3.
print(json.dumps(all_data, indent=4, sort_keys=True))
Output
{
"Kathmandu": {
"Casualties": {
"Dead Female": "600",
"Dead Male": "621",
"Total No. of Houses": "436344",
"Total Population": "1744240"
},
"geoInfo": {
"Dev Region": "Central",
"Zone": "Bagmati",
"geography": "Hill"
}
},
"Sindhupalchok": {
"Casualties": {
"Dead Female": "1943",
"Dead Male": "1497",
"Total No. of Houses": "66688",
"Total Population": "287798"
},
"geoInfo": {
"Dev Region": "Central",
"Zone": "Bagmati",
"geography": "Mountain"
}
}
}
This output isn't exactly what you've got in your question, but I think you should be able to take it from here. :)

csv to json in python

Hey so I have some hash ids in a csv file like
XbRPhe65YbC+xtgGQ8ukeZEr9xFOC4MEs9Z0wUidGSec=
XbRPhe65YbC+xtgGQ8uksrqSUJ/HhTPj1d2pL0/vuGrHM=
and I want to parse them in Python and wrap them in some additional code like
{"id" :"XbRPshe65YbC+xtGQ8ukqR2u2btfNeNe2gtcs72QbxPA=", "timestamp":"20150831"},
and then wrap all of that in some JSON syntax. This is then sent as a POST request. The problem is that I cannot seem to make it valid JSON: everything seems to be ordered wrong and I am getting extra backslashes (\).
import os
import pandas as pd
from pprint import pprint
# Read the hash ids into a DataFrame; header=None means the file has no header
# row, so the first column is addressed as df[0].
df=pd.read_csv('test.csv',sep=',',header=None)
# Builds JSON-looking text by string concatenation, one string per row.
df[0] = '{"id" :"' + df[0].astype(str) + '", "timestamp":"20150831"}, '
# NOTE(review): slicing with [:-1] drops the last ROW of the frame; it does not
# strip the trailing comma from the strings built above.
df = df[:-1] # removes last comma
test = 'hello'
# NOTE(review): "ids" holds the whole df[0] column object, not a list of
# {"id": ..., "timestamp": ...} dictionaries.
data =[ { "ids":[ df[0]],
"attributes":[
{
"name":"girl"
},
{
"name":"size"
}
]
}
]
# NOTE(review): data is a plain Python list, which has no to_json() method, so
# this line raises AttributeError.
json1 = data.to_json()
print(json1)

I agree that pandas doesn't seem to be the simplest tool for the job here. The built-in libraries will work great:
import csv
import json
# newline='' is how the csv module expects its input files to be opened.
with open('test.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    data = {
        # One object per CSV row; the hash id is the row's first field.
        # Blank lines come back as empty rows and are skipped, so row[0]
        # cannot raise IndexError.
        "ids": [{"id": row[0], "timestamp": "20150831"} for row in csvreader if row],
        "attributes": [
            {"name": "girl"},
            {"name": "size"}
        ]
    }
# json.dumps does all quoting and escaping, so no JSON text is built by hand.
json1 = json.dumps(data)
print(json1)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Convert CSV file to JSON with python - python

Related

CSV to JSON converter (Grouping by same keys values)

Converting excel spreadsheet to json

Python CSV to JSON W/ Array Output

Convert CSV to restructured JSON in Python

csv to json in python

Categories

Resources