CSV to JSON converter (Grouping by same keys values)

CSV to JSON converter (Grouping by same keys values) - python

I'm trying to convert a CSV file to JSON. I've searched online, but I can't work out how to modify my code to get the desired output.
This is my code in python:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a CSV file with a header row into a JSON array of row objects.

    Every data row becomes one JSON object keyed by the CSV header names.
    Rows are written in file order; no grouping or merging is performed.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file to read.
        jsonFilePath: Path of the JSON file to create or overwrite.
    """
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # DictReader yields one dict per data row (a row, not a column).
        jsonArray = list(csv.DictReader(csvf))
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Example run: convert example.csv into output.json (both paths are relative
# to the current working directory).
csvFilePath='example.csv'
jsonFilePath='output.json'
csv_to_json(csvFilePath, jsonFilePath)
and this is my csv file format:
My actual JSON Output:
[
{
"Area": "IT",
"Employee": "Carl"
},
{
"Area": "IT",
"Employee": "Walter"
},
{
"Area": "Financial Resources",
"Employee": "Jennifer"
}
]
My desired JSON Output:
[
{
"Area": "IT",
"Employee": ["Carl","Walter"]
},
{
"Area": "Financial Resources",
"Employee": ["Jennifer"]
}
]
Thank you in advance!

Something like this should work.
def csv_to_json(csvFilePath, jsonFilePath):
    """Group the "Employee" values of a CSV file by "Area" and write them as JSON.

    The output is a JSON array with one object per distinct area, in order of
    first appearance:
    ``{"Area": <area>, "Employee": [<employee>, ...]}``.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file; its header row must
            contain the columns "Area" and "Employee".
        jsonFilePath: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If the CSV header lacks an "Area" or "Employee" column.
    """
    areas = {}
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in csv.DictReader(csvf):
            # setdefault creates the list the first time an area is seen.
            areas.setdefault(row["Area"], []).append(row["Employee"])
    # Convert the {area: [employees]} mapping to the desired output format.
    jsonArray = [
        {"Area": area, "Employee": employees}
        for area, employees in areas.items()
    ]
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)

Related

Convert CSV file to JSON with python

I am trying to convert my CSV email list to JSON so that I can send mass emails via an API. This is my code so far, but I am having trouble with the output: nothing is printed in my VS Code editor.
import csv
import json
def make_json(csvFilePath, jsonFilePath):
    """Write the rows of a CSV file as a JSON object keyed by the "No" column.

    Each data row becomes a JSON object keyed by the header names and is
    stored under the row's "No" value. If several rows share the same "No"
    value, the last one wins.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file; its header row must
            contain a "No" column.
        jsonFilePath: Path of the JSON file to create or overwrite.

    Raises:
        KeyError: If the CSV header has no "No" column.
    """
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        data = {row['No']: row for row in csv.DictReader(csvf)}
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(data, jsonf, indent=4)
# Example run: read the leads CSV from an absolute path and write Names.json
# into the current working directory. Nothing is printed to the console.
csvFilePath = r'/data/csv-leads.csv'
jsonFilePath = r'Names.json'
make_json(csvFilePath, jsonFilePath)
Here is my desired JSON format
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"Name": "Youngstown Coffee",
"ConsentToTrack": "Yes"
},
Here's my CSV list
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen & Bakery,catering#zylberschtein.com,Yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,Yes

It looks like you could use a csv.DictReader to make this easier.
If I have data.csv that looks like this:
Name,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
I can convert it into JSON like this:
>>> import csv
>>> import json
>>> fd = open('data.csv')
>>> reader = csv.DictReader(fd)
>>> print(json.dumps(list(reader), indent=2))
[
{
"Name": "Zylberschtein's Delicatessen",
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes"
},
{
"Name": "Youngstown Coffee",
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes"
}
]
Here I've assumed the headers in the CSV can be used verbatim. I'll update this with an example if you need to modify key names (e.g. convert "No" to "Name").
If you need to rename a column, it might look more like this:
import csv
import json
# Read data.csv, rename the "No" column to "Name" in every row, and print the
# resulting list of rows as JSON.
# newline='' lets the csv module handle line endings itself, so line breaks
# inside quoted fields are preserved exactly as written.
with open('data.csv', newline='') as fd:
    reader = csv.DictReader(fd)
    data = []
    for row in reader:
        # pop() removes "No"; re-adding the value as "Name" places the key
        # last in the row. Raises KeyError if there is no "No" column.
        row['Name'] = row.pop('No')
        data.append(row)
print(json.dumps(data, indent=2))
Given this input:
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
This will output:
[
{
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes",
"Name": "Zylberschtein's Delicatessen"
},
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes",
"Name": "Youngstown Coffee"
}
]
And to print the output in my editor, is it simply print(json.dumps(list(reader), indent=2))?
I'm not really familiar with your editor; print is how you generate console output in Python.

Converting excel spreadsheet to json

I want to convert an excel spreadsheet data to a JSON file. Here is the code I currently have:
Data
excel spreadsheet
Code
import xlrd
from collections import OrderedDict
import json
# Open the first sheet of the workbook.
wb = xlrd.open_workbook('./file1.xlsx')
sh = wb.sheet_by_index(0)

# Build one record per sheet row, skipping row 0: the first cell of the row
# becomes "name" and the second cell becomes "description".
records = []
for rownum in range(1, sh.nrows):
    cells = sh.row_values(rownum)
    records.append(OrderedDict([('name', cells[0]), ('description', cells[1])]))

# Wrap the records under a "columns" key and write them as JSON.
data_list = {'columns': records}
j = json.dumps(data_list)
with open('seq1.json', 'w') as f:
    f.write(j)
Output
{"columns": [{"name": "FILEID", "description": "FILETYPE"}]}
Expected output
{
"columns": [
{
"name": "fileid",
"description": "FILEID"
},
{
"name": "filetype",
"description": "FILETYPE"
},
{
"name": "stusab",
"description": "STUSAB"
},
{
"name": "chariter",
"description": "CHARITER"
},
{
"name": "sequence",
"description": "SEQUENCE"
},
{
"name": "logrecno",
"description": "LOGRECNO"
}
],
The "name" column should be displaying the first row while the "description" column should be displaying the second row.
What modification can I do in my function to get the output I am looking for?

You need to iterate over columns, not rows
import xlrd
from collections import OrderedDict
import json
# Open the first sheet of the workbook.
wb = xlrd.open_workbook('./file1.xls')
sh = wb.sheet_by_index(0)

# Build one record per sheet column: the cell in row 0 becomes "name" and the
# cell in row 1 becomes "description".
column_records = [
    OrderedDict([
        ('name', sh.row_values(0)[colnum]),
        ('description', sh.row_values(1)[colnum]),
    ])
    for colnum in range(0, sh.ncols)
]

# Wrap the records under a "columns" key and write them as JSON.
data_list = {'columns': column_records}
j = json.dumps(data_list)
with open('seq1.json', 'w') as f:
    f.write(j)

You should give a try to:
# Uses the third-party excel2json package; where the converted output ends up
# is not shown here - check the package's documentation.
import excel2json
excel2json.convert_from_file('file.xlsx')

You can use pandas
import pandas as pd
# Load the spreadsheet into a DataFrame.
df = pd.read_excel('./file1.xlsx')

# Serialize the whole DataFrame to a JSON string, then write it to seq1.json.
json_text = df.to_json()
with open('seq1.json', 'w') as f:
    f.write(json_text)

Convert csv to json - column1 as a key (nested dict)

I need column1 to be used as a key, and its value to be a dict built from column2 (as key) and column3 (as value), so that the output becomes a nested dictionary.
For example:
I have a csv file as shown below:
customer1,subkey1,val1
customer2,subkey2,val2
customer2,subkey3,val3
customer2,subkey4,val4
customer3,subkey5,val5
customer3,subkey6,val6
expecting output to be:
{
customer1: {
subkey1:val1
},
customer2: {
subkey2:val2,
subkey3:val3,
subkey4:val4
},
customer3: {
subkey5:val5,
subkey6:val6
}
}
I have tried to convert below sample code as per my requirement, but no luck:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into a JSON array with one object per data row.

    The first CSV line is treated as the header row and supplies the keys of
    every row object. Rows are written in file order, without any nesting.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file to read.
        jsonFilePath: Path of the JSON file to create or overwrite.
    """
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # The dictionary reader turns each data row into a dict.
        jsonArray = list(csv.DictReader(csvf))
    # Serialize the list of row dicts straight into the output file.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonArray, jsonf, indent=4)
# Example run: convert data.csv into data.json (both paths are relative to
# the current working directory).
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)

Try this
This worked for me: I used csv.reader and changed the result variable into a dict object.
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a header-less, three-column CSV file into a nested JSON object.

    Column 1 becomes the outer key, column 2 the inner key and column 3 the
    value, i.e. ``{col1: {col2: col3, ...}, ...}``. A repeated
    (column 1, column 2) pair keeps the last value. Columns after the third
    are ignored and blank lines are skipped.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file to read.
        jsonFilePath: Path of the JSON file to create or overwrite.

    Raises:
        IndexError: If a non-blank row has fewer than three columns.
    """
    jsonDict = {}
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        for row in csv.reader(csvf, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line (for example a
                # trailing empty line); indexing it would raise IndexError.
                continue
            outer_key, inner_key, value = row[0], row[1], row[2]
            # setdefault creates the inner dict the first time a key is seen.
            jsonDict.setdefault(outer_key, {})[inner_key] = value
    # Serialize the nested dict straight into the output file.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonDict, jsonf, indent=4)
# Example run: convert data.csv into data.json (both paths are relative to
# the current working directory).
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)
output
{
"customer1": {
"subkey1": "val1"
},
"customer2": {
"subkey2": "val2",
"subkey3": "val3",
"subkey4": "val4"
},
"customer3": {
"subkey5": "val5",
"subkey6": "val6"
}
}

import collections
import csv
# Build {column1: {column2: column3}} from a header-less CSV file and print it.
# newline='' lets the csv module handle line endings itself, so line breaks
# inside quoted fields are preserved exactly as written.
with open('file.csv', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    # defaultdict(dict) creates the inner dict the first time a key is seen.
    dict_1 = collections.defaultdict(dict)
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would raise
            # IndexError, so skip it.
            continue
        dict_1[row[0]][row[1]] = row[2]
# Convert to a plain dict so the printed form has no defaultdict wrapper.
print(dict(dict_1))
# Output
{
customer1: {
subkey1:val1
},
customer2: {
subkey2:val2,
subkey3:val3,
subkey4:val4
},
customer3: {
subkey5:val5,
subkey6:val6
}
}

In this case, DictReader doesn't really help you because it gives each row as a dict in the form:
{"column1": "customer1", "column2": "subkey1", "column3": "val1"}
So it will actually be simpler to use a regular reader and parse manually. You just need to expand existing customers, so it will be helpful to use a defaultdict:
import csv
import json
from collections import defaultdict
def csv_to_json(csvFilePath, jsonFilePath):
    """Write a header-less, three-column CSV file as a nested JSON object.

    Each row ``key, subkey, value`` contributes ``{key: {subkey: value}}``;
    rows sharing the same first column are merged into one inner object.
    Columns after the third are ignored and blank lines are skipped.

    Args:
        csvFilePath: Path of the UTF-8 encoded CSV file to read.
        jsonFilePath: Path of the JSON file to create or overwrite.

    Raises:
        IndexError: If a non-blank row has fewer than three columns.
    """
    # defaultdict(dict) creates the inner dict the first time a key is seen.
    jsonDict = defaultdict(dict)
    # newline='' lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved exactly as written.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        # The plain reader yields each row as a list of strings.
        for row in csv.reader(csvf):
            if not row:
                # A blank line comes back as []; indexing it would raise
                # IndexError, so skip it.
                continue
            jsonDict[row[0]][row[1]] = row[2]
    # json.dump serializes a defaultdict like a regular dict.
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        json.dump(jsonDict, jsonf, indent=4)
Note that the JSON file can be written more simply by using json.dump (which writes directly to the file object) instead of json.dumps followed by write.

Multi column csv from json in python

How can I convert JSON to CSV in Python? I want to be able to open the result in Excel with separate Lat and Long columns.
[
{
"Lat": "-122.37391463199998",
"Long": "47.630880207000075"
},
{
"Lat": "-122.38447021399998",
"Long": "47.70118823100006"
},
{
"Lat": "-122.34729431799997",
"Long": "47.64717111900006"
}
]

The csv module has a handy writerows() method on the DictWriter class:
import csv
import json
# Parse the JSON text into a list of dicts. The literal below is only a
# placeholder: replace it with the real JSON, otherwise json.loads raises.
data = json.loads(""" My Json """)
# newline='' stops the csv writer's own line endings from being translated
# again on output, which would produce blank rows between records on Windows.
with open('lat_long.csv', 'w', newline='') as csvfile:
    fieldnames = ['Lat', 'Long']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # Header row first, then one CSV row per dict in data.
    writer.writeheader()
    writer.writerows(data)

You can use the json module to convert your string into a list of dictionaries. Then use the csv module to save them to a file.
import json, csv
# Sample input: a JSON array of objects that each have "Lat" and "Long" keys.
json_string = '[{"Lat": "-122.37391463199998", "Long": "47.630880207000075"}, {"Lat": "-122.38447021399998", "Long": "47.70118823100006"}, {"Lat": "-122.34729431799997", "Long": "47.64717111900006"} ]'
data = json.loads(json_string)
# On Python 3 the csv module writes str, so the file must be opened in text
# mode ('wb' raises TypeError). newline='' stops the writer's own line endings
# from being translated again, which would add blank rows on Windows.
with open('foo.csv', 'w', newline='') as csvfile:
    csv_writer = csv.DictWriter(csvfile, delimiter=',', fieldnames=['Lat', 'Long'])
    # Header row first, then one CSV row per dict in data.
    csv_writer.writeheader()
    csv_writer.writerows(data)

Convert CSV to restructured JSON in Python

I have a CSV File with following contents:
source: data.opennepal.net
District,Zone,Geographical Region,Development Region,Causalities,In Number
Sindhupalchok,Bagmati,Mountain,Central,Total No. of Houses,66688
Sindhupalchok,Bagmati,Mountain,Central,Total Population,287798
Sindhupalchok,Bagmati,Mountain,Central,Dead Male,1497
Sindhupalchok,Bagmati,Mountain,Central,Dead Female,1943
Kathmandu,Bagmati,Hill,Central,Total No. of Houses,436344
Kathmandu,Bagmati,Hill,Central,Total Population,1744240
Kathmandu,Bagmati,Hill,Central,Dead Male,621
Kathmandu,Bagmati,Hill,Central,Dead Female,600
My objective is to generate a JSON object like this from it:
{
"district":{
"Sindhupalchok":{
"Causalities":{
"Total No. of Houses":66688,
"Total Population":287798,
"Dead Male":1497,
"Dead Female":1943
},
"geoInfo":{
"Zone":"Bagmati",
"geography":"Mountain",
"Dev Region":"Central"
}
},
"Kathmandu":{
"Causalities":{
"Total No. of Houses":436344,
"Total Population":1744240,
"Dead Male":621,
"Dead Female":600
},
"geoInfo":{
"Zone":"Bagmati",
"geography":"Hill",
"Dev Region":"Central"
}
}
}
}
I've tried using csv.DictReader(csvfile, fieldnames), but it generates redundant nodes in the JSON, which makes it difficult to parse and unnecessarily lengthy.
I am using python 2.x
This is my attempt so far:
>>> csvData = open('data.csv','rb')
>>> fieldnames = ("district", "zone", "geographicalRegion", "developmentRegion", "causalities", "injuredNumber")
>>> reader = csv.DictReader(csvData, fieldnames)
>>> rawJson = json.dumps([ row for row in reader ])
rawJson isn't the one I've been seeking. It just maps the fieldnames with individual datasets.
So the question is: How can I create this JSON object without redundant nodes?

As glibdud mentions in the comments you need to loop over the data a bit more manually, so that you can create the desired JSON structure.
We read each line of the CSV data as a dict, and check if we've encountered a new district, and if so we create a new data dict for it, and insert a geoInfo dict into data. Then we can gather the casualty data from that line and the subsequent lines for that district. And once we've gathered all that data we can insert the data dict into the main all_data dict.
To test the code I put your .csv data into a file called 'qdata.csv'
import csv
import json
# Restructure the flat casualty CSV into one nested entry per district:
#   {district: {"Casualties": {<label>: <count>}, "geoInfo": {...}}}
# Written for Python 3 (text-mode file, print() function).
filename = 'qdata.csv'
# Our own names for the six CSV columns, in file order; they replace the
# header row of the file.
fieldnames = ('district', 'Zone', 'geography',
              'Dev Region', 'casualties', 'injured')
# The columns copied into each district's "geoInfo" dict.
geo_keys = ('Zone', 'geography', 'Dev Region')
all_data = {}
# newline='' lets the csv module handle line endings itself.
with open(filename, newline='') as csvfile:
    reader = csv.DictReader(csvfile, fieldnames)
    # Skip the header line: because explicit fieldnames are passed, it would
    # otherwise be read as a data row.
    next(reader)
    for row in reader:
        district = row['district']
        # Create the entry the first time a district is seen. Checking
        # all_data (rather than only the previous row) keeps earlier data
        # when a district's rows are not contiguous in the file.
        if district not in all_data:
            all_data[district] = {
                'Casualties': {},
                'geoInfo': {k: row[k] for k in geo_keys},
            }
        # Counts stay strings, exactly as read from the CSV.
        all_data[district]['Casualties'][row['casualties']] = row['injured']
print(json.dumps(all_data, indent=4, sort_keys=True))
Output
{
"Kathmandu": {
"Casualties": {
"Dead Female": "600",
"Dead Male": "621",
"Total No. of Houses": "436344",
"Total Population": "1744240"
},
"geoInfo": {
"Dev Region": "Central",
"Zone": "Bagmati",
"geography": "Hill"
}
},
"Sindhupalchok": {
"Casualties": {
"Dead Female": "1943",
"Dead Male": "1497",
"Total No. of Houses": "66688",
"Total Population": "287798"
},
"geoInfo": {
"Dev Region": "Central",
"Zone": "Bagmati",
"geography": "Mountain"
}
}
}
This output isn't exactly what you've got in your question, but I think you should be able to take it from here. :)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

CSV to JSON converter (Grouping by same keys values) - python

Related

Convert CSV file to JSON with python

Converting excel spreadsheet to json

Convert csv to json - column1 as a key (nested dict)

Multi column csv from json in python

Convert CSV to restructured JSON in Python

Categories

Resources