Json2csv converter for Python 3 - python

I'm using the Json2csv code to convert the Yelp dataset to csv files (Available here: https://github.com/Yelp/dataset-examples/blob/master/json_to_csv_converter.py)
This code was originally used with Python 2 but I'm using Python 3; I've made some changes so it's now working with Python 3 except that I'm getting b' preceding the strings (which indicates that it is a byte sequence).
I've added encoding='utf-8' to convert it to string but my csv file still shows the b''
Example: business_id
b'7KPBkxAOEtb3QeIL9PEErg'
What do I need to change to make it write strings instead of bytes?
Thanks
# -*- coding: utf-8 -*-
"""Convert the Yelp Dataset Challenge dataset from json format to csv.
For more information on the Yelp Dataset Challenge please visit http://yelp.com/dataset_challenge
"""
import argparse
import collections
import csv
import json
def read_and_write_file(json_file_path, csv_file_path, column_names):
"""Read in the json dataset file and write it out to a csv file, given the column names."""
with open(csv_file_path, 'w', newline='', encoding='utf-8') as fout:
csv_file = csv.writer(fout)
csv_file.writerow(list(column_names))
with open(json_file_path,encoding='utf-8') as fin:
for line in fin:
line_contents = json.loads(line)
csv_file.writerow(get_row(line_contents, column_names))
def get_superset_of_column_names_from_file(json_file_path):
"""Read in the json dataset file and return the superset of column names."""
column_names = set()
with open(json_file_path, encoding='utf-8') as fin:
for line in fin:
line_contents = json.loads(line)
column_names.update(
set(get_column_names(line_contents).keys())
)
return column_names
def get_column_names(line_contents, parent_key=''):
"""Return a list of flattened key names given a dict.
Example:
line_contents = {
'a': {
'b': 2,
'c': 3,
},
}
will return: ['a.b', 'a.c']
These will be the column names for the eventual csv file.
"""
column_names = []
for k, v in line_contents.items():
column_name = "{0}.{1}".format(parent_key, k) if parent_key else k
if isinstance(v, collections.MutableMapping):
column_names.extend(
get_column_names(v, column_name).items()
)
else:
column_names.append((column_name, v))
return dict(column_names)
def get_nested_value(d, key):
"""Return a dictionary item given a dictionary `d` and a flattened key from `get_column_names`.
Example:
d = {
'a': {
'b': 2,
'c': 3,
},
}
key = 'a.b'
will return: 2
"""
if '.' not in key:
if key not in d:
return None
return d[key]
base_key, sub_key = key.split('.', 1)
if base_key not in d:
return None
sub_dict = d[base_key]
return get_nested_value(sub_dict, sub_key)
def get_row(line_contents, column_names):
"""Return a csv compatible row given column names and a dict."""
row = []
for column_name in column_names:
line_value = get_nested_value(
line_contents,
column_name,
)
if isinstance(line_value, str):
row.append('{0}'.format(line_value.encode('utf-8')))
elif line_value is not None:
row.append('{0}'.format(line_value))
else:
row.append('')
return row
if __name__ == '__main__':
"""Convert a yelp dataset file from json to csv."""
parser = argparse.ArgumentParser(
description='Convert Yelp Dataset Challenge data from JSON format to CSV.',
)
parser.add_argument(
'json_file',
type=str,
help='The json file to convert.',
)
args = parser.parse_args()
json_file = args.json_file
csv_file = '{0}.csv'.format(json_file.split('.json')[0])
column_names = get_superset_of_column_names_from_file(json_file)
read_and_write_file(json_file, csv_file, column_names)

Just a guess:
if isinstance(line_value, str):
row.append('{0}'.format(line_value.encode('utf-8')))
If the value is str you don't need to encode it in Python 3 - all strings in Python 3 are unicode. You probably should check if the value is an instance of bytes instead.
if isinstance(line_value, bytes):
row.append('{0}'.format(line_value.decode('utf-8')))
[update]
No, that line is checking if it is string versus number... so str is correct – Luluperam
Are you sure? Lets say line_value is the string "foo":
line_value = 'foo'
Now try this:
>>> row = []
>>> if isinstance(line_value, str):
... row.append('{0}'.format(line_value.encode('utf-8')))
>>> print(row)
["b'foo'"]
That is the source of your bytes literal in the CSV file. Now lets try the version I so kindly suggested before dismissing it:
>>> line_value = b'foo'
>>> row = []
>>> if isinstance(line_value, bytes):
... row.append('{0}'.format(line_value.decode('utf-8')))
>>> print(row)
['foo']

Related

convert JSON to CSV file using python

I am trying to fetch data from JSON file and store it into csv file. This JSON file has nesting and multiple records for same key value i.e.value.
Source File:
I tried below code which flattens the json file, but i am unable to get to required csv format.
import pandas as pd
import json
def flatten_json(nested_json):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(nested_json)
return out
response = json.loads(data)
df=pd.Series(flatten_json(response)).to_frame()
print(df)
Below is the output which i get after executing above code:
0
data_Value_strt_0_col1 John
data_Value_strt_0_col2 David
data_Value_strt_0_col3 Lisa
data_Value_strt_0_col4 None
data_Value_strt_0_col5 None
data_Value_strt_0_data_byValue_0_col3 dev
data_Value_strt_0_data_byValue_0_col6 None
data_Value_strt_0_data_byValue_0_col1 None
data_Value_strt_0_data_byValue_0_data_value_201... 02.22
data_Value_strt_0_data_byValue_0_data_value_2020-1 12.32
data_Value_strt_1_col1 Ram
data_Value_strt_1_col2 Shyam
data_Value_strt_1_col3 Kishore
data_Value_strt_1_col4 None
data_Value_strt_1_col5 None
data_Value_strt_1_data_byValue_0_col3 prd
data_Value_strt_1_data_byValue_0_col6 None
data_Value_strt_1_data_byValue_0_col1 None
data_Value_strt_1_data_byValue_0_data_value_2020-3 12.87
data_Value_strt_1_data_byValue_1_col3 dev-prd
data_Value_strt_1_data_byValue_1_col6 None
data_Value_strt_1_data_byValue_1_col1 None
data_Value_strt_1_data_byValue_1_data_value_201... 3.39
data_Value_strt_1_data_byValue_1_data_value_201... 9.24
I am unable to get to above format using above code since there is nesting and multiple values for Key 'value'
The following works for the data you've provided. It's possible that this may not work if there is more data you haven't shown, and the format changes:
import json
import csv
data = ...
info = json.loads(data)["data"]["Value"]["strt"]
fieldnames = ["Name1", "Name2", "Name3", "Col_4", "Col_5", "Val_Col3", "Val_Col6", "Val_Col1", "Val_Year", "Val_Month", "Value"]
with open("output.csv", "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(fieldnames)
for d1 in info:
for d2 in d1["data"]["byValue"]:
for key, value in d2["data"]["value"].items():
year, month = key.split("-")
row = [d1["col1"], d1["col2"], d1["col3"], d1["col4"], d1["col5"], d2["col3"], d2["col6"], d2["col1"], year, month, value]
writer.writerow(row)
This will write to a CSV file in the format you specified. None values are written to the file as empty strings by the csv.writer object. If you want to introduce whitespace into the CSV file, so that the delimiters line up, you may have to make some changes.

Parsing csv file and splitting into sub files

I am trying to create a generic filter to split file on the condition from the Yaml file.
My code is running Pandas but as the environment is not having Pandas module I am trying to achieve it through CSV library.
When I am hard coding the value at q its working but when I am trying to pass it from the config file its not working. Also I want pass multiple checks on the same column like('','Balance). So Asset goest to one file and ('','Balance) in another.
import sys
import yaml
import csv
def dynamicQuery(config_file, data_file, outputPath):
"""Loading Configuration file into dataframe"""
try:
with open(config_file) as file:
doc = yaml.full_load(file)
except Exception as err:
print("Error Configuration data file: ", err)
try:
for k, v in doc.items():
if k != 'column':
filename = k
k = doc[k]
q = ' , '.join(f'{v} ' for q, v in k.items())
q = '"' + str(strip(q)) + '"'
print(q) #-- "Asset"
df = csv.reader(open(data_file), delimiter=',')
df = filter(lambda x: (x[2] == q), df) # Not working here
#df = filter(lambda x: x[2] == "Asset", df) --> this is working
csv.writer(open(filename + ".txt", 'w', newline=' '), delimiter=',').writerows(df)
print("File is created for " + filename)
except Exception as err:
print("Error executing queries and saving output data file: ", err)
def main():
if len(sys.argv) == 3:
"""File will be passed as parameter """
config_file = sys.argv[1]
data_file = sys.argv[2]
dynamicQuery(config_file, data_file)
else:
usage()
def usage():
print("Usage: python splitGenric.py config_file data_file ")
main()
Sample file
1233,ACV,Asset,sample
1235,ACV,Asset,sample
1232,ACV,Asset,sample
1234,ACV,Asset,sample
1237,ACV,,sample
1238,ACV,,sample
1234,ACV,Balance,sample
1254,ACV,Balance,sample
1244,ACV,Balance,sample
1264,ACV,Balance,sample
Config.yaml
Asset :
filter1: '"Asset"'
Balance:
filter1: '"Balance"'
filter2: '""'
The YAML configuration file format is not particularly convenient for this, and yaml is not a standard Python module. I would probably go for something like regular expressions instead of a YAML file. But just to sort out the immediate problems, the problem here is that you are mixing up Python syntax and literal quoting characters. You are assembling a string containing literal double quotes around Asset for example, where your CSV file does not contain double quotes around this value; and so you are effectively comparing if 'Asset' == '"Asset"' which of course is False.
The following might not do exactly what you want, but should at least demonstrate a rough first cut of what I think you are trying to do here.
with open(config_file) as file:
config = yaml.full_load(file)
filters = dict()
for k, v in config.items():
handle = open(k + '.txt', 'w', newline='')
writer = csv.writer(handle, delimiter=',')
filt = {'handle': handle, 'writer': writer, 'conditions': []}
for _, expr in v.items():
filt['conditions'].append(expr.strip('"'))
filters[k] = filt
with open(data_file) as csvfile:
reader = csv.reader(csvfile)
for row in reader:
for handle, conf in filters.items():
for i in range(len(conf['conditions'])):
if row[2] == conf['conditions'][i]:
conf['writer'].writerow(row)
break
for handle, conf in filters.items():
conf['handle'].close()
I'm guessing you used pyyaml which seems to be the dominant YAML module for Python.
I tried to use the config.yaml, but I've got this error
File "C:\Users\XXXXXX\AppData\Local\Programs\Python\Python36-32\lib\site-packages\yaml\parser.py", line 439, in parse_block_mapping_key
"expected <block end>, but found %r" % token.id, token.start_mark)
yaml.parser.ParserError: while parsing a block mapping
in "config.yml", line 5, column 5
expected <block end>, but found ','
in "config.yml", line 5, column 17
But I will pretend it worked and the content was loaded in a dictionary, as it appears to be the intention.
The dictionary is as:
doc = {'Asset':'Asset','Balance':[' ','Balance']}
#load directly to dataframe
df = pd.read_csv('sample.txt',header=None)
handler = ''
for k,v in doc.items():
kList = {k:[]} #making empty lists with k values
if isinstance(v,str): #Asset is string
fil = v
else:
for i in range(len(v)): #Balance is list of values
if v[i]:
fil = v[i]
else:
handler = k #replace the null
for types in df.values:
if fil in types:
kList[k].append(types) #append types to corresponding list
csv.writer(open(k+".txt", 'a', newline='\n'), delimiter=',').writerows(kList[k])
if handler: #there is null values
nulls = df[df.isnull().any(axis=1)].values.tolist()
csv.writer(open(handler+".txt", 'a', newline='\n'), delimiter=',').writerows(nulls)
The result are two files, with the following contents:
Asset.txt:
1233,ACV,Asset,sample
1235,ACV,Asset,sample
1232,ACV,Asset,sample
1234,ACV,Asset,sample
Balance.txt:
1234,ACV,Balance,sample
1254,ACV,Balance,sample
1244,ACV,Balance,sample
1264,ACV,Balance,sample
1237,ACV,nan,sample
1238,ACV,nan,sample

Json to CSV python [duplicate]

I have a JSON file I want to convert to a CSV file. How can I do this with Python?
I tried:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
csv_file.writerow(item)
f.close()
However, it did not work. I am using Django and the error I received is:
`file' object has no attribute 'writerow'`
I then tried the following:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
f.writerow(item) # ← changed
f.close()
I then get the error:
`sequence expected`
Sample json file:
[{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
}, {
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
}, {
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}, {
"pk": 4,
"model": "auth.permission",
"fields": {
"codename": "add_group",
"name": "Can add group",
"content_type": 2
}
}, {
"pk": 10,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 4
}
}
]
With the pandas library, this is as easy as using two commands!
df = pd.read_json()
read_json converts a JSON string to a pandas object (either a series or dataframe). Then:
df.to_csv()
Which can either return a string or write directly to a csv-file. See the docs for to_csv.
Based on the verbosity of previous answers, we should all thank pandas for the shortcut.
For unstructured JSON see this answer.
EDIT:
Someone asked for a working minimal example:
import pandas as pd
with open('jsonfile.json', encoding='utf-8') as inputfile:
df = pd.read_json(inputfile)
df.to_csv('csvfile.csv', encoding='utf-8', index=False)
First, your JSON has nested objects, so it normally cannot be directly converted to CSV. You need to change that to something like this:
{
"pk": 22,
"model": "auth.permission",
"codename": "add_logentry",
"content_type": 8,
"name": "Can add log entry"
},
......]
Here is my code to generate CSV from that:
import csv
import json
x = """[
{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
},
{
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
},
{
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}
]"""
x = json.loads(x)
f = csv.writer(open("test.csv", "wb+"))
# Write CSV Header, If you dont need that, remove this line
f.writerow(["pk", "model", "codename", "name", "content_type"])
for x in x:
f.writerow([x["pk"],
x["model"],
x["fields"]["codename"],
x["fields"]["name"],
x["fields"]["content_type"]])
You will get output as:
pk,model,codename,name,content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
I am assuming that your JSON file will decode into a list of dictionaries. First we need a function which will flatten the JSON objects:
def flattenjson(b, delim):
val = {}
for i in b.keys():
if isinstance(b[i], dict):
get = flattenjson(b[i], delim)
for j in get.keys():
val[i + delim + j] = get[j]
else:
val[i] = b[i]
return val
The result of running this snippet on your JSON object:
flattenjson({
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 8
}
}, "__")
is
{
"pk": 22,
"model": "auth.permission",
"fields__codename": "add_message",
"fields__name": "Can add message",
"fields__content_type": 8
}
After applying this function to each dict in the input array of JSON objects:
input = map(lambda x: flattenjson( x, "__" ), input)
and finding the relevant column names:
columns = [x for row in input for x in row.keys()]
columns = list(set(columns))
it's not hard to run this through the csv module:
with open(fname, 'wb') as out_file:
csv_w = csv.writer(out_file)
csv_w.writerow(columns)
for i_r in input:
csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
JSON can represent a wide variety of data structures -- a JS "object" is roughly like a Python dict (with string keys), a JS "array" roughly like a Python list, and you can nest them as long as the final "leaf" elements are numbers or strings.
CSV can essentially represent only a 2-D table -- optionally with a first row of "headers", i.e., "column names", which can make the table interpretable as a list of dicts, instead of the normal interpretation, a list of lists (again, "leaf" elements can be numbers or strings).
So, in the general case, you can't translate an arbitrary JSON structure to a CSV. In a few special cases you can (array of arrays with no further nesting; arrays of objects which all have exactly the same keys). Which special case, if any, applies to your problem? The details of the solution depend on which special case you do have. Given the astonishing fact that you don't even mention which one applies, I suspect you may not have considered the constraint, neither usable case in fact applies, and your problem is impossible to solve. But please do clarify!
A generic solution which translates any json list of flat objects to csv.
Pass the input.json file as first argument on command line.
import csv, json, sys
input = open(sys.argv[1])
data = json.load(input)
input.close()
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
Use json_normalize from pandas:
Using the sample data from the OP in a file named test.json.
encoding='utf-8' has been used here, but may not be necessary for other cases.
The following code takes advantage of the pathlib library.
.open is a method of pathlib.
Works with non-Windows paths too.
Use pandas.to_csv(...) to save the data to a csv file.
import pandas as pd
# As of Pandas 1.01, json_normalize as pandas.io.json.json_normalize is deprecated and is now exposed in the top-level namespace.
# from pandas.io.json import json_normalize
from pathlib import Path
import json
# set path to file
p = Path(r'c:\some_path_to_file\test.json')
# read json
with p.open('r', encoding='utf-8') as f:
data = json.loads(f.read())
# create dataframe
df = pd.json_normalize(data)
# dataframe view
pk model fields.codename fields.name fields.content_type
22 auth.permission add_logentry Can add log entry 8
23 auth.permission change_logentry Can change log entry 8
24 auth.permission delete_logentry Can delete log entry 8
4 auth.permission add_group Can add group 2
10 auth.permission add_message Can add message 4
# save to csv
df.to_csv('test.csv', index=False, encoding='utf-8')
CSV Output:
pk,model,fields.codename,fields.name,fields.content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
4,auth.permission,add_group,Can add group,2
10,auth.permission,add_message,Can add message,4
Resources for more heavily nested JSON objects:
SO Answers:
Flatten a JSON array with python
How to flatten nested JSON recursively, with flatten_json
How to json_normalize a column with NaNs
Split / Explode a column of dictionaries into separate columns with pandas
See the json_normalize tag for other related questions.
This code should work for you, assuming that your JSON data is in a file called data.json.
import json
import csv
with open("data.json") as file:
data = json.load(file)
with open("data.csv", "w") as file:
csv_file = csv.writer(file)
for item in data:
fields = list(item['fields'].values())
csv_file.writerow([item['pk'], item['model']] + fields)
It'll be easy to use csv.DictWriter(),the detailed implementation can be like this:
def read_json(filename):
return json.loads(open(filename).read())
def write_csv(data,filename):
with open(filename, 'w+') as outf:
writer = csv.DictWriter(outf, data[0].keys())
writer.writeheader()
for row in data:
writer.writerow(row)
# implement
write_csv(read_json('test.json'), 'output.csv')
Note that this assumes that all of your JSON objects have the same fields.
Here is the reference which may help you.
I was having trouble with Dan's proposed solution, but this worked for me:
import json
import csv
f = open('test.json')
data = json.load(f)
f.close()
f=csv.writer(open('test.csv','wb+'))
for item in data:
f.writerow([item['pk'], item['model']] + item['fields'].values())
Where "test.json" contained the following:
[
{"pk": 22, "model": "auth.permission", "fields":
{"codename": "add_logentry", "name": "Can add log entry", "content_type": 8 } },
{"pk": 23, "model": "auth.permission", "fields":
{"codename": "change_logentry", "name": "Can change log entry", "content_type": 8 } }, {"pk": 24, "model": "auth.permission", "fields":
{"codename": "delete_logentry", "name": "Can delete log entry", "content_type": 8 } }
]
This is a modification of #MikeRepass's answer. This version writes the CSV to a file, and works for both Python 2 and Python 3.
import csv,json
input_file="data.json"
output_file="data.csv"
with open(input_file) as f:
content=json.load(f)
try:
context=open(output_file,'w',newline='') # Python 3
except TypeError:
context=open(output_file,'wb') # Python 2
with context as file:
writer=csv.writer(file)
writer.writerow(content[0].keys()) # header row
for row in content:
writer.writerow(row.values())
Alec's answer is great, but it doesn't work in the case where there are multiple levels of nesting. Here's a modified version that supports multiple levels of nesting. It also makes the header names a bit nicer if the nested object already specifies its own key (e.g. Firebase Analytics / BigTable / BigQuery data):
"""Converts JSON with nested fields into a flattened CSV file.
"""
import sys
import json
import csv
import os
import jsonlines
from orderedset import OrderedSet
# from https://stackoverflow.com/a/28246154/473201
def flattenjson( b, prefix='', delim='/', val=None ):
if val is None:
val = {}
if isinstance( b, dict ):
for j in b.keys():
flattenjson(b[j], prefix + delim + j, delim, val)
elif isinstance( b, list ):
get = b
for j in range(len(get)):
key = str(j)
# If the nested data contains its own key, use that as the header instead.
if isinstance( get[j], dict ):
if 'key' in get[j]:
key = get[j]['key']
flattenjson(get[j], prefix + delim + key, delim, val)
else:
val[prefix] = b
return val
def main(argv):
if len(argv) < 2:
raise Error('Please specify a JSON file to parse')
print "Loading and Flattening..."
filename = argv[1]
allRows = []
fieldnames = OrderedSet()
with jsonlines.open(filename) as reader:
for obj in reader:
# print 'orig:\n'
# print obj
flattened = flattenjson(obj)
#print 'keys: %s' % flattened.keys()
# print 'flattened:\n'
# print flattened
fieldnames.update(flattened.keys())
allRows.append(flattened)
print "Exporting to CSV..."
outfilename = filename + '.csv'
count = 0
with open(outfilename, 'w') as file:
csvwriter = csv.DictWriter(file, fieldnames=fieldnames)
csvwriter.writeheader()
for obj in allRows:
# print 'allRows:\n'
# print obj
csvwriter.writerow(obj)
count += 1
print "Wrote %d rows" % count
if __name__ == '__main__':
main(sys.argv)
As mentioned in the previous answers the difficulty in converting json to csv is because a json file can contain nested dictionaries and therefore be a multidimensional data structure verses a csv which is a 2D data structure. However, a good way to turn a multidimensional structure to a csv is to have multiple csvs that tie together with primary keys.
In your example, the first csv output has the columns "pk","model","fields" as your columns. Values for "pk", and "model" are easy to get but because the "fields" column contains a dictionary, it should be its own csv and because "codename" appears to the be the primary key, you can use as the input for "fields" to complete the first csv. The second csv contains the dictionary from the "fields" column with codename as the the primary key that can be used to tie the 2 csvs together.
Here is a solution for your json file which converts a nested dictionaries to 2 csvs.
import csv
import json
def readAndWrite(inputFileName, primaryKey=""):
input = open(inputFileName+".json")
data = json.load(input)
input.close()
header = set()
if primaryKey != "":
outputFileName = inputFileName+"-"+primaryKey
if inputFileName == "data":
for i in data:
for j in i["fields"].keys():
if j not in header:
header.add(j)
else:
outputFileName = inputFileName
for i in data:
for j in i.keys():
if j not in header:
header.add(j)
with open(outputFileName+".csv", 'wb') as output_file:
fieldnames = list(header)
writer = csv.DictWriter(output_file, fieldnames, delimiter=',', quotechar='"')
writer.writeheader()
for x in data:
row_value = {}
if primaryKey == "":
for y in x.keys():
yValue = x.get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
else:
if inputFileName == "data":
row_value[y] = yValue["codename"].encode('utf8')
readAndWrite(inputFileName, primaryKey="codename")
writer.writerow(row_value)
elif primaryKey == "codename":
for y in x["fields"].keys():
yValue = x["fields"].get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
writer.writerow(row_value)
readAndWrite("data")
I know it has been a long time since this question has been asked but I thought I might add to everyone else's answer and share a blog post that I think explain the solution in a very concise way.
Here is the link
Open a file for writing
employ_data = open('/tmp/EmployData.csv', 'w')
Create the csv writer object
csvwriter = csv.writer(employ_data)
count = 0
for emp in emp_data:
if count == 0:
header = emp.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(emp.values())
Make sure to close the file in order to save the contents
employ_data.close()
It is not a very smart way to do it, but I have had the same problem and this worked for me:
import csv
f = open('data.json')
data = json.load(f)
f.close()
new_data = []
for i in data:
flat = {}
names = i.keys()
for n in names:
try:
if len(i[n].keys()) > 0:
for ii in i[n].keys():
flat[n+"_"+ii] = i[n][ii]
except:
flat[n] = i[n]
new_data.append(flat)
f = open(filename, "r")
writer = csv.DictWriter(f, new_data[0].keys())
writer.writeheader()
for row in new_data:
writer.writerow(row)
f.close()
Surprisingly, I found that none of the answers posted here so far correctly deal with all possible scenarios (e.g., nested dicts, nested lists, None values, etc).
This solution should work across all scenarios:
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys + [str(idx)], v, flattened)
else:
flattened['__'.join(keys)] = value
flattened = {}
for key in json.keys():
process_value([key], json[key], flattened)
return flattened
My simple way to solve this:
Create a new Python file like: json_to_csv.py
Add this code:
import csv, json, sys
#if you are not using utf-8 files, remove the next line
sys.setdefaultencoding("UTF-8")
#check if you pass the input file and output file
if sys.argv[1] is not None and sys.argv[2] is not None:
fileInput = sys.argv[1]
fileOutput = sys.argv[2]
inputFile = open(fileInput)
outputFile = open(fileOutput, 'w')
data = json.load(inputFile)
inputFile.close()
output = csv.writer(outputFile)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
After add this code, save the file and run at the terminal:
python json_to_csv.py input.txt output.csv
I hope this help you.
SEEYA!
This code works for any given json file
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 17 20:35:35 2019
author: Ram
"""
import json
import csv
with open("file1.json") as file:
data = json.load(file)
# create the csv writer object
pt_data1 = open('pt_data1.csv', 'w')
csvwriter = csv.writer(pt_data1)
count = 0
for pt in data:
if count == 0:
header = pt.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(pt.values())
pt_data1.close()
If we consider the below example for converting the json format file to csv formatted file.
{
"item_data" : [
{
"item": "10023456",
"class": "100",
"subclass": "123"
}
]
}
The below code will convert the json file ( data3.json ) to csv file ( data3.csv ).
import json
import csv
with open("/Users/Desktop/json/data3.json") as file:
data = json.load(file)
file.close()
print(data)
fname = "/Users/Desktop/json/data3.csv"
with open(fname, "w", newline='') as file:
csv_file = csv.writer(file)
csv_file.writerow(['dept',
'class',
'subclass'])
for item in data["item_data"]:
csv_file.writerow([item.get('item_data').get('dept'),
item.get('item_data').get('class'),
item.get('item_data').get('subclass')])
The above mentioned code has been executed in the locally installed pycharm and it has successfully converted the json file to the csv file. Hope this help to convert the files.
This works relatively well.
It flattens the json to write it to a csv file.
Nested elements are managed :)
That's for python 3
import json
o = json.loads('your json string') # Be careful, o must be a list, each of its objects will make a line of the csv.
def flatten(o, k='/'):
global l, c_line
if isinstance(o, dict):
for key, value in o.items():
flatten(value, k + '/' + key)
elif isinstance(o, list):
for ov in o:
flatten(ov, '')
elif isinstance(o, str):
o = o.replace('\r',' ').replace('\n',' ').replace(';', ',')
if not k in l:
l[k]={}
l[k][c_line]=o
def render_csv(l):
ftime = True
for i in range(100): #len(l[list(l.keys())[0]])
for k in l:
if ftime :
print('%s;' % k, end='')
continue
v = l[k]
try:
print('%s;' % v[i], end='')
except:
print(';', end='')
print()
ftime = False
i = 0
def json_to_csv(object_list):
global l, c_line
l = {}
c_line = 0
for ov in object_list : # Assumes json is a list of objects
flatten(ov)
c_line += 1
render_csv(l)
json_to_csv(o)
enjoy.
Modified Alec McGail's answer to support JSON with lists inside
def flattenjson(self, mp, delim="|"):
ret = []
if isinstance(mp, dict):
for k in mp.keys():
csvs = self.flattenjson(mp[k], delim)
for csv in csvs:
ret.append(k + delim + csv)
elif isinstance(mp, list):
for k in mp:
csvs = self.flattenjson(k, delim)
for csv in csvs:
ret.append(csv)
else:
ret.append(mp)
return ret
Thanks!
import json,csv
t=''
t=(type('a'))
json_data = []
data = None
write_header = True
item_keys = []
try:
with open('kk.json') as json_file:
json_data = json_file.read()
data = json.loads(json_data)
except Exception as e:
print( e)
with open('bar.csv', 'at') as csv_file:
writer = csv.writer(csv_file)#, quoting=csv.QUOTE_MINIMAL)
for item in data:
item_values = []
for key in item:
if write_header:
item_keys.append(key)
value = item.get(key, '')
if (type(value)==t):
item_values.append(value.encode('utf-8'))
else:
item_values.append(value)
if write_header:
writer.writerow(item_keys)
write_header = False
writer.writerow(item_values)
Since the data appears to be in a dictionary format, it would appear that you should actually use csv.DictWriter() to actually output the lines with the appropriate header information. This should allow the conversion to be handled somewhat easier. The fieldnames parameter would then set up the order properly while the output of the first line as the headers would allow it to be read and processed later by csv.DictReader().
For example, Mike Repass used
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
However just change the initial setup to
output = csv.DictWriter(filesetting, fieldnames=data[0].keys())
Note that since the order of elements in a dictionary is not defined, you might have to create fieldnames entries explicitly. Once you do that, the writerow will work. The writes then work as originally shown.
Unfortunately I have not enouthg reputation to make a small contribution to the amazing #Alec McGail answer.
I was using Python3 and I have needed to convert the map to a list following the #Alexis R comment.
Additionaly I have found the csv writer was adding a extra CR to the file (I have a empty line for each line with data inside the csv file). The solution was very easy following the #Jason R. Coombs answer to this thread:
CSV in Python adding an extra carriage return
You need to simply add the lineterminator='\n' parameter to the csv.writer. It will be: csv_w = csv.writer( out_file, lineterminator='\n' )
You can use this code to convert a json file to csv file
After reading the file, I am converting the object to pandas dataframe and then saving this to a CSV file
import os
import pandas as pd
import json
import numpy as np
data = []
os.chdir('D:\\Your_directory\\folder')
with open('file_name.json', encoding="utf8") as data_file:
for line in data_file:
data.append(json.loads(line))
dataframe = pd.DataFrame(data)
## Saving the dataframe to a csv file
dataframe.to_csv("filename.csv", encoding='utf-8',index= False)
I have tried a lot of the suggested solution (also Panda was not correctly normalizing my JSON) but the real good one which is parsing correctly the JSON data is from Max Berman.
I wrote an improvement to avoid new columns for each row and
puts it to the existing column during parsing.
It has also the effect to store a value as a string if only one data exists, and make a list if there are more values for that columns.
It takes an input.json file for input and spits out an output.csv.
import json
import pandas as pd
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys, v, flattened)
# process_value(keys + [str(idx)], v, flattened)
else:
key1 = '__'.join(keys)
if not flattened.get(key1) is None:
if isinstance(flattened[key1], list):
flattened[key1] = flattened[key1] + [value]
else:
flattened[key1] = [flattened[key1]] + [value]
else:
flattened[key1] = value
flattened = {}
for key in json.keys():
k = key
# print("Key: " + k)
process_value([key], json[key], flattened)
return flattened
try:
f = open("input.json", "r")
except:
pass
y = json.loads(f.read())
flat = flatten_json(y)
text = json.dumps(flat)
df = pd.read_json(text)
df.to_csv('output.csv', index=False, encoding='utf-8')
I might be late to the party, but I think, I have dealt with the similar problem. I had a json file which looked like this
I only wanted to extract few keys/values from these json file. So, I wrote the following code to extract the same.
"""json_to_csv.py
This script reads n numbers of json files present in a folder and then extract certain data from each file and write in a csv file.
The folder contains the python script i.e. json_to_csv.py, output.csv and another folder descriptions containing all the json files.
"""
import os
import json
import csv
def get_list_of_json_files():
"""Returns the list of filenames of all the Json files present in the folder
Parameter
---------
directory : str
'descriptions' in this case
Returns
-------
list_of_files: list
List of the filenames of all the json files
"""
list_of_files = os.listdir('descriptions') # creates list of all the files in the folder
return list_of_files
def create_list_from_json(jsonfile):
"""Returns a list of the extracted items from json file in the same order we need it.
Parameter
_________
jsonfile : json
The json file containing the data
Returns
-------
one_sample_list : list
The list of the extracted items needed for the final csv
"""
with open(jsonfile) as f:
data = json.load(f)
data_list = [] # create an empty list
# append the items to the list in the same order.
data_list.append(data['_id'])
data_list.append(data['_modelType'])
data_list.append(data['creator']['_id'])
data_list.append(data['creator']['name'])
data_list.append(data['dataset']['_accessLevel'])
data_list.append(data['dataset']['_id'])
data_list.append(data['dataset']['description'])
data_list.append(data['dataset']['name'])
data_list.append(data['meta']['acquisition']['image_type'])
data_list.append(data['meta']['acquisition']['pixelsX'])
data_list.append(data['meta']['acquisition']['pixelsY'])
data_list.append(data['meta']['clinical']['age_approx'])
data_list.append(data['meta']['clinical']['benign_malignant'])
data_list.append(data['meta']['clinical']['diagnosis'])
data_list.append(data['meta']['clinical']['diagnosis_confirm_type'])
data_list.append(data['meta']['clinical']['melanocytic'])
data_list.append(data['meta']['clinical']['sex'])
data_list.append(data['meta']['unstructured']['diagnosis'])
# In few json files, the race was not there so using KeyError exception to add '' at the place
try:
data_list.append(data['meta']['unstructured']['race'])
except KeyError:
data_list.append("") # will add an empty string in case race is not there.
data_list.append(data['name'])
return data_list
def write_csv():
"""Creates the desired csv file
Parameters
__________
list_of_files : file
The list created by get_list_of_json_files() method
result.csv : csv
The csv file containing the header only
Returns
_______
result.csv : csv
The desired csv file
"""
list_of_files = get_list_of_json_files()
for file in list_of_files:
row = create_list_from_json(f'descriptions/{file}') # create the row to be added to csv for each file (json-file)
with open('output.csv', 'a') as c:
writer = csv.writer(c)
writer.writerow(row)
c.close()
if __name__ == '__main__':
write_csv()
I hope this will help. For details on how this code work you can check here

How do I convert JSON data from a URL to CSV in Python? [duplicate]

I have a JSON file I want to convert to a CSV file. How can I do this with Python?
I tried:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
csv_file.writerow(item)
f.close()
However, it did not work. I am using Django and the error I received is:
`file' object has no attribute 'writerow'`
I then tried the following:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
f.writerow(item) # ← changed
f.close()
I then get the error:
`sequence expected`
Sample json file:
[{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
}, {
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
}, {
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}, {
"pk": 4,
"model": "auth.permission",
"fields": {
"codename": "add_group",
"name": "Can add group",
"content_type": 2
}
}, {
"pk": 10,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 4
}
}
]
With the pandas library, this is as easy as using two commands!
df = pd.read_json()
read_json converts a JSON string to a pandas object (either a series or dataframe). Then:
df.to_csv()
Which can either return a string or write directly to a csv-file. See the docs for to_csv.
Based on the verbosity of previous answers, we should all thank pandas for the shortcut.
For unstructured JSON see this answer.
EDIT:
Someone asked for a working minimal example:
import pandas as pd
with open('jsonfile.json', encoding='utf-8') as inputfile:
df = pd.read_json(inputfile)
df.to_csv('csvfile.csv', encoding='utf-8', index=False)
First, your JSON has nested objects, so it normally cannot be directly converted to CSV. You need to change that to something like this:
{
"pk": 22,
"model": "auth.permission",
"codename": "add_logentry",
"content_type": 8,
"name": "Can add log entry"
},
......]
Here is my code to generate CSV from that:
import csv
import json
x = """[
{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
},
{
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
},
{
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}
]"""
x = json.loads(x)
f = csv.writer(open("test.csv", "wb+"))
# Write CSV Header, If you dont need that, remove this line
f.writerow(["pk", "model", "codename", "name", "content_type"])
for x in x:
f.writerow([x["pk"],
x["model"],
x["fields"]["codename"],
x["fields"]["name"],
x["fields"]["content_type"]])
You will get output as:
pk,model,codename,name,content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
I am assuming that your JSON file will decode into a list of dictionaries. First we need a function which will flatten the JSON objects:
def flattenjson(b, delim):
val = {}
for i in b.keys():
if isinstance(b[i], dict):
get = flattenjson(b[i], delim)
for j in get.keys():
val[i + delim + j] = get[j]
else:
val[i] = b[i]
return val
The result of running this snippet on your JSON object:
flattenjson({
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 8
}
}, "__")
is
{
"pk": 22,
"model": "auth.permission",
"fields__codename": "add_message",
"fields__name": "Can add message",
"fields__content_type": 8
}
After applying this function to each dict in the input array of JSON objects:
input = map(lambda x: flattenjson( x, "__" ), input)
and finding the relevant column names:
columns = [x for row in input for x in row.keys()]
columns = list(set(columns))
it's not hard to run this through the csv module:
with open(fname, 'wb') as out_file:
csv_w = csv.writer(out_file)
csv_w.writerow(columns)
for i_r in input:
csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
JSON can represent a wide variety of data structures -- a JS "object" is roughly like a Python dict (with string keys), a JS "array" roughly like a Python list, and you can nest them as long as the final "leaf" elements are numbers or strings.
CSV can essentially represent only a 2-D table -- optionally with a first row of "headers", i.e., "column names", which can make the table interpretable as a list of dicts, instead of the normal interpretation, a list of lists (again, "leaf" elements can be numbers or strings).
So, in the general case, you can't translate an arbitrary JSON structure to a CSV. In a few special cases you can (array of arrays with no further nesting; arrays of objects which all have exactly the same keys). Which special case, if any, applies to your problem? The details of the solution depend on which special case you do have. Given the astonishing fact that you don't even mention which one applies, I suspect you may not have considered the constraint, neither usable case in fact applies, and your problem is impossible to solve. But please do clarify!
A generic solution which translates any json list of flat objects to csv.
Pass the input.json file as first argument on command line.
import csv, json, sys
input = open(sys.argv[1])
data = json.load(input)
input.close()
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
Use json_normalize from pandas:
Using the sample data from the OP in a file named test.json.
encoding='utf-8' has been used here, but may not be necessary for other cases.
The following code takes advantage of the pathlib library.
.open is a method of pathlib.
Works with non-Windows paths too.
Use pandas.to_csv(...) to save the data to a csv file.
import pandas as pd
# As of Pandas 1.01, json_normalize as pandas.io.json.json_normalize is deprecated and is now exposed in the top-level namespace.
# from pandas.io.json import json_normalize
from pathlib import Path
import json
# set path to file
p = Path(r'c:\some_path_to_file\test.json')
# read json
with p.open('r', encoding='utf-8') as f:
data = json.loads(f.read())
# create dataframe
df = pd.json_normalize(data)
# dataframe view
pk model fields.codename fields.name fields.content_type
22 auth.permission add_logentry Can add log entry 8
23 auth.permission change_logentry Can change log entry 8
24 auth.permission delete_logentry Can delete log entry 8
4 auth.permission add_group Can add group 2
10 auth.permission add_message Can add message 4
# save to csv
df.to_csv('test.csv', index=False, encoding='utf-8')
CSV Output:
pk,model,fields.codename,fields.name,fields.content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
4,auth.permission,add_group,Can add group,2
10,auth.permission,add_message,Can add message,4
Resources for more heavily nested JSON objects:
SO Answers:
Flatten a JSON array with python
How to flatten nested JSON recursively, with flatten_json
How to json_normalize a column with NaNs
Split / Explode a column of dictionaries into separate columns with pandas
See the json_normalize tag for other related questions.
This code should work for you, assuming that your JSON data is in a file called data.json.
import json
import csv
with open("data.json") as file:
data = json.load(file)
with open("data.csv", "w") as file:
csv_file = csv.writer(file)
for item in data:
fields = list(item['fields'].values())
csv_file.writerow([item['pk'], item['model']] + fields)
It'll be easy to use csv.DictWriter(),the detailed implementation can be like this:
def read_json(filename):
return json.loads(open(filename).read())
def write_csv(data,filename):
with open(filename, 'w+') as outf:
writer = csv.DictWriter(outf, data[0].keys())
writer.writeheader()
for row in data:
writer.writerow(row)
# implement
write_csv(read_json('test.json'), 'output.csv')
Note that this assumes that all of your JSON objects have the same fields.
Here is the reference which may help you.
I was having trouble with Dan's proposed solution, but this worked for me:
import json
import csv
f = open('test.json')
data = json.load(f)
f.close()
f=csv.writer(open('test.csv','wb+'))
for item in data:
f.writerow([item['pk'], item['model']] + item['fields'].values())
Where "test.json" contained the following:
[
{"pk": 22, "model": "auth.permission", "fields":
{"codename": "add_logentry", "name": "Can add log entry", "content_type": 8 } },
{"pk": 23, "model": "auth.permission", "fields":
{"codename": "change_logentry", "name": "Can change log entry", "content_type": 8 } }, {"pk": 24, "model": "auth.permission", "fields":
{"codename": "delete_logentry", "name": "Can delete log entry", "content_type": 8 } }
]
This is a modification of #MikeRepass's answer. This version writes the CSV to a file, and works for both Python 2 and Python 3.
import csv,json
input_file="data.json"
output_file="data.csv"
with open(input_file) as f:
content=json.load(f)
try:
context=open(output_file,'w',newline='') # Python 3
except TypeError:
context=open(output_file,'wb') # Python 2
with context as file:
writer=csv.writer(file)
writer.writerow(content[0].keys()) # header row
for row in content:
writer.writerow(row.values())
Alec's answer is great, but it doesn't work in the case where there are multiple levels of nesting. Here's a modified version that supports multiple levels of nesting. It also makes the header names a bit nicer if the nested object already specifies its own key (e.g. Firebase Analytics / BigTable / BigQuery data):
"""Converts JSON with nested fields into a flattened CSV file.
"""
import sys
import json
import csv
import os
import jsonlines
from orderedset import OrderedSet
# from https://stackoverflow.com/a/28246154/473201
def flattenjson( b, prefix='', delim='/', val=None ):
if val is None:
val = {}
if isinstance( b, dict ):
for j in b.keys():
flattenjson(b[j], prefix + delim + j, delim, val)
elif isinstance( b, list ):
get = b
for j in range(len(get)):
key = str(j)
# If the nested data contains its own key, use that as the header instead.
if isinstance( get[j], dict ):
if 'key' in get[j]:
key = get[j]['key']
flattenjson(get[j], prefix + delim + key, delim, val)
else:
val[prefix] = b
return val
def main(argv):
if len(argv) < 2:
raise Error('Please specify a JSON file to parse')
print "Loading and Flattening..."
filename = argv[1]
allRows = []
fieldnames = OrderedSet()
with jsonlines.open(filename) as reader:
for obj in reader:
# print 'orig:\n'
# print obj
flattened = flattenjson(obj)
#print 'keys: %s' % flattened.keys()
# print 'flattened:\n'
# print flattened
fieldnames.update(flattened.keys())
allRows.append(flattened)
print "Exporting to CSV..."
outfilename = filename + '.csv'
count = 0
with open(outfilename, 'w') as file:
csvwriter = csv.DictWriter(file, fieldnames=fieldnames)
csvwriter.writeheader()
for obj in allRows:
# print 'allRows:\n'
# print obj
csvwriter.writerow(obj)
count += 1
print "Wrote %d rows" % count
if __name__ == '__main__':
main(sys.argv)
As mentioned in the previous answers the difficulty in converting json to csv is because a json file can contain nested dictionaries and therefore be a multidimensional data structure verses a csv which is a 2D data structure. However, a good way to turn a multidimensional structure to a csv is to have multiple csvs that tie together with primary keys.
In your example, the first csv output has the columns "pk","model","fields" as your columns. Values for "pk", and "model" are easy to get but because the "fields" column contains a dictionary, it should be its own csv and because "codename" appears to the be the primary key, you can use as the input for "fields" to complete the first csv. The second csv contains the dictionary from the "fields" column with codename as the the primary key that can be used to tie the 2 csvs together.
Here is a solution for your json file which converts a nested dictionaries to 2 csvs.
import csv
import json
def readAndWrite(inputFileName, primaryKey=""):
input = open(inputFileName+".json")
data = json.load(input)
input.close()
header = set()
if primaryKey != "":
outputFileName = inputFileName+"-"+primaryKey
if inputFileName == "data":
for i in data:
for j in i["fields"].keys():
if j not in header:
header.add(j)
else:
outputFileName = inputFileName
for i in data:
for j in i.keys():
if j not in header:
header.add(j)
with open(outputFileName+".csv", 'wb') as output_file:
fieldnames = list(header)
writer = csv.DictWriter(output_file, fieldnames, delimiter=',', quotechar='"')
writer.writeheader()
for x in data:
row_value = {}
if primaryKey == "":
for y in x.keys():
yValue = x.get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
else:
if inputFileName == "data":
row_value[y] = yValue["codename"].encode('utf8')
readAndWrite(inputFileName, primaryKey="codename")
writer.writerow(row_value)
elif primaryKey == "codename":
for y in x["fields"].keys():
yValue = x["fields"].get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
writer.writerow(row_value)
readAndWrite("data")
I know it has been a long time since this question has been asked but I thought I might add to everyone else's answer and share a blog post that I think explain the solution in a very concise way.
Here is the link
Open a file for writing
employ_data = open('/tmp/EmployData.csv', 'w')
Create the csv writer object
csvwriter = csv.writer(employ_data)
count = 0
for emp in emp_data:
if count == 0:
header = emp.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(emp.values())
Make sure to close the file in order to save the contents
employ_data.close()
It is not a very smart way to do it, but I have had the same problem and this worked for me:
import csv
f = open('data.json')
data = json.load(f)
f.close()
new_data = []
for i in data:
flat = {}
names = i.keys()
for n in names:
try:
if len(i[n].keys()) > 0:
for ii in i[n].keys():
flat[n+"_"+ii] = i[n][ii]
except:
flat[n] = i[n]
new_data.append(flat)
f = open(filename, "r")
writer = csv.DictWriter(f, new_data[0].keys())
writer.writeheader()
for row in new_data:
writer.writerow(row)
f.close()
Surprisingly, I found that none of the answers posted here so far correctly deal with all possible scenarios (e.g., nested dicts, nested lists, None values, etc).
This solution should work across all scenarios:
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys + [str(idx)], v, flattened)
else:
flattened['__'.join(keys)] = value
flattened = {}
for key in json.keys():
process_value([key], json[key], flattened)
return flattened
My simple way to solve this:
Create a new Python file like: json_to_csv.py
Add this code:
import csv, json, sys
#if you are not using utf-8 files, remove the next line
sys.setdefaultencoding("UTF-8")
#check if you pass the input file and output file
if sys.argv[1] is not None and sys.argv[2] is not None:
fileInput = sys.argv[1]
fileOutput = sys.argv[2]
inputFile = open(fileInput)
outputFile = open(fileOutput, 'w')
data = json.load(inputFile)
inputFile.close()
output = csv.writer(outputFile)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
After add this code, save the file and run at the terminal:
python json_to_csv.py input.txt output.csv
I hope this help you.
SEEYA!
This code works for any given json file
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 17 20:35:35 2019
author: Ram
"""
import json
import csv
with open("file1.json") as file:
data = json.load(file)
# create the csv writer object
pt_data1 = open('pt_data1.csv', 'w')
csvwriter = csv.writer(pt_data1)
count = 0
for pt in data:
if count == 0:
header = pt.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(pt.values())
pt_data1.close()
If we consider the below example for converting the json format file to csv formatted file.
{
"item_data" : [
{
"item": "10023456",
"class": "100",
"subclass": "123"
}
]
}
The below code will convert the json file ( data3.json ) to csv file ( data3.csv ).
import json
import csv
with open("/Users/Desktop/json/data3.json") as file:
data = json.load(file)
file.close()
print(data)
fname = "/Users/Desktop/json/data3.csv"
with open(fname, "w", newline='') as file:
csv_file = csv.writer(file)
csv_file.writerow(['dept',
'class',
'subclass'])
for item in data["item_data"]:
csv_file.writerow([item.get('item_data').get('dept'),
item.get('item_data').get('class'),
item.get('item_data').get('subclass')])
The above mentioned code has been executed in the locally installed pycharm and it has successfully converted the json file to the csv file. Hope this help to convert the files.
This works relatively well.
It flattens the json to write it to a csv file.
Nested elements are managed :)
That's for python 3
import json
o = json.loads('your json string') # Be careful, o must be a list, each of its objects will make a line of the csv.
def flatten(o, k='/'):
global l, c_line
if isinstance(o, dict):
for key, value in o.items():
flatten(value, k + '/' + key)
elif isinstance(o, list):
for ov in o:
flatten(ov, '')
elif isinstance(o, str):
o = o.replace('\r',' ').replace('\n',' ').replace(';', ',')
if not k in l:
l[k]={}
l[k][c_line]=o
def render_csv(l):
ftime = True
for i in range(100): #len(l[list(l.keys())[0]])
for k in l:
if ftime :
print('%s;' % k, end='')
continue
v = l[k]
try:
print('%s;' % v[i], end='')
except:
print(';', end='')
print()
ftime = False
i = 0
def json_to_csv(object_list):
global l, c_line
l = {}
c_line = 0
for ov in object_list : # Assumes json is a list of objects
flatten(ov)
c_line += 1
render_csv(l)
json_to_csv(o)
enjoy.
Modified Alec McGail's answer to support JSON with lists inside
def flattenjson(self, mp, delim="|"):
ret = []
if isinstance(mp, dict):
for k in mp.keys():
csvs = self.flattenjson(mp[k], delim)
for csv in csvs:
ret.append(k + delim + csv)
elif isinstance(mp, list):
for k in mp:
csvs = self.flattenjson(k, delim)
for csv in csvs:
ret.append(csv)
else:
ret.append(mp)
return ret
Thanks!
import json,csv
t=''
t=(type('a'))
json_data = []
data = None
write_header = True
item_keys = []
try:
with open('kk.json') as json_file:
json_data = json_file.read()
data = json.loads(json_data)
except Exception as e:
print( e)
with open('bar.csv', 'at') as csv_file:
writer = csv.writer(csv_file)#, quoting=csv.QUOTE_MINIMAL)
for item in data:
item_values = []
for key in item:
if write_header:
item_keys.append(key)
value = item.get(key, '')
if (type(value)==t):
item_values.append(value.encode('utf-8'))
else:
item_values.append(value)
if write_header:
writer.writerow(item_keys)
write_header = False
writer.writerow(item_values)
Since the data appears to be in a dictionary format, it would appear that you should actually use csv.DictWriter() to actually output the lines with the appropriate header information. This should allow the conversion to be handled somewhat easier. The fieldnames parameter would then set up the order properly while the output of the first line as the headers would allow it to be read and processed later by csv.DictReader().
For example, Mike Repass used
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
However just change the initial setup to
output = csv.DictWriter(filesetting, fieldnames=data[0].keys())
Note that since the order of elements in a dictionary is not defined, you might have to create fieldnames entries explicitly. Once you do that, the writerow will work. The writes then work as originally shown.
Unfortunately I have not enouthg reputation to make a small contribution to the amazing #Alec McGail answer.
I was using Python3 and I have needed to convert the map to a list following the #Alexis R comment.
Additionaly I have found the csv writer was adding a extra CR to the file (I have a empty line for each line with data inside the csv file). The solution was very easy following the #Jason R. Coombs answer to this thread:
CSV in Python adding an extra carriage return
You need to simply add the lineterminator='\n' parameter to the csv.writer. It will be: csv_w = csv.writer( out_file, lineterminator='\n' )
You can use this code to convert a json file to csv file
After reading the file, I am converting the object to pandas dataframe and then saving this to a CSV file
import os
import pandas as pd
import json
import numpy as np
data = []
os.chdir('D:\\Your_directory\\folder')
with open('file_name.json', encoding="utf8") as data_file:
for line in data_file:
data.append(json.loads(line))
dataframe = pd.DataFrame(data)
## Saving the dataframe to a csv file
dataframe.to_csv("filename.csv", encoding='utf-8',index= False)
I have tried a lot of the suggested solution (also Panda was not correctly normalizing my JSON) but the real good one which is parsing correctly the JSON data is from Max Berman.
I wrote an improvement to avoid new columns for each row and
puts it to the existing column during parsing.
It has also the effect to store a value as a string if only one data exists, and make a list if there are more values for that columns.
It takes an input.json file for input and spits out an output.csv.
import json
import pandas as pd
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys, v, flattened)
# process_value(keys + [str(idx)], v, flattened)
else:
key1 = '__'.join(keys)
if not flattened.get(key1) is None:
if isinstance(flattened[key1], list):
flattened[key1] = flattened[key1] + [value]
else:
flattened[key1] = [flattened[key1]] + [value]
else:
flattened[key1] = value
flattened = {}
for key in json.keys():
k = key
# print("Key: " + k)
process_value([key], json[key], flattened)
return flattened
try:
f = open("input.json", "r")
except:
pass
y = json.loads(f.read())
flat = flatten_json(y)
text = json.dumps(flat)
df = pd.read_json(text)
df.to_csv('output.csv', index=False, encoding='utf-8')
I might be late to the party, but I think, I have dealt with the similar problem. I had a json file which looked like this
I only wanted to extract few keys/values from these json file. So, I wrote the following code to extract the same.
"""json_to_csv.py
This script reads n numbers of json files present in a folder and then extract certain data from each file and write in a csv file.
The folder contains the python script i.e. json_to_csv.py, output.csv and another folder descriptions containing all the json files.
"""
import os
import json
import csv
def get_list_of_json_files():
"""Returns the list of filenames of all the Json files present in the folder
Parameter
---------
directory : str
'descriptions' in this case
Returns
-------
list_of_files: list
List of the filenames of all the json files
"""
list_of_files = os.listdir('descriptions') # creates list of all the files in the folder
return list_of_files
def create_list_from_json(jsonfile):
"""Returns a list of the extracted items from json file in the same order we need it.
Parameter
_________
jsonfile : json
The json file containing the data
Returns
-------
one_sample_list : list
The list of the extracted items needed for the final csv
"""
with open(jsonfile) as f:
data = json.load(f)
data_list = [] # create an empty list
# append the items to the list in the same order.
data_list.append(data['_id'])
data_list.append(data['_modelType'])
data_list.append(data['creator']['_id'])
data_list.append(data['creator']['name'])
data_list.append(data['dataset']['_accessLevel'])
data_list.append(data['dataset']['_id'])
data_list.append(data['dataset']['description'])
data_list.append(data['dataset']['name'])
data_list.append(data['meta']['acquisition']['image_type'])
data_list.append(data['meta']['acquisition']['pixelsX'])
data_list.append(data['meta']['acquisition']['pixelsY'])
data_list.append(data['meta']['clinical']['age_approx'])
data_list.append(data['meta']['clinical']['benign_malignant'])
data_list.append(data['meta']['clinical']['diagnosis'])
data_list.append(data['meta']['clinical']['diagnosis_confirm_type'])
data_list.append(data['meta']['clinical']['melanocytic'])
data_list.append(data['meta']['clinical']['sex'])
data_list.append(data['meta']['unstructured']['diagnosis'])
# In few json files, the race was not there so using KeyError exception to add '' at the place
try:
data_list.append(data['meta']['unstructured']['race'])
except KeyError:
data_list.append("") # will add an empty string in case race is not there.
data_list.append(data['name'])
return data_list
def write_csv():
"""Creates the desired csv file
Parameters
__________
list_of_files : file
The list created by get_list_of_json_files() method
result.csv : csv
The csv file containing the header only
Returns
_______
result.csv : csv
The desired csv file
"""
list_of_files = get_list_of_json_files()
for file in list_of_files:
row = create_list_from_json(f'descriptions/{file}') # create the row to be added to csv for each file (json-file)
with open('output.csv', 'a') as c:
writer = csv.writer(c)
writer.writerow(row)
c.close()
if __name__ == '__main__':
write_csv()
I hope this will help. For details on how this code work you can check here

How can I convert JSON to CSV?

I have a JSON file I want to convert to a CSV file. How can I do this with Python?
I tried:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
csv_file.writerow(item)
f.close()
However, it did not work. I am using Django and the error I received is:
`file' object has no attribute 'writerow'`
I then tried the following:
import json
import csv
f = open('data.json')
data = json.load(f)
f.close()
f = open('data.csv')
csv_file = csv.writer(f)
for item in data:
f.writerow(item) # ← changed
f.close()
I then get the error:
`sequence expected`
Sample json file:
[{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
}, {
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
}, {
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}, {
"pk": 4,
"model": "auth.permission",
"fields": {
"codename": "add_group",
"name": "Can add group",
"content_type": 2
}
}, {
"pk": 10,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 4
}
}
]
With the pandas library, this is as easy as using two commands!
df = pd.read_json()
read_json converts a JSON string to a pandas object (either a series or dataframe). Then:
df.to_csv()
Which can either return a string or write directly to a csv-file. See the docs for to_csv.
Based on the verbosity of previous answers, we should all thank pandas for the shortcut.
For unstructured JSON see this answer.
EDIT:
Someone asked for a working minimal example:
import pandas as pd
with open('jsonfile.json', encoding='utf-8') as inputfile:
df = pd.read_json(inputfile)
df.to_csv('csvfile.csv', encoding='utf-8', index=False)
First, your JSON has nested objects, so it normally cannot be directly converted to CSV. You need to change that to something like this:
{
"pk": 22,
"model": "auth.permission",
"codename": "add_logentry",
"content_type": 8,
"name": "Can add log entry"
},
......]
Here is my code to generate CSV from that:
import csv
import json
x = """[
{
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_logentry",
"name": "Can add log entry",
"content_type": 8
}
},
{
"pk": 23,
"model": "auth.permission",
"fields": {
"codename": "change_logentry",
"name": "Can change log entry",
"content_type": 8
}
},
{
"pk": 24,
"model": "auth.permission",
"fields": {
"codename": "delete_logentry",
"name": "Can delete log entry",
"content_type": 8
}
}
]"""
x = json.loads(x)
f = csv.writer(open("test.csv", "wb+"))
# Write CSV Header, If you dont need that, remove this line
f.writerow(["pk", "model", "codename", "name", "content_type"])
for x in x:
f.writerow([x["pk"],
x["model"],
x["fields"]["codename"],
x["fields"]["name"],
x["fields"]["content_type"]])
You will get output as:
pk,model,codename,name,content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
I am assuming that your JSON file will decode into a list of dictionaries. First we need a function which will flatten the JSON objects:
def flattenjson(b, delim):
val = {}
for i in b.keys():
if isinstance(b[i], dict):
get = flattenjson(b[i], delim)
for j in get.keys():
val[i + delim + j] = get[j]
else:
val[i] = b[i]
return val
The result of running this snippet on your JSON object:
flattenjson({
"pk": 22,
"model": "auth.permission",
"fields": {
"codename": "add_message",
"name": "Can add message",
"content_type": 8
}
}, "__")
is
{
"pk": 22,
"model": "auth.permission",
"fields__codename": "add_message",
"fields__name": "Can add message",
"fields__content_type": 8
}
After applying this function to each dict in the input array of JSON objects:
input = map(lambda x: flattenjson( x, "__" ), input)
and finding the relevant column names:
columns = [x for row in input for x in row.keys()]
columns = list(set(columns))
it's not hard to run this through the csv module:
with open(fname, 'wb') as out_file:
csv_w = csv.writer(out_file)
csv_w.writerow(columns)
for i_r in input:
csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
JSON can represent a wide variety of data structures -- a JS "object" is roughly like a Python dict (with string keys), a JS "array" roughly like a Python list, and you can nest them as long as the final "leaf" elements are numbers or strings.
CSV can essentially represent only a 2-D table -- optionally with a first row of "headers", i.e., "column names", which can make the table interpretable as a list of dicts, instead of the normal interpretation, a list of lists (again, "leaf" elements can be numbers or strings).
So, in the general case, you can't translate an arbitrary JSON structure to a CSV. In a few special cases you can (array of arrays with no further nesting; arrays of objects which all have exactly the same keys). Which special case, if any, applies to your problem? The details of the solution depend on which special case you do have. Given the astonishing fact that you don't even mention which one applies, I suspect you may not have considered the constraint, neither usable case in fact applies, and your problem is impossible to solve. But please do clarify!
A generic solution which translates any json list of flat objects to csv.
Pass the input.json file as first argument on command line.
import csv, json, sys
input = open(sys.argv[1])
data = json.load(input)
input.close()
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
Use json_normalize from pandas:
Using the sample data from the OP in a file named test.json.
encoding='utf-8' has been used here, but may not be necessary for other cases.
The following code takes advantage of the pathlib library.
.open is a method of pathlib.
Works with non-Windows paths too.
Use pandas.to_csv(...) to save the data to a csv file.
import pandas as pd
# As of Pandas 1.01, json_normalize as pandas.io.json.json_normalize is deprecated and is now exposed in the top-level namespace.
# from pandas.io.json import json_normalize
from pathlib import Path
import json
# set path to file
p = Path(r'c:\some_path_to_file\test.json')
# read json
with p.open('r', encoding='utf-8') as f:
data = json.loads(f.read())
# create dataframe
df = pd.json_normalize(data)
# dataframe view
pk model fields.codename fields.name fields.content_type
22 auth.permission add_logentry Can add log entry 8
23 auth.permission change_logentry Can change log entry 8
24 auth.permission delete_logentry Can delete log entry 8
4 auth.permission add_group Can add group 2
10 auth.permission add_message Can add message 4
# save to csv
df.to_csv('test.csv', index=False, encoding='utf-8')
CSV Output:
pk,model,fields.codename,fields.name,fields.content_type
22,auth.permission,add_logentry,Can add log entry,8
23,auth.permission,change_logentry,Can change log entry,8
24,auth.permission,delete_logentry,Can delete log entry,8
4,auth.permission,add_group,Can add group,2
10,auth.permission,add_message,Can add message,4
Resources for more heavily nested JSON objects:
SO Answers:
Flatten a JSON array with python
How to flatten nested JSON recursively, with flatten_json
How to json_normalize a column with NaNs
Split / Explode a column of dictionaries into separate columns with pandas
See the json_normalize tag for other related questions.
This code should work for you, assuming that your JSON data is in a file called data.json.
import json
import csv
with open("data.json") as file:
data = json.load(file)
with open("data.csv", "w") as file:
csv_file = csv.writer(file)
for item in data:
fields = list(item['fields'].values())
csv_file.writerow([item['pk'], item['model']] + fields)
It'll be easy to use csv.DictWriter(),the detailed implementation can be like this:
def read_json(filename):
return json.loads(open(filename).read())
def write_csv(data,filename):
with open(filename, 'w+') as outf:
writer = csv.DictWriter(outf, data[0].keys())
writer.writeheader()
for row in data:
writer.writerow(row)
# implement
write_csv(read_json('test.json'), 'output.csv')
Note that this assumes that all of your JSON objects have the same fields.
Here is the reference which may help you.
I was having trouble with Dan's proposed solution, but this worked for me:
import json
import csv
f = open('test.json')
data = json.load(f)
f.close()
f=csv.writer(open('test.csv','wb+'))
for item in data:
f.writerow([item['pk'], item['model']] + item['fields'].values())
Where "test.json" contained the following:
[
{"pk": 22, "model": "auth.permission", "fields":
{"codename": "add_logentry", "name": "Can add log entry", "content_type": 8 } },
{"pk": 23, "model": "auth.permission", "fields":
{"codename": "change_logentry", "name": "Can change log entry", "content_type": 8 } }, {"pk": 24, "model": "auth.permission", "fields":
{"codename": "delete_logentry", "name": "Can delete log entry", "content_type": 8 } }
]
This is a modification of #MikeRepass's answer. This version writes the CSV to a file, and works for both Python 2 and Python 3.
import csv,json
input_file="data.json"
output_file="data.csv"
with open(input_file) as f:
content=json.load(f)
try:
context=open(output_file,'w',newline='') # Python 3
except TypeError:
context=open(output_file,'wb') # Python 2
with context as file:
writer=csv.writer(file)
writer.writerow(content[0].keys()) # header row
for row in content:
writer.writerow(row.values())
Alec's answer is great, but it doesn't work in the case where there are multiple levels of nesting. Here's a modified version that supports multiple levels of nesting. It also makes the header names a bit nicer if the nested object already specifies its own key (e.g. Firebase Analytics / BigTable / BigQuery data):
"""Converts JSON with nested fields into a flattened CSV file.
"""
import sys
import json
import csv
import os
import jsonlines
from orderedset import OrderedSet
# from https://stackoverflow.com/a/28246154/473201
def flattenjson( b, prefix='', delim='/', val=None ):
if val is None:
val = {}
if isinstance( b, dict ):
for j in b.keys():
flattenjson(b[j], prefix + delim + j, delim, val)
elif isinstance( b, list ):
get = b
for j in range(len(get)):
key = str(j)
# If the nested data contains its own key, use that as the header instead.
if isinstance( get[j], dict ):
if 'key' in get[j]:
key = get[j]['key']
flattenjson(get[j], prefix + delim + key, delim, val)
else:
val[prefix] = b
return val
def main(argv):
if len(argv) < 2:
raise Error('Please specify a JSON file to parse')
print "Loading and Flattening..."
filename = argv[1]
allRows = []
fieldnames = OrderedSet()
with jsonlines.open(filename) as reader:
for obj in reader:
# print 'orig:\n'
# print obj
flattened = flattenjson(obj)
#print 'keys: %s' % flattened.keys()
# print 'flattened:\n'
# print flattened
fieldnames.update(flattened.keys())
allRows.append(flattened)
print "Exporting to CSV..."
outfilename = filename + '.csv'
count = 0
with open(outfilename, 'w') as file:
csvwriter = csv.DictWriter(file, fieldnames=fieldnames)
csvwriter.writeheader()
for obj in allRows:
# print 'allRows:\n'
# print obj
csvwriter.writerow(obj)
count += 1
print "Wrote %d rows" % count
if __name__ == '__main__':
main(sys.argv)
As mentioned in the previous answers the difficulty in converting json to csv is because a json file can contain nested dictionaries and therefore be a multidimensional data structure verses a csv which is a 2D data structure. However, a good way to turn a multidimensional structure to a csv is to have multiple csvs that tie together with primary keys.
In your example, the first csv output has the columns "pk","model","fields" as your columns. Values for "pk", and "model" are easy to get but because the "fields" column contains a dictionary, it should be its own csv and because "codename" appears to the be the primary key, you can use as the input for "fields" to complete the first csv. The second csv contains the dictionary from the "fields" column with codename as the the primary key that can be used to tie the 2 csvs together.
Here is a solution for your json file which converts a nested dictionaries to 2 csvs.
import csv
import json
def readAndWrite(inputFileName, primaryKey=""):
input = open(inputFileName+".json")
data = json.load(input)
input.close()
header = set()
if primaryKey != "":
outputFileName = inputFileName+"-"+primaryKey
if inputFileName == "data":
for i in data:
for j in i["fields"].keys():
if j not in header:
header.add(j)
else:
outputFileName = inputFileName
for i in data:
for j in i.keys():
if j not in header:
header.add(j)
with open(outputFileName+".csv", 'wb') as output_file:
fieldnames = list(header)
writer = csv.DictWriter(output_file, fieldnames, delimiter=',', quotechar='"')
writer.writeheader()
for x in data:
row_value = {}
if primaryKey == "":
for y in x.keys():
yValue = x.get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
else:
if inputFileName == "data":
row_value[y] = yValue["codename"].encode('utf8')
readAndWrite(inputFileName, primaryKey="codename")
writer.writerow(row_value)
elif primaryKey == "codename":
for y in x["fields"].keys():
yValue = x["fields"].get(y)
if type(yValue) == int or type(yValue) == bool or type(yValue) == float or type(yValue) == list:
row_value[y] = str(yValue).encode('utf8')
elif type(yValue) != dict:
row_value[y] = yValue.encode('utf8')
writer.writerow(row_value)
readAndWrite("data")
I know it has been a long time since this question has been asked but I thought I might add to everyone else's answer and share a blog post that I think explain the solution in a very concise way.
Here is the link
Open a file for writing
employ_data = open('/tmp/EmployData.csv', 'w')
Create the csv writer object
csvwriter = csv.writer(employ_data)
count = 0
for emp in emp_data:
if count == 0:
header = emp.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(emp.values())
Make sure to close the file in order to save the contents
employ_data.close()
It is not a very smart way to do it, but I have had the same problem and this worked for me:
import csv
f = open('data.json')
data = json.load(f)
f.close()
new_data = []
for i in data:
flat = {}
names = i.keys()
for n in names:
try:
if len(i[n].keys()) > 0:
for ii in i[n].keys():
flat[n+"_"+ii] = i[n][ii]
except:
flat[n] = i[n]
new_data.append(flat)
f = open(filename, "r")
writer = csv.DictWriter(f, new_data[0].keys())
writer.writeheader()
for row in new_data:
writer.writerow(row)
f.close()
Surprisingly, I found that none of the answers posted here so far correctly deal with all possible scenarios (e.g., nested dicts, nested lists, None values, etc).
This solution should work across all scenarios:
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys + [str(idx)], v, flattened)
else:
flattened['__'.join(keys)] = value
flattened = {}
for key in json.keys():
process_value([key], json[key], flattened)
return flattened
My simple way to solve this:
Create a new Python file like: json_to_csv.py
Add this code:
import csv, json, sys
#if you are not using utf-8 files, remove the next line
sys.setdefaultencoding("UTF-8")
#check if you pass the input file and output file
if sys.argv[1] is not None and sys.argv[2] is not None:
fileInput = sys.argv[1]
fileOutput = sys.argv[2]
inputFile = open(fileInput)
outputFile = open(fileOutput, 'w')
data = json.load(inputFile)
inputFile.close()
output = csv.writer(outputFile)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
After add this code, save the file and run at the terminal:
python json_to_csv.py input.txt output.csv
I hope this help you.
SEEYA!
This code works for any given json file
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 17 20:35:35 2019
author: Ram
"""
import json
import csv
with open("file1.json") as file:
data = json.load(file)
# create the csv writer object
pt_data1 = open('pt_data1.csv', 'w')
csvwriter = csv.writer(pt_data1)
count = 0
for pt in data:
if count == 0:
header = pt.keys()
csvwriter.writerow(header)
count += 1
csvwriter.writerow(pt.values())
pt_data1.close()
If we consider the below example for converting the json format file to csv formatted file.
{
"item_data" : [
{
"item": "10023456",
"class": "100",
"subclass": "123"
}
]
}
The below code will convert the json file ( data3.json ) to csv file ( data3.csv ).
import json
import csv
with open("/Users/Desktop/json/data3.json") as file:
data = json.load(file)
file.close()
print(data)
fname = "/Users/Desktop/json/data3.csv"
with open(fname, "w", newline='') as file:
csv_file = csv.writer(file)
csv_file.writerow(['dept',
'class',
'subclass'])
for item in data["item_data"]:
csv_file.writerow([item.get('item_data').get('dept'),
item.get('item_data').get('class'),
item.get('item_data').get('subclass')])
The above mentioned code has been executed in the locally installed pycharm and it has successfully converted the json file to the csv file. Hope this help to convert the files.
This works relatively well.
It flattens the json to write it to a csv file.
Nested elements are managed :)
That's for python 3
import json
o = json.loads('your json string') # Be careful, o must be a list, each of its objects will make a line of the csv.
def flatten(o, k='/'):
global l, c_line
if isinstance(o, dict):
for key, value in o.items():
flatten(value, k + '/' + key)
elif isinstance(o, list):
for ov in o:
flatten(ov, '')
elif isinstance(o, str):
o = o.replace('\r',' ').replace('\n',' ').replace(';', ',')
if not k in l:
l[k]={}
l[k][c_line]=o
def render_csv(l):
ftime = True
for i in range(100): #len(l[list(l.keys())[0]])
for k in l:
if ftime :
print('%s;' % k, end='')
continue
v = l[k]
try:
print('%s;' % v[i], end='')
except:
print(';', end='')
print()
ftime = False
i = 0
def json_to_csv(object_list):
global l, c_line
l = {}
c_line = 0
for ov in object_list : # Assumes json is a list of objects
flatten(ov)
c_line += 1
render_csv(l)
json_to_csv(o)
enjoy.
Modified Alec McGail's answer to support JSON with lists inside
def flattenjson(self, mp, delim="|"):
ret = []
if isinstance(mp, dict):
for k in mp.keys():
csvs = self.flattenjson(mp[k], delim)
for csv in csvs:
ret.append(k + delim + csv)
elif isinstance(mp, list):
for k in mp:
csvs = self.flattenjson(k, delim)
for csv in csvs:
ret.append(csv)
else:
ret.append(mp)
return ret
Thanks!
import json,csv
t=''
t=(type('a'))
json_data = []
data = None
write_header = True
item_keys = []
try:
with open('kk.json') as json_file:
json_data = json_file.read()
data = json.loads(json_data)
except Exception as e:
print( e)
with open('bar.csv', 'at') as csv_file:
writer = csv.writer(csv_file)#, quoting=csv.QUOTE_MINIMAL)
for item in data:
item_values = []
for key in item:
if write_header:
item_keys.append(key)
value = item.get(key, '')
if (type(value)==t):
item_values.append(value.encode('utf-8'))
else:
item_values.append(value)
if write_header:
writer.writerow(item_keys)
write_header = False
writer.writerow(item_values)
Since the data appears to be in a dictionary format, it would appear that you should actually use csv.DictWriter() to actually output the lines with the appropriate header information. This should allow the conversion to be handled somewhat easier. The fieldnames parameter would then set up the order properly while the output of the first line as the headers would allow it to be read and processed later by csv.DictReader().
For example, Mike Repass used
output = csv.writer(sys.stdout)
output.writerow(data[0].keys()) # header row
for row in data:
output.writerow(row.values())
However just change the initial setup to
output = csv.DictWriter(filesetting, fieldnames=data[0].keys())
Note that since the order of elements in a dictionary is not defined, you might have to create fieldnames entries explicitly. Once you do that, the writerow will work. The writes then work as originally shown.
Unfortunately I have not enouthg reputation to make a small contribution to the amazing #Alec McGail answer.
I was using Python3 and I have needed to convert the map to a list following the #Alexis R comment.
Additionaly I have found the csv writer was adding a extra CR to the file (I have a empty line for each line with data inside the csv file). The solution was very easy following the #Jason R. Coombs answer to this thread:
CSV in Python adding an extra carriage return
You need to simply add the lineterminator='\n' parameter to the csv.writer. It will be: csv_w = csv.writer( out_file, lineterminator='\n' )
You can use this code to convert a json file to csv file
After reading the file, I am converting the object to pandas dataframe and then saving this to a CSV file
import os
import pandas as pd
import json
import numpy as np
data = []
os.chdir('D:\\Your_directory\\folder')
with open('file_name.json', encoding="utf8") as data_file:
for line in data_file:
data.append(json.loads(line))
dataframe = pd.DataFrame(data)
## Saving the dataframe to a csv file
dataframe.to_csv("filename.csv", encoding='utf-8',index= False)
I have tried a lot of the suggested solution (also Panda was not correctly normalizing my JSON) but the real good one which is parsing correctly the JSON data is from Max Berman.
I wrote an improvement to avoid new columns for each row and
puts it to the existing column during parsing.
It has also the effect to store a value as a string if only one data exists, and make a list if there are more values for that columns.
It takes an input.json file for input and spits out an output.csv.
import json
import pandas as pd
def flatten_json(json):
def process_value(keys, value, flattened):
if isinstance(value, dict):
for key in value.keys():
process_value(keys + [key], value[key], flattened)
elif isinstance(value, list):
for idx, v in enumerate(value):
process_value(keys, v, flattened)
# process_value(keys + [str(idx)], v, flattened)
else:
key1 = '__'.join(keys)
if not flattened.get(key1) is None:
if isinstance(flattened[key1], list):
flattened[key1] = flattened[key1] + [value]
else:
flattened[key1] = [flattened[key1]] + [value]
else:
flattened[key1] = value
flattened = {}
for key in json.keys():
k = key
# print("Key: " + k)
process_value([key], json[key], flattened)
return flattened
try:
f = open("input.json", "r")
except:
pass
y = json.loads(f.read())
flat = flatten_json(y)
text = json.dumps(flat)
df = pd.read_json(text)
df.to_csv('output.csv', index=False, encoding='utf-8')
I might be late to the party, but I think, I have dealt with the similar problem. I had a json file which looked like this
I only wanted to extract few keys/values from these json file. So, I wrote the following code to extract the same.
"""json_to_csv.py
This script reads n numbers of json files present in a folder and then extract certain data from each file and write in a csv file.
The folder contains the python script i.e. json_to_csv.py, output.csv and another folder descriptions containing all the json files.
"""
import os
import json
import csv
def get_list_of_json_files():
"""Returns the list of filenames of all the Json files present in the folder
Parameter
---------
directory : str
'descriptions' in this case
Returns
-------
list_of_files: list
List of the filenames of all the json files
"""
list_of_files = os.listdir('descriptions') # creates list of all the files in the folder
return list_of_files
def create_list_from_json(jsonfile):
"""Returns a list of the extracted items from json file in the same order we need it.
Parameter
_________
jsonfile : json
The json file containing the data
Returns
-------
one_sample_list : list
The list of the extracted items needed for the final csv
"""
with open(jsonfile) as f:
data = json.load(f)
data_list = [] # create an empty list
# append the items to the list in the same order.
data_list.append(data['_id'])
data_list.append(data['_modelType'])
data_list.append(data['creator']['_id'])
data_list.append(data['creator']['name'])
data_list.append(data['dataset']['_accessLevel'])
data_list.append(data['dataset']['_id'])
data_list.append(data['dataset']['description'])
data_list.append(data['dataset']['name'])
data_list.append(data['meta']['acquisition']['image_type'])
data_list.append(data['meta']['acquisition']['pixelsX'])
data_list.append(data['meta']['acquisition']['pixelsY'])
data_list.append(data['meta']['clinical']['age_approx'])
data_list.append(data['meta']['clinical']['benign_malignant'])
data_list.append(data['meta']['clinical']['diagnosis'])
data_list.append(data['meta']['clinical']['diagnosis_confirm_type'])
data_list.append(data['meta']['clinical']['melanocytic'])
data_list.append(data['meta']['clinical']['sex'])
data_list.append(data['meta']['unstructured']['diagnosis'])
# In few json files, the race was not there so using KeyError exception to add '' at the place
try:
data_list.append(data['meta']['unstructured']['race'])
except KeyError:
data_list.append("") # will add an empty string in case race is not there.
data_list.append(data['name'])
return data_list
def write_csv():
"""Creates the desired csv file
Parameters
__________
list_of_files : file
The list created by get_list_of_json_files() method
result.csv : csv
The csv file containing the header only
Returns
_______
result.csv : csv
The desired csv file
"""
list_of_files = get_list_of_json_files()
for file in list_of_files:
row = create_list_from_json(f'descriptions/{file}') # create the row to be added to csv for each file (json-file)
with open('output.csv', 'a') as c:
writer = csv.writer(c)
writer.writerow(row)
c.close()
if __name__ == '__main__':
write_csv()
I hope this will help. For details on how this code work you can check here

Categories