Dumping data from json to csv - python

I have 100 JSON files; each one contains a dict in the following format.
I would like to create a csv file and dump only
{
"label": "image",
"confidence": 1.0
}
this data into the CSV file, in a prediction column, along with the JSON file name. How would I do it?

If I understand correctly, you want to get only the first item of the "predictions" list from each of the files in some directory, and then write all of these as rows of a CSV file. You can do something like this:
import csv
import json
from os import listdir
from os.path import isfile, join

# Directory that holds the input JSON files.
path = 'path/to/dir'
result = []
# listdir() returns entries in arbitrary order; sort them so the CSV rows
# come out in a stable, reproducible order.
for file_name in sorted(listdir(path)):
    full_path = join(path, file_name)
    # Skip sub-directories and anything that is not a JSON file, which would
    # otherwise make open()/json.load() fail.
    if not isfile(full_path) or not file_name.lower().endswith('.json'):
        continue
    with open(full_path, 'r') as f:
        data = json.load(f)
    # Only the first entry of the "predictions" list is exported.
    first = data['predictions'][0]
    result.append([first['label'], first['confidence']])
# newline='' lets the csv module control line endings itself.
with open('path/to/result.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['label', 'confidence'])  # Comment this line if you don't want a header row
    writer.writerows(result)
Replacing 'path/to/dir' with the path of the json files directory, and 'path/to/result.csv' with the path to the result csv file.

Assuming you know already how to read the json file as dictionary in python. You could do this.
import pandas as pd
json_data = { "predictions": [
{
"label": "empty",
"confidence": 1.0
},
{
"label": "filled",
"confidence": 9.40968867750501e-25
},
{
"label": "no-checkbox",
"confidence": 1.7350328516351668e-28
}
]
}
output_df = pd.DataFrame(json_data['predictions'])
print(output_df)
label confidence
0 empty 1.000000e+00
1 filled 9.409689e-25
2 no-checkbox 1.735033e-28

Related

How to search through multiple (thousands) of JSON files to find files with a specific value and then append those specific values to a new list

I recently generated 10,000 images with a corresponding .json file. I generated 10 before I did the bigger collection and so I am trying to filter out or search through the 10,000 json files, for a specific key value. here is one of the JSON files for example:
{
"name": "GrapeGrannys #1",
"description": "Grannys with grapes etc.",
"image": "ipfs://NewUriToReplace/1.png",
"dna": "93596679f006e3a9226700e0e7539179b532bf29",
"edition": 1,
"date": 1667406230920,
"attributes": [
{
"trait_type": "Backgrounds",
"value": "sunrise_beach"
},
{
"trait_type": "main",
"value": "GrapeGranny"
},
{
"trait_type": "eyeColor",
"value": "gray"
},
{
"trait_type": "skirtAndTieColor",
"value": "green"
},
{
"trait_type": "Headwear",
"value": "hat1"
},
{
"trait_type": "specialItems",
"value": "ThugLife"
}
],
"compiler": "HashLips Art Engine"
}
In "attributes", I want to target the first object and its value and check whether that value is equal to "GrapeCity".
Then, after all files have been read and searched through, I'd like the files with that specific value "GrapeCity" to be stored in a new list or array that I can print, so I can see which specific files contain that keyword. Here is what I have tried in Python:
import json
import glob
# from datetime import datetime
src = "./Assets/json"
# date = datetime.now()
data = []
files = glob.glob('$./Assets/json/*', recursive=True)
for single_file in files:
with open(single_file, 'r') as f:
try:
json_file = json.load(f)
data.append([
json_file["attributes"]["values"]["GrapeCity"]
])
except KeyError:
print(f'Skipping {single_file}')
data.sort()
print(data)
# csv_filename = f'{str(date)}.csv'
# with open(csv_filename, "w", newline="") as f:
# writer = csv.writer(f)
# writer.writerows(data)
# print("Updated CSV")
At one point I was getting a typeError but now it is just outputing an empty array. Any help is appreciated!
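json_file["attributes"] is a list, so you can't index it like a dictionary (that is where the TypeError came from). Also note that the glob pattern '$./Assets/json/*' starts with a stray $, so it matches no files at all, which is why data ends up empty; use './Assets/json/*' instead.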
Try this:
# Collect the path of every JSON file that has an attribute whose value is
# "GrapeCity". `files` and `data` are the names defined in the question's code.
for single_file in files:
    with open(single_file, 'r') as f:
        try:
            json_file = json.load(f)
            attrs = json_file["attributes"]
            # "attributes" is a list of dicts, so scan its entries instead of
            # indexing it by key.
            has_grape_city = any(attr["value"] == "GrapeCity" for attr in attrs)
            if has_grape_city:
                data.append(single_file)
        except KeyError:
            # Raised when "attributes", or an entry's "value" key, is missing.
            print(f'Skipping {single_file}')

Convert CSV file to JSON with python

I am trying to convert my CSV email list to a JSON format to mass email via an API. This is my code thus far, but I am having trouble with the output. Nothing is output in my VS Code editor.
import csv
import json
def make_json(csvFilePath, jsonFilePath):
    """Convert a CSV file into a JSON file keyed by the ``No`` column.

    Each CSV row becomes one JSON object, stored under the value of its
    ``No`` column. Nothing is printed; the result is only written to disk.

    Args:
        csvFilePath: Path of the CSV file to read. Its header row must
            contain a ``No`` column.
        jsonFilePath: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If a data row is read from a CSV that has no ``No`` column.
    """
    data = {}
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are read back unchanged.
    with open(csvFilePath, encoding='utf-8', newline='') as csvf:
        csvReader = csv.DictReader(csvf)
        for rows in csvReader:
            # Rows sharing the same "No" value overwrite each other.
            key = rows['No']
            data[key] = rows
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(json.dumps(data, indent=4))
csvFilePath = r'/data/csv-leads.csv'
jsonFilePath = r'Names.json'
make_json(csvFilePath, jsonFilePath)
Here is my desired JSON format
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"Name": "Youngstown Coffee",
"ConsentToTrack": "Yes"
},
Here's my CSV list
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen & Bakery,catering#zylberschtein.com,Yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,Yes
It looks like you could use a csv.DictReader to make this easier.
If I have data.csv that looks like this:
Name,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
I can convert it into JSON like this:
>>> import csv
>>> import json
>>> fd = open('data.csv')
>>> reader = csv.DictReader(fd)
>>> print(json.dumps(list(reader), indent=2))
[
{
"Name": "Zylberschtein's Delicatessen",
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes"
},
{
"Name": "Youngstown Coffee",
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes"
}
]
Here I've assumed the headers in the CSV can be used verbatim. I'll update this with an example if you need to modify key names (e.g. convert "No" to "Name").
If you need to rename a column, it might look more like this:
import csv
import json

# newline='' is how the csv module expects CSV files to be opened.
with open('data.csv', newline='') as fd:
    reader = csv.DictReader(fd)
    data = []
    for row in reader:
        # Move the value of the "No" column to a new "Name" key; the new key
        # is appended, so it appears last in each output object.
        row['Name'] = row.pop('No')
        data.append(row)
print(json.dumps(data, indent=2))
Given this input:
No,EmailAddress,ConsentToTrack
Zylberschtein's Delicatessen,catering#zylberschtein.com,yes
Youngstown Coffee,hello#youngstowncoffeeseattle.com,yes
This will output:
[
{
"EmailAddress": "catering#zylberschtein.com",
"ConsentToTrack": "yes",
"Name": "Zylberschtein's Delicatessen"
},
{
"EmailAddress": "hello#youngstowncoffeeseattle.com",
"ConsentToTrack": "yes",
"Name": "Youngstown Coffee"
}
]
and to print on my editor is it simply print(json.dumps(list(reader), indent=2))?
I'm not really familiar with your editor; print is how you generate console output in Python.

KeyError occurs while opening the JSON txt file and setting it up into a DataFrame

I had a code, which gave me an empty DataFrame with no saved tweets.
I tried to debug it by putting print(line) under both the "for line in json_file:" line and the "json_data = json.loads(line)" line.
That resulted in a KeyError.
How do I fix it?
Thank you.
list_df = list()
# read the .txt file, line by line, and append the json data in each line to the list
with open('tweet_json.txt', 'r') as json_file:
for line in json_file:
print(line)
json_data = json.loads(line)
print(line)
tweet_id = json_data['tweet_id']
fvrt_count = json_data['favorite_count']
rtwt_count = json_data['retweet_count']
list_df.append({'tweet_id': tweet_id,
'favorite_count': fvrt_count,
'retweet_count': rtwt_count})
# create a pandas DataFrame using the list
df = pd.DataFrame(list_df, columns = ['tweet_id', 'favorite_count', 'retweet_count'])
df.head()
Your comment says you're trying to save to a file, but your code kind of says that you're trying to read from a file. Here are examples of how to do both:
Writing to JSON
import json
import pandas as pd
content = { # This just dummy data, in the form of a dictionary
"tweet1": {
"id": 1,
"msg": "Yay, first!"
},
"tweet2": {
"id": 2,
"msg": "I'm always second :("
}
}
# Write it to a file called "tweet_json.txt" in JSON
with open("tweet_json.txt", "w") as json_file:
json.dump(content, json_file, indent=4) # indent=4 is optional, it makes it easier to read
Note the w (as in write) in open("tweet_json.txt", "w"). You're using r (as in read), which doesn't give you permission to write anything. Also note the use of json.dump() rather than json.load(). We then get a file that looks like this:
$ cat tweet_json.txt
{
"tweet1": {
"id": 1,
"msg": "Yay, first!"
},
"tweet2": {
"id": 2,
"msg": "I'm always second :("
}
}
Reading from JSON
Let's read the file that we just wrote, using pandas read_json():
import pandas as pd
df = pd.read_json("tweet_json.txt")
print(df)
Output looks like this:
>>> df
tweet1 tweet2
id 1 2
msg Yay, first! I'm always second :(

Read JSON files from Folder and Store it into a CSV file

I have around 1000 JSON files in a folder, I want to convert all those files to its CSV formats. The sample of JSON file is given below.
{"Reviews":
[
{"Title": "Don't give up on your NOOK HD just yet - make it a Lean Mean Jellybean with OS 4.2.2",
"Author": "DC10",
"ReviewID": "ROX6OFU4UAOK1",
"Overall": "5.0",
"Content": "Hi Folks, ",
"Date": "February 18, 2013"},
{"Title": "freezing problem",
"Author": "joseph",
"ReviewID": "R1QVAPUULQZ57B",
"Overall": "3.0",
"Content": "I am still setting it up the way I want it I havve downloaded anything to it yet and it freezes horribly. All in all tho I love this device.",
"Date": "September 11, 2013"}
],
"ProductInfo": {"Price": "$229.00", "Features": "NOOK HD 7\" 16GB Tablet", "Name": "NOOK HD 7\" 16GB Tablet",
"ImgURL": "http://ecx.images-amazon.com/images/I/41jpVvVz41L._SY300_.jpg",
"ProductID": "1400501520"}}
example: json to csv
imports:
import json
import csv
some json :D
data_json = {
"employee_details": [{
"nom": "Bobby",
"age": "19",
}]
}
and some code
employee_data = data_json['employee_details']
# Open the output with a context manager so it is closed even if a write
# fails; newline='' lets the csv module control line endings itself.
with open('EmployData.csv', 'w', newline='') as employ_data:
    # create the csv writer object
    csvwriter = csv.writer(employ_data)
    for count, employee in enumerate(employee_data):
        if count == 0:
            # The keys of the first record serve as the header row.
            csvwriter.writerow(employee.keys())
        csvwriter.writerow(employee.values())
You will find the file EmployData.csv next to your script.
If you're willing to install a 3rd party module, you can do this in a few lines with pandas
import os

# Write one CSV next to each JSON file listed in `files`.
for json_file in files:
    with open(json_file) as f:
        json_data = json.load(f)
    df = pandas.DataFrame.from_records(json_data)
    # splitext() drops only the extension. str.rstrip('.json') would instead
    # strip every trailing '.', 'j', 's', 'o' or 'n' character
    # (e.g. 'jason.json' -> 'ja'), producing wrong output names.
    df.to_csv(os.path.splitext(json_file)[0] + '.csv')

csv to json in python

Hey so I have some hash ids in a csv file like
XbRPhe65YbC+xtgGQ8ukeZEr9xFOC4MEs9Z0wUidGSec=
XbRPhe65YbC+xtgGQ8uksrqSUJ/HhTPj1d2pL0/vuGrHM=
and I want to parse them into python wrap them in some additional code like
{"id" :"XbRPshe65YbC+xtGQ8ukqR2u2btfNeNe2gtcs72QbxPA=", "timestamp":"20150831"},
and then wrap all of that in some JSON syntax. This is then sent as a post request. Problem is I cannot seem to make it JSON readable. Everything seems to be ordered wrong and I am getting extra \.
import os
import pandas as pd
from pprint import pprint
df=pd.read_csv('test.csv',sep=',',header=None)
df[0] = '{"id" :"' + df[0].astype(str) + '", "timestamp":"20150831"}, '
df = df[:-1] # removes last comma
test = 'hello'
data =[ { "ids":[ df[0]],
"attributes":[
{
"name":"girl"
},
{
"name":"size"
}
]
}
]
json1 = data.to_json()
print(json1)
I agree that pandas doesn't seem to be the simplest tool for the job here. The built-in libraries will work great:
import csv
import json

with open('test.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    data = {
        # csv.reader yields an empty list for a blank line; `if row` skips
        # those so row[0] cannot raise IndexError.
        "ids": [
            {"id": row[0], "timestamp": "20150831"}
            for row in csvreader
            if row
        ],
        "attributes": [
            {"name": "girl"},
            {"name": "size"}
        ]
    }
# json.dumps handles all quoting/escaping, so the ids are emitted as real
# JSON objects rather than pre-formatted strings.
json1 = json.dumps(data)
print(json1)

Categories