How to pull data from the JSON file - python

I have a JSON File which contains some data as below:
{
'count': 2,
'next': '?page=2',
'previous': None,
'results': [
{
'category': 'Triggers',
'id': '783_23058',
'name': 'Covid-19'
},
{
'category': 'Sources',
'id': '426_917746',
'name': 'Covid19Conversations'
}
]
}
I am able to extract the first 'id' and 'name' values as below
# Collect name -> id pairs; `companies` is assumed to hold the parsed JSON shown above.
Doc_details = dict()
# NOTE: the loop variable `item` is never used and the body always reads
# results[0], so every iteration stores the same (first) entry.
for item in companies:
doc_id = companies['results'][0]['id']
doc_name = companies['results'][0]['name']
Doc_details[doc_name] = doc_id
# Print each collected name with its id.
for key, value in Doc_details.items():
print(key,value)
Output:
Covid-19 783_23058
I am new to python. Can someone help me with:
Loop through it and extract all the key,value pairs
Save the results to an excel file.

If you already have the object, you can iterate through companies['results'] using list comprehension and map the objects to (key, value) pairs.
# Sample payload, shaped like the JSON document from the question.
companies = {
    'count': 2,
    'next': '?page=2',
    'previous': None,
    'results': [{
        'category': 'Triggers',
        'id': '783_23058',
        'name': 'Covid-19'
    }, {
        'category': 'Sources',
        'id': '426_917746',
        'name': 'Covid19Conversations'
    }]
}

# One [id, name] pair per entry in the results list.
pairs = [[entry['id'], entry['name']] for entry in companies['results']]

# Tab-separated text, one pair per line. The variable is deliberately not
# called `csv`, so it cannot shadow the standard-library csv module.
tsv_text = '\n'.join('\t'.join(pair) for pair in pairs)
print(tsv_text)
Result
783_23058 Covid-19
426_917746 Covid19Conversations
Writing to a file
Convert the list of pairs to a CSV file. See: Writing a Python list of lists to a csv file.
import csv

# The Python 3 csv module writes text, so the file must be opened in text mode
# (not 'wb'); newline='' lets the writer control line endings itself.
with open('pairs.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(pairs)

If you only want the name, id pairs, you can just do:
# Print "name id" for every entry in the results list.
for entry in companies['results']:
    entry_name, entry_id = entry['name'], entry['id']
    print(entry_name, entry_id)
# =>
# Covid-19 783_23058
# Covid19Conversations 426_917746

IIUC: You can use inbuilt json package to parse the json file as python dict and then you can use pandas library to write the excel file:
Try this:
import json
import pandas as pd
from pandas import ExcelWriter

with open("json_file.json", "r") as file:
    info = json.load(file)  # info contains all key-value pairs

# save to excel
# ExcelWriter.save() was removed in pandas 2.0; using the writer as a context
# manager saves and closes the workbook when the block exits.
with ExcelWriter('excel_file.xlsx') as writer:
    pd.DataFrame(info["results"]).to_excel(writer, index=False)

Related

how to form a dictionary with key-value pair using a json file

I've this two Json files
zone.json
{"0":{"id":1,"name":"XYZ"}}
region.json
{"0":{"id":1,"name":"ABC"},"1":{"id":2,"name":"DEF"}}
I need to use these json datas as values to create a dictionary with a manually entered key.
{"zone": {"0":{"id":1,"name":"XYZ"}}, "region": {"0":{"id":1,"name":"ABC"},"1":{"id":2,"name":"DEF"}}}
Can anyone please explain how to create this dictionary in Python, using the names of the files as keys? Or is there another approach?
Use json module to parse the data. You can split the filename by . and use the first part as a key:
import json

# File names match the files described in the question.
file1 = 'zone.json'
file2 = 'region.json'

# Key each parsed document by the part of its file name before the first '.'.
out = {}
for filename in (file1, file2):
    with open(filename, 'r') as f:
        out[filename.split('.')[0]] = json.load(f)
print(out)
Prints:
{'zone': {'0': {'id': 1, 'name': 'XYZ'}}, 'region': {'0': {'id': 1, 'name': 'ABC'}, '1': {'id': 2, 'name': 'DEF'}}}
Edit (to save the file):
# Persist the combined dictionary as JSON text.
with open('output.txt', 'w') as f_out:
    f_out.write(json.dumps(out))
Alternative using pathlib:
from pathlib import Path
import json

zonepath = Path("zone.json")
regionpath = Path("region.json")

# Parse both files, then key each document by its file stem
# (the file name without its suffix).
zonedict, regiondict = (
    json.loads(path.read_text()) for path in (zonepath, regionpath)
)
result = dict([(zonepath.stem, zonedict), (regionpath.stem, regiondict)])

CSV to JSON output only if all values are present in CSV

I have a concatenated CSV file that I am attempting to output in JSON format. How should I implement the logic so that a CSV row only gets converted to a JSON object if all of its fields have a value?
import glob , os
import pandas as pd
import json
import csv
# Read some.csv row by row and write one pretty-printed JSON object per row
# to output.json, each followed by a newline.
with open('some.csv', 'r', newline='') as csvfile, \
open('output.json', 'w') as jsonfile:
for row in csv.DictReader(csvfile):
# Top-level summary keys plus a nested 'data' object holding the row's columns.
restructured = {
'STATION_CODE': row['STORE_CODE'],
'id': row['ARTICLE_ID'],
'name': row['ITEM_NAME'],
'data':
{
# fieldname: value for (fieldname, value) in row.items()
'STORE_CODE': row['STORE_CODE'],
'ARTICLE_ID': row['ARTICLE_ID'],
'ITEM_NAME': row['ITEM_NAME'],
'BARCODE': row['BARCODE'],
'SALE_PRICE': row['SALE_PRICE'],
'LIST_PRICE': row['LIST_PRICE'],
'UNIT_PRICE': row['UNIT_PRICE'],
}
}
# NOTE: nothing above checks for empty values, so every row is written.
json.dump(restructured, jsonfile, indent=4)
jsonfile.write('\n')
Currently this will provide all values from the CSV file into the JSON output, which is unintended behavior. Any inputs on how to correct this ?
First, I loop through all the rows of the CSV and add each one to a JSON array. If any value in a row is empty, that row is ignored. Once I have all the rows in the JSON array, I write it to the JSON file.
import json
import csv

# Columns copied verbatim into the nested 'data' object of each record.
_DATA_FIELDS = (
    'STORE_CODE',
    'ARTICLE_ID',
    'ITEM_NAME',
    'BARCODE',
    'SALE_PRICE',
    'LIST_PRICE',
    'UNIT_PRICE',
)


def _has_all_values(row):
    """Return True when every field in *row* holds a value.

    csv.DictReader fills fields that are missing from a short row with
    None, so None is treated the same way as an empty string.
    """
    return all(value is not None and value != "" for value in row.values())


csvjsonarr = []
with open('some.csv', 'r', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        # Skip any row that has at least one missing or empty field.
        if not _has_all_values(row):
            continue
        csvjsonarr.append({
            'STATION_CODE': row['STORE_CODE'],
            'id': row['ARTICLE_ID'],
            'name': row['ITEM_NAME'],
            'data': {field: row[field] for field in _DATA_FIELDS},
        })

# Only create the output file when at least one complete row was found.
if csvjsonarr:
    with open('output.json', 'w') as jsonfile:
        json.dump(csvjsonarr, jsonfile, indent=4)

Find key value in list of dictionaries, and then replace the other values

I'm using Python and a JSON file containing a list of dictionaries like so:
[
{'name':'person1','id':'123','status':'absent'},
{'name':'person2','id':'0980','status':'away'},
{'name':'person3','id':'5235','status':'present'}
]
And I have an incoming dictionary with the same format:
{'name':'person1','id':'324','status':'present'}
The incoming dictionary can have one thing in common with the existing entries: the value of the 'name' key. If that name hasn't been seen before, I add the dictionary to the JSON file; if it has, I update the values of the 'id' and 'status' keys in the JSON file. I'm having trouble updating the list of dictionaries in the JSON file.
Taking the examples I gave above, the resulting json file should look like this:
[
{'name':'person1','id':'324','status':'present'},
{'name':'person2','id':'0980','status':'away'},
{'name':'person3','id':'5235','status':'present'}
]
I can manage to find the dictionary I want to change with the following:
dict_to_update = next(item for item in <jsonfilename> if item['name'] == 'desired name')
After this, I'm stuck trying to figure out how to then update the specific dictionary in the json file.
Any ideas? Thank you.
Here is how:
import json

# `dct` is the incoming dictionary; it must be defined before this runs.
with open('file.json', 'r') as r:
    lst = json.load(r)

# Replace every entry that shares the incoming name ...
matches = [i for i, d in enumerate(lst) if d['name'] == dct['name']]
for i in matches:
    lst[i] = dct
# ... or add the incoming dictionary when the name has not been seen yet.
if not matches:
    lst.append(dct)

with open('file.json', 'w') as f:
    json.dump(lst, f)
You can also use a function:
def update(lst, new_dict=None):
    """Insert or replace a dictionary in *lst*, matching on the 'name' key.

    Args:
        lst (list): List of dictionaries, each with a 'name' key. It is
            modified in place.
        new_dict (dict, optional): The incoming dictionary. When omitted,
            the module-level ``dct`` is used.

    Returns:
        list: The same *lst* object. Every entry whose name matches is
        replaced by the incoming dictionary; if none matches, the incoming
        dictionary is appended.
    """
    if new_dict is None:
        new_dict = dct  # fall back to the global used by existing callers
    found = False
    for i, d in enumerate(lst):
        if d['name'] == new_dict['name']:
            lst[i] = new_dict
            found = True
    if not found:
        # Name not seen before: add it instead of silently dropping it.
        lst.append(new_dict)
    return lst
# Read file.json, pass the parsed list through update(), and write it back.
with open('file.json', 'r') as r:
    lst = update(json.loads(r.read()))
with open('file.json', 'w') as f:
    f.write(json.dumps(lst))
list_of_dict = [
    {"name": "person1", "id": "123", "status": "absent"},
    {"name": "person2", "id": "0980", "status": "away"},
    {"name": "person3", "id": "5235", "status": "present"},
]
incoming_dictionary = {"name": "person1", "id": "324", "status": "present"}

# Position of the first entry sharing the incoming name, or None if absent.
match_position = next(
    (
        position
        for position, entry in enumerate(list_of_dict)
        if entry["name"] == incoming_dictionary["name"]
    ),
    None,
)
if match_position is None:
    # No entry with that name yet: add the incoming dictionary.
    list_of_dict.append(incoming_dictionary)
else:
    # Swap the old entry for the incoming one.
    list_of_dict[match_position] = incoming_dictionary
Here's a function that does so, maybe not as elegantly as other answers:
def process(new_dict):
    """Upsert *new_dict* into the global ``data`` list, keyed on 'name'.

    Any existing entry with the same name is dropped, the new dictionary is
    added, and ``data`` is rebound to a list sorted by name.
    """
    global data  # jsonfilename
    incoming_name = new_dict['name']
    already_known = any(entry['name'] == incoming_name for entry in data)
    if already_known:
        data = [entry for entry in data if entry['name'] != incoming_name]
    data.append(new_dict)

    def by_name(entry):
        return entry['name']

    data = sorted(data, key=by_name)
Full example:
# Module-level list named by the `global data` statement in process().
data = [{'name':'person1','id':'123','status':'absent'},
{'name':'person2','id':'0980','status':'away'},
{'name':'person3','id':'5235','status':'present'}]
# First call reuses an existing name; second call introduces a new one.
process({'name':'person1','id':'324','status':'present'}) #an overwritten person
process({'name':'person4','id':'324','status':'present'}) #a new person
Result:
[{'name': 'person1', 'id': '324', 'status': 'present'},
{'name': 'person2', 'id': '0980', 'status': 'away'},
{'name': 'person3', 'id': '5235', 'status': 'present'},
{'name': 'person4', 'id': '324', 'status': 'present'}]
You can do this while avoiding the global keyword as well, but I thought this seemed alright for modifying an existing structure in place.
Maybe this can help you, and if you found another short solution please let me know.
I make an iteration for the list of dictionary, and add a conditional to change the value of key id and status.
Another way is we can use filter and map, to have a short code instead.
datas = [
    {'name': 'person1', 'id': '123', 'status': 'absent'},
    {'name': 'person2', 'id': '0980', 'status': 'away'},
    {'name': 'person3', 'id': '5235', 'status': 'present'},
]
newData = {'name': 'person1', 'id': '324', 'status': 'present'}

# Update id and status in place when the name is already known ...
found = False
for data in datas:
    if data["name"] == newData["name"]:
        data["id"] = newData["id"]
        data["status"] = newData["status"]
        found = True
# ... otherwise add the incoming dictionary as a new entry.
if not found:
    datas.append(newData)

How would I go about scraping data from a website and updating a file with the new info each day while saving older data?

I was initially planning on using a CSV file, however it would require me to manually log into VScode each day and run my script to add the data to a csv file, and it would replace the old data that I had previously input.
If your scraped dataset is small, scrape the data to a nested list of dictionaries with the structure [{<column1>: <data>, <column2>: <data>, ...}, ...] for each row you want to save, then use this function to append that dictionary to a csv file by doing append_csv_dict(<path_to_your_csv>, <your_dictionary>):
import csv
def append_csv_dict(path, data):
    '''
    Append rows to a csv file, using dictionary keys as column headers.

    The header row is written only when the file is empty or newly created.
    Column order comes from the keys of the first dictionary.

    Args:
        path (str): Path to the csv file
        data (dict or list): Dictionary or list(dict) with keys as
            column headers and values as column data. An empty list
            writes nothing and does not create the file.
    '''
    # Treat a single dictionary as a one-row list so both cases share one path.
    rows = [data] if isinstance(data, dict) else list(data)
    if not rows:
        return
    # newline='' lets the csv module control line endings (no blank rows on Windows)
    with open(path, 'a', newline='') as file:
        fieldnames = list(rows[0].keys())
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        # write the header if the file is new
        if file.tell() == 0:
            writer.writeheader()
        writer.writerows(rows)
# some example data, you can do one dictionary at a time if you only do one row per day
# Every dictionary uses the same keys: first_name, last_name, age.
scraped_data = [
{
'first_name': 'John',
'last_name': 'Do',
'age': 31
},
{
'first_name': 'Jane',
'last_name': 'Do',
'age': 33
},
{
'first_name': 'Foo',
'last_name': 'Bar',
'age': 58
}
]
# Pass the whole list to append_csv_dict, targeting ./scrape.csv.
append_csv_dict('./scrape.csv', scraped_data)
Output (scrape.csv):
first_name,last_name,age
John,Do,31
Jane,Do,33
Foo,Bar,58

Converting nested json to a csv, where each row includes innermost values and all parents values

I am looking to create a python script to be able to convert a nested json file to a csv file, with each inner most child having its own row, that includes all of the parent fields in the row as well.
My nested json looks :
(Note this is just a small excerpt, there are hundreds of date/value pairs)
{
"test1": true,
"test2": [
{
"name_id": 12345,
"tags": [
{
"prod_id": 54321,
"history": [
{
"date": "Feb-2-2019",
"value": 6
},
{
"date": "Feb-3-2019",
"value": 5
},
{
"date": "Feb-4-2019",
"value": 4
}
The goal is to write to a csv where each row shows the values for the inner most field and all of its parents. (e.g, date, value, prod_id, name_id, test1). Basically creating a row for each date & value, with all of the parent field values included as well.
I started using this resource as a foundation, but still not exactly what I'm trying to accomplish:
How to Flatten Deeply Nested JSON Objects in Non-Recursive Elegant Python
I've tried tweaking this script but have not been able to come up with a solution. This seems like a relatively easy task, so maybe there's something I'm missing.
A lot of what you want to do is very data-format specific. Here's something using a function loosely derived from the "traditional recursive" solution shown in the linked resource you cited. It works fine with this data, since the data is not that deeply nested, and it is simpler than the iterative approach also illustrated there.
The flatten_json() function returns a list, with each value corresponding to keys in the JSON object passed to it.
Note this is Python 3 code.
from collections import OrderedDict
import csv
import json
def flatten_json(nested_json):
    """ Flatten values of JSON object dictionary.

    Walks dictionaries and lists depth-first, in iteration order, and
    collects every non-container value into one flat list. Keys and list
    indexes are not part of the result.
    """
    out = []

    def flatten(obj):
        if isinstance(obj, dict):
            for value in obj.values():
                flatten(value)
        elif isinstance(obj, list):
            for item in obj:
                flatten(item)
        else:
            # Scalar leaf: keep it in traversal order.
            out.append(obj)

    flatten(nested_json)
    return out
def grouper(iterable, n):
    """ Yield consecutive n-item tuples drawn from iterable.

    A trailing group with fewer than n items is dropped, because zip stops
    at the shortest input.
    """
    # All n zip inputs are the same iterator, so each tuple consumes
    # n consecutive items.
    shared = iter(iterable)
    return zip(*[shared for _ in range(n)])
if __name__ == '__main__':
    json_str = """
        {"test1": true,
         "test2": [
            {"name_id": 12345,
             "tags": [
                {"prod_id": 54321,
                 "history": [
                    {"date": "Feb-2-2019", "item": 6},
                    {"date": "Feb-3-2019", "item": 5},
                    {"date": "Feb-4-2019", "item": 4}
                 ]
                }
             ]
            }
         ]
        }
    """

    # Flatten the json object into a list of values.
    json_obj = json.loads(json_str, object_pairs_hook=OrderedDict)
    flattened = flatten_json(json_obj)
    print('flattened:', flattened)

    # Create row dictionaries for each (date, value) pair at the end of the
    # flattened values, with all of the preceding fields repeated in each one.
    # This relies on the first three flattened values being test1, name_id
    # and prod_id, in that order.
    test1, name_id, prod_id = flattened[:3]

    rows = []
    for date, value in grouper(flattened[3:], 2):
        rows.append({'date': date, 'value': value,
                     'prod_id': prod_id, 'name_id': name_id, 'test1': test1})

    # Write rows to a csv file.
    filename = 'product_tests.csv'
    fieldnames = 'date', 'value', 'prod_id', 'name_id', 'test1'
    with open(filename, mode='w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()  # Write csv file header row (optional).
        writer.writerows(rows)

    print('"{}" file written.'.format(filename))
Here's what it prints:
flattened: [True, 12345, 54321, 'Feb-2-2019', 6, 'Feb-3-2019', 5, 'Feb-4-2019', 4]
"product_tests.csv" file written.
And here's the contents of the product_tests.csv file produced:
date,value,prod_id,name_id,test1
Feb-2-2019,6,54321,12345,True
Feb-3-2019,5,54321,12345,True
Feb-4-2019,4,54321,12345,True

Categories