Python Dict append value if key value pair are same - python

I am new to python dict, and have question regarding append value to key. Sample python dictionary is like below. How can I append values if key-value pair, ID & time are same? Please see expected result below. Tried append(), pop(), update(), and couldn't get expected result. Appreciate any help.
{
"Total": [
{
"ID": "ID_1000",
"time": 1000,
"name": {
"first_name": "John",
"last_name": "Brown"
}
},
{
"ID": "ID_5000",
"time": 5000,
"name": {
"first_name": "Jason",
"last_name": "Willams"
}
},
{
"ID": "ID_5000",
"time": 5000,
"name": {
"first_name": "Mary",
"last_name": "Jones"
}
},
{
"ID": "ID_1000",
"time": 1000,
"name": {
"first_name": "Michael",
"last_name": "Kol"
}
}
]
}
Below is the expected result.
{
"Total": [
{
"ID": "ID_1000",
"time": 1000,
"name": [
{
"first_name": "John",
"last_name": "Brown"
},
{
"first_name": "Michael",
"last_name": "Kol"
}
]
},
{
"ID": "ID_5000",
"time": 5000,
"name": [
{
"first_name": "Jason",
"last_name": "Willams"
},
{
"first_name": "Mary",
"last_name": "Jones"
}
]
}
]
}

One option is use a intermediate dictionary with your ids as keys.
Note that this code do not manage the time data, because is not clear if you want to add or what is expected.
import json
values = {
"Total": [
{
"ID": "ID_1000",
"time": 1000,
"name": {
"first_name": "John",
"last_name": "Brown"
}
},
{
"ID": "ID_5000",
"time": 5000,
"name": {
"first_name": "Jason",
"last_name": "Willams"
}
},
{
"ID": "ID_5000",
"time": 5000,
"name": {
"first_name": "Mary",
"last_name": "Jones"
}
},
{
"ID": "ID_1000",
"time": 1000,
"name": {
"first_name": "Michael",
"last_name": "Kol"
}
}
]
}
# first create a dictionary with the ids as keys
consolidated_names = {}
for total_value in values["Total"]:
id = total_value["ID"]
if id not in consolidated_names:
consolidated_names[id] = [total_value["name"]]
else:
consolidated_names[id].append(total_value["name"])
# then create the structure that you want
processed_values = []
for id in consolidated_names:
processed_values.append({"ID": id, "name": consolidated_names[id]})
print(json.dumps({"Total": processed_values}, indent=4))
Result:
{
"Total": [
{
"ID": "ID_1000",
"name": [
{
"first_name": "John",
"last_name": "Brown"
},
{
"first_name": "Michael",
"last_name": "Kol"
}
]
},
{
"ID": "ID_5000",
"name": [
{
"first_name": "Jason",
"last_name": "Willams"
},
{
"first_name": "Mary",
"last_name": "Jones"
}
]
}
]
}

Related

Explode json without pandas

I have a JSON object:
{
"data": {
"geography": [
{
"id": "1",
"state": "USA",
"properties": [
{
"code": "CMD-01",
"value": "34"
},
{
"code": "CMD-02",
"value": "24"
}
]
},
{
"id": "2",
"state": "Canada",
"properties": [
{
"code": "CMD-04",
"value": "50"
},
{
"code": "CMD-05",
"value": "60"
}
]
}
]
}
}
I want to get the result as a new JSON, but without using pandas (and all those explode, flatten and normalize functions...). Is there any option to get this structure without using pandas or having an Out of memory issue?
The output should be:
{ "id": "1",
"state": "USA",
"code": "CMD-01",
"value": "34"
},
{ "id": "1",
"state": "USA",
"code": "CMD-02",
"value": "24",
},
{ "id": "2",
"state": "Canada",
"code": "CMD-04",
"value": "50"
},
{ "id": "2",
"state": "Canada",
"code": "CMD-05",
"value": "60"
},
You can simply loop over the list associated with "geography" and build new dictionaries that you will add to a newly created list:
dict_in = {
"data": {
"geography": [
{
"id": "1",
"state": "USA",
"properties": [
{
"code": "CMD-01",
"value": "34"
},
{
"code": "CMD-02",
"value": "24"
}
]
},
{
"id": "2",
"state": "Canada",
"properties": [
{
"code": "CMD-04",
"value": "50"
},
{
"code": "CMD-05",
"value": "60"
}
]
}
]
}
}
import json
rec_out = []
for obj in dict_in["data"]["geography"]:
for prop in obj["properties"]:
dict_out = {
"id": obj["id"],
"state": obj["state"]
}
dict_out.update(prop)
rec_out.append(dict_out)
print(json.dumps(rec_out, indent=4))
Output:
[
{
"id": "1",
"state": "USA",
"code": "CMD-01",
"value": "34"
},
{
"id": "1",
"state": "USA",
"code": "CMD-02",
"value": "24"
},
{
"id": "2",
"state": "Canada",
"code": "CMD-04",
"value": "50"
},
{
"id": "2",
"state": "Canada",
"code": "CMD-05",
"value": "60"
}
]

Merge json files in Python

I'm trying to merge 2 json files in Python. Here are the files:
test1.json
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
}
}
}
test2.json
{
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I have this code to merge the two files
import json
with open("test1.json", "r") as data1_file:
data1 = json.load(data1_file)
with open("test2.json", "r") as data2_file:
data2 = json.load(data2_file)
data1.update(data2)
with open("out.json", "w") as out_file:
json.dump(data1, out_file, indent=4)
The output I'm getting is this. It only has test2.json contents under "data".
{
"version": "1.0",
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I want the output to have contents of both files under "data" like below
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
},
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
How can I achieve this? Thanks!
You need to merge the "sub-dictionary" data1['data'], not data1 itself. In the current code, you are updating data1 with data2, so that data2['data'] overwrites data1['data'].
So replace data1.update(data2) with:
data1['data'].update(data2['data'])
I think this is what you are looking for:
https://stackoverflow.com/a/7205107/8786297
def merge(a, b, path=None):
"merges b into a"
if path is None: path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
merge(a[key], b[key], path + [str(key)])
elif a[key] == b[key]:
pass # same leaf value
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a

How to use S3 Select for Nested Parquet Objects

I have dumped data into a parquet file.
When I use
SELECT * FROM s3object s LIMIT 1
it gives me the following result.
{
"name": "John",
"age": "45",
"country": "USA",
"experience": [{
"company": {
"name": "ABC",
"years": "10",
"position": "Manager"
}
},
{
"company": {
"name": "BBC",
"years": "2",
"position": "Assistant"
}
}
]
}
I want to filter the result where company.name = "ABC"
so, the output should be looks like following.
{
"name": "John",
"age": "45",
"country": "USA",
"experience": [{
"company": {
"name": "ABC",
"years": "10",
"position": "Manager"
}
}
]
}
or this
{
"name": "John",
"age": "45",
"country": "USA",
"experience.company.name": "ABC",
"experience.company.years": "10",
"experience.company.position": "Manager"
}
Any support is highly appreciated.
Thanks.

Python how to pick 3rd occurence in nested json array

I am working with one of my requirement
My requirement: I need to pick and print only 3rd "id" from "syrap" list from the nested json file. I am not getting desired output. Any help will be appreciated.
Test file:
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
Expected output in a csv:
0001,donut,abc,0303,1003
My code:
import requests
import json
import csv
f = open('testdata.json')
data = json.load(f)
f.close()
f = csv.writer(open('testout.csv', 'wb+'))
for item in data:
f.writerow([item['id'], item[type], item['batters'][0]['process'],
item['batters'][0]['mix'],
item['batters'][0]['syrap'][0]['id'],
item['batters'][0]['syrap'][1]['id'],
item['batters'][0]['syrap'][2]['id'])
Here is some sample code showing how you can iterate through json content parsed as a dictionary:
import json
json_str = '''{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{ "process": "abc",
"mix": "0303",
"syrap":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"rate": 0.55,
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
'''
jsondict = json.loads(json_str)
syrap_node = jsondict['batters']['syrap']
for item in syrap_node:
print (f'id:{item["id"]} type: {item["type"]}')
Simply, data[“batters”][“syrap”][2][“id”]
Much better way to achieve this would be
f = open('testout.csv', 'wb+')
with f:
fnames = ['id','type','process','mix','syrap']
writer = csv.DictWriter(f, fieldnames=fnames)
writer.writeheader()
for item in data:
print item
writer.writerow({'id' : item['id'], 'type': item['type'],
'process' : item['batters']['process'],
'mix': item['batters']['mix'],
'syrap': item['batters']['syrap'][2]['id']})
You need to make sure that data is actually a list. if it is not a list, don't use for loop.
simply,
writer.writerow({'id' : data['id'], 'type': data['type'],
'process' : data['batters']['process'],
'mix': data['batters']['mix'],
'syrap': data['batters']['syrap'][2]['id']})

Join two JSONs based on ID element using Python

I have two JSONs. One has entries like this:
one.json
"data": [
{
"results": {
"counties": {
"32": 0,
"96": 0,
"12": 0
},
"cities": {
"total": 32,
"reporting": 0
}
},
"element_id": 999
},
The other has entries like this:
two.json
"data": [
{
"year": 2020,
"state": "Virginia",
"entries": [
{
"first_name": "Robert",
"last_name": "Smith",
"entry_id": 15723,
"pivot": {
"county_id": 32,
"element_id": 999
}
},
{
"first_name": "Benjamin",
"last_name": "Carter",
"entry_id": 15724,
"pivot": {
"county_id": 34,
"element_id": 999
}
}
],
"element_id": 999,
},
I want to join one.json to two.json based on element_id. The JSONs have lots of element_ids so there is an element of finding the right one to append to. Is there a way to use append to do this based on element_id without having to use a for loop? An appended version of the second JSON above would look like this:
joined.json
"data": [
{
"year": 2020,
"state": "Washington",
"entries": [
{
"first_name": "Robert",
"last_name": "Smith",
"entry_id": 15723,
"pivot": {
"county_id": 32,
"element_id": 999
}
},
{
"first_name": "Benjamin",
"last_name": "Carter",
"entry_id": 15724,
"pivot": {
"county_id": 34,
"element_id": 999
}
}
],
"element_id": 999,
{
"results": {
"counties": {
"32": 0,
"96": 0,
"12": 0
},
"cities": {
"total": 32,
"reporting": 0
}
},
},
What I have so far:
for item in one:
#this goes through one and saves each unique item in a couple variables
temp_id = item["element_id"]
temp_datachunk = item
#then I try to find those variables in two and if I find them, I append
for data in two:
if data["element_id"] == temp_id:
full_data = data.append(item)
print(full_data)
Right now, my attempt dies at the append. I get AttributeError: 'dict' object has no attribute 'append'.
Something like this should work:
source = '''
[{
"results": {
"counties": {
"32": 0,
"96": 0,
"12": 0
},
"cities": {
"total": 32,
"reporting": 0
}
},
"element_id": 999
}]
'''
target = """
[{
"year": 2020,
"state": "Virginia",
"entries": [{
"first_name": "Robert",
"last_name": "Smith",
"entry_id": 15723,
"pivot": {
"county_id": 32,
"element_id": 999
}
},
{
"first_name": "Benjamin",
"last_name": "Carter",
"entry_id": 15724,
"pivot": {
"county_id": 34,
"element_id": 999
}
}
],
"element_id": 999
}]
"""
source_j = json.loads(source)
target_j = json.loads(target)
jsonpath_expr = parse('$..element_id')
source_match = jsonpath_expr.find(source_j)
target_match = jsonpath_expr.find(target_j)
if source_match[0].value==target_match[0].value:
final = target_j+source_j
print(final)
The output is the combined json.

Categories