Merge Json with same key value pairs

Merge Json with same key value pairs - python

I got a resultant json from an API in the following format
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Since the Uid and Id are the same for multiple entries, can I club them together, with the Details key holding the combined key/value pairs? Something like the following:
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}]
Please Guide me on this for the approach to be followed. Thanks

What you need is the dictionary function update(). Here's an example:
# Sample API response: several records may share the same (Uid, Id) pair.
A = [{
    "Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
    "Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
    "Details": {
        "Name": "Kiran"
    }
}, {
    "Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
    "Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
    "Details": {
        "Age": "24"
    }
},
    {
    "Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
    "Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
    "Details": {
        "Name": "Shreyas"
    }
}
]

# Merged records: one entry per distinct (Uid, Id) pair, in first-seen order.
B = []


def find(uid, id_):
    """Return the index in B of the record with this Uid and Id, or -1."""
    for i, d in enumerate(B):
        if d['Uid'] == uid and d['Id'] == id_:
            return i
    return -1


for d in A:
    if (i := find(d['Uid'], d['Id'])) < 0:
        # Append a copy with its own Details dict; appending d itself would
        # make the update() below modify the records of A as well.
        B.append({**d, 'Details': dict(d['Details'])})
    else:
        B[i]['Details'].update(d['Details'])

print(B)
Prettified output:
[
{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Note:
This could be very inefficient if your API response contains very large numbers of dictionaries. You might need a completely different approach

You should iterate over the list and merge each item into an accumulator keyed by (Uid, Id):
from typing import Dict, List
l = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
def mergeItem(it: Dict, acc: Dict) -> Dict:
    """Fold one record into the accumulator and return the accumulator.

    ``acc`` maps ``(Uid, Id)`` tuples to merged records. When the key is
    already present, a new record is stored whose ``Details`` combines the
    stored details with those of ``it`` (values from ``it`` win on clashes).
    Otherwise a new record is stored that references ``it["Details"]``
    directly, without copying it.
    """
    key = (it["Uid"], it["Id"])
    if key in acc:
        details = {**acc[key]["Details"], **it["Details"]}
    else:
        details = it["Details"]
    acc[key] = {"Uid": key[0], "Id": key[1], "Details": details}
    return acc
def mergeList(a: List) -> Dict:
    """Merge all records of ``a`` by passing each one through mergeItem.

    Starts from an empty accumulator and returns whatever mergeItem
    returned for the last record (the empty dict when ``a`` is empty).
    """
    merged = {}
    for record in a:
        merged = mergeItem(record, merged)
    return merged
print(list(mergeList(l).values()))
# [
# {
# 'Uid': '40cc6103-1cf0-4735-b882-d14d32018e58',
# 'Id': '9e1a0057-4570-4a6e-8ff5-88b2facbaf4e',
# 'Details': {'Name': 'Kiran', 'Age': '24'}},
# {
# 'Uid': '196f5865-e9fe-4847-86ae-97d0bf57b816',
# 'Id': '84909ecb-c92e-48a7-bcaa-d478bf3a9220',
# 'Details': {'Name': 'Shreyas'}
# }
# ]

Related

Merge json files in Python

I'm trying to merge 2 json files in Python. Here are the files:
test1.json
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
}
}
}
test2.json
{
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I have this code to merge the two files
import json
# Read both input documents into dictionaries.
with open("test1.json", "r") as data1_file:
data1 = json.load(data1_file)
with open("test2.json", "r") as data2_file:
data2 = json.load(data2_file)
# dict.update() is shallow: every top-level key of data2 replaces the value
# stored under the same key in data1; nested dictionaries are not merged.
data1.update(data2)
# Write the combined dictionary as indented JSON.
with open("out.json", "w") as out_file:
json.dump(data1, out_file, indent=4)
The output I'm getting is this. It only has test2.json contents under "data".
{
"version": "1.0",
"data": {
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
I want the output to have contents of both files under "data" like below
{
"version": "1.0",
"data": {
"admin1": {
"id": "1",
"location": "NY"
},
"admin2": {
"id": "2",
"name": "Bob",
"location": "LA",
"admin_key": {
"adminvalue1": "admin1",
"adminvalue2": "admin2"
}
},
"admin3": {
"name": "john"
},
"user1": {
"name": "jane",
"phone": "555-666-7777",
"enail": "jane#jane.com"
},
"user2": {
"location": "LA",
"id": "5"
},
"user3": {
"description": "user",
"location": "NY",
"name": "zoe",
"phone": "111-222-3333",
"user_key": {
"uservalue1": "user1",
"uservalue2": "user2"
}
}
}
}
How can I achieve this? Thanks!

You need to merge the "sub-dictionary" data1['data'], not data1 itself. In the current code, you are updating data1 with data2, so that data2['data'] overwrites data1['data'].
So replace data1.update(data2) with:
data1['data'].update(data2['data'])

I think this is what you are looking for:
https://stackoverflow.com/a/7205107/8786297
def merge(a, b, path=None):
    """Recursively merge dictionary ``b`` into ``a`` in place and return ``a``.

    Keys found only in ``b`` are added to ``a`` (the value object is shared,
    not copied). When both sides hold a dict under the same key, the two are
    merged recursively. Equal values under the same key are left untouched.

    Args:
        a: Dictionary that receives the merged content; it is modified.
        b: Dictionary whose content is merged into ``a``.
        path: Keys leading to the current level; used only to build the
            conflict message. Callers normally leave it as ``None``.

    Returns:
        The same object ``a``.

    Raises:
        ValueError: If both dictionaries hold different values under the
            same key and at least one of them is not a dict. The message
            names the dotted key path of the conflict.
    """
    if path is None:
        path = []
    for key, b_value in b.items():
        if key not in a:
            a[key] = b_value
            continue
        a_value = a[key]
        if isinstance(a_value, dict) and isinstance(b_value, dict):
            merge(a_value, b_value, path + [str(key)])
        elif a_value == b_value:
            continue  # same leaf value on both sides
        else:
            conflict_path = '.'.join(path + [str(key)])
            raise ValueError(f'Conflict at {conflict_path}')
    return a

How to extract data from complex JSON object?

I am trying to extract data from the json file I got from a get request.
{
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
I have an empty dictionary that stores projectid as Key.
# Project ids are strings: an integer literal such as 001 has leading zeros,
# which is a SyntaxError in Python 3, and the JSON stores "projectid" as text.
projectids = {
    "001": "",
    "002": "",
    "003": "",
    "004": "",
}
I was looking for a way to find "projectid" inside "attributes" and the corresponding value for "id" and populate the dictionary projectids with the key(['attributes']['projectid']) and values(id):
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": ""
}

You can try this, assuming data is your variable for the response from the GET request
# this solution will populate for all project ids
# Map every record's "projectid" attribute to the record's "id".
projectids = {}
for record in data['data']:
    record_id = record['id']
    projectids[record['attributes']['projectid']] = record_id
Output:
{
'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'
}
if you're trying to match with already existing projectids in a dict then try
# this solution will search for only pre-specified project ids
projectids = {
    "001": "",
    "002": "",
    "003": "",
    "004": "",
}
for project_id in projectids:
    # First record of data['data'] whose projectid equals this key;
    # None when no record matches, in which case the value stays "".
    match = next(
        (item for item in data['data']
         if item["attributes"]["projectid"] == project_id),
        None,
    )
    if match is not None:
        projectids[project_id] = match['id']

If data is your input data from the question, then:
# Pre-fill the keys "001".."004" with empty strings.
projectids = {f"{i:>03}": "" for i in range(1, 5)}
# Ids actually present in the response, keyed by projectid.
found = {d["attributes"]["projectid"]: d["id"] for d in data["data"]}
# Later entries win, so found ids replace the empty placeholders.
out = {**projectids, **found}
print(out)
Prints:
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": "",
}

Simply try this:
json_data = {
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
Just assume the above JSON data and try the following code:
# Map each record's "projectid" attribute to the record's "id".
project_ids = {
    record['attributes']['projectid']: record['id']
    for record in json_data['data']
}
expected output:
{'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'}

How to create an automatic mapping of possible JSON data options to be collected?

I've never heard of or found an option for what I'm looking for, but maybe someone knows a way:
To collect the data from a JSON I need to map manually it like this:
# Manual mapping: one hand-written assignment per value read from each event.
# Each variable name is the chain of keys joined with underscores.
events = response['events']
for event in events:
tournament_name = event['tournament']['name']
tournament_slug = event['tournament']['slug']
tournament_category_name = event['tournament']['category']['name']
tournament_category_slug = event['tournament']['category']['slug']
tournament_category_sport_name = event['tournament']['category']['sport']['name']
tournament_category_sport_slug = event['tournament']['category']['sport']['slug']
tournament_category_sport_id = event['tournament']['category']['sport']['id']
The complete model is this:
{
"events": [
{
"tournament": {
"name": "Serie A",
"slug": "serie-a",
"category": {
"name": "Italy",
"slug": "italy",
"sport": {
"name": "Football",
"slug": "football",
"id": 1
},
"id": 31,
"flag": "italy",
"alpha2": "IT"
},
"uniqueTournament": {
"name": "Serie A",
"slug": "serie-a",
"category": {
"name": "Italy",
"slug": "italy",
"sport": {
"name": "Football",
"slug": "football",
"id": 1
},
"id": 31,
"flag": "italy",
"alpha2": "IT"
},
"userCount": 586563,
"id": 23,
"hasEventPlayerStatistics": true
},
"priority": 254,
"id": 33
},
"roundInfo": {
"round": 24
},
"customId": "Kdbsfeb",
"status": {
"code": 7,
"description": "2nd half",
"type": "inprogress"
},
"winnerCode": 0,
"homeTeam": {
"name": "Bologna",
"slug": "bologna",
"shortName": "Bologna",
"gender": "M",
"userCount": 39429,
"nameCode": "BOL",
"national": false,
"type": 0,
"id": 2685,
"subTeams": [
],
"teamColors": {
"primary": "#003366",
"secondary": "#cc0000",
"text": "#cc0000"
}
},
"awayTeam": {
"name": "Empoli",
"slug": "empoli",
"shortName": "Empoli",
"gender": "M",
"userCount": 31469,
"nameCode": "EMP",
"national": false,
"type": 0,
"id": 2705,
"subTeams": [
],
"teamColors": {
"primary": "#0d5696",
"secondary": "#ffffff",
"text": "#ffffff"
}
},
"homeScore": {
"current": 0,
"display": 0,
"period1": 0
},
"awayScore": {
"current": 0,
"display": 0,
"period1": 0
},
"coverage": 1,
"time": {
"initial": 2700,
"max": 5400,
"extra": 540,
"currentPeriodStartTimestamp": 1644159735
},
"changes": {
"changes": [
"status.code",
"status.description",
"time.currentPeriodStart"
],
"changeTimestamp": 1644159743
},
"hasGlobalHighlights": false,
"hasEventPlayerStatistics": true,
"hasEventPlayerHeatMap": true,
"id": 9645399,
"statusTime": {
"prefix": "",
"initial": 2700,
"max": 5400,
"timestamp": 1644159735,
"extra": 540
},
"startTimestamp": 1644156000,
"slug": "empoli-bologna",
"lastPeriod": "period2",
"finalResultOnly": false
}
]
}
In my example I am collecting 7 values.
But there are 83 possible values to be collected.
In case I want to get all the values options that exist in this JSON, is there any way to make this map sequence automatically to print so I can copy it to the code?
Because manually it takes too long to do and it's very tiring.
And the results of texts like print() in terminal would be something like:
tournament_name = event['tournament']['name']
tournament_slug = event['tournament']['slug']
...
...
...
And so on until delivering the 83 object paths with values to collect...
Then I could copy all the prints and paste into my Python file to retrieve the values or any other way to make the work easier.

If the elements in the events arrays are the same, this code works without errors.
def get_prints(recode: dict):
    """Yield the key path to every leaf value of a nested dictionary.

    A leaf is any value that is not a dict (lists are treated as leaves and
    are not descended into). Each yielded item is a list of keys, outermost
    first. A nested dict that is empty contributes no paths.

    Args:
        recode: The (possibly nested) dictionary to walk.

    Yields:
        One list of keys per leaf, in dictionary iteration order.
    """
    for key, value in recode.items():
        # isinstance also recurses into dict subclasses such as OrderedDict.
        if isinstance(value, dict):
            for sub_print in get_prints(value):
                # Prefix each path found below with the current key.
                yield [key] + sub_print
        else:
            yield [key]
class Automater:
    """Builds assignment statements that read values out of a named JSON object."""

    def __init__(self, name: str):
        """
        Params:
            name: variable name of the JSON object used in the generated code
        """
        self.name = name

    def get_print(self, *args):
        """
        Params:
            *args: the keys leading to one value, outermost first

        Returns the text ``<keys joined by "_"> = <name>['k1']['k2']...``.
        """
        variable = '_'.join(args)
        accessors = ''.join(f"['{arg}']" for arg in args)
        return variable + ' = ' + self.name + accessors
For example, this code:
dicts = {
    'tournament': {
        'name': "any name",
        'slug': 'somthing else',
        'sport': {
            'name': 'sport',
            'anotherdict': {
                'yes': True
            }
        }
    }
}
# The generated statements read from a variable called "event".
auto = Automater('event')
list_names = get_prints(dicts)
for name in list_names:
    print(auto.get_print(*name))
Gives this output:
tournament_name = event['tournament']['name']
tournament_slug = event['tournament']['slug']
tournament_sport_name = event['tournament']['sport']['name']
tournament_sport_anotherdict_yes = event['tournament']['sport']['anotherdict']['yes']

Create dynamic json object in python

I have a dictionary that contains multiple keys and values, where the values themselves contain key/value pairs. I can't figure out how to create dynamic JSON from this dictionary in Python. Here's the dictionary:
image_dict = {"IMAGE_1":{"img0":"IMAGE_2","img1":"IMAGE_3","img2":"IMAGE_4"},"IMAGE_2":{"img0":"IMAGE_1", "img1" : "IMAGE_3"},"IMAGE_3":{"img0":"IMAGE_1", "img1":"IMAGE_2"},"IMAGE_4":{"img0":"IMAGE_1"}}
My expected result like this :
{
"data": [
{
"image": {
"imageId": {
"id": "IMAGE_1"
},
"link": {
"target": {
"id": "IMAGE_2"
},
"target": {
"id": "IMAGE_3"
},
"target": {
"id": "IMAGE_4"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_2"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_3"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_3"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_2"
}
}
},
"updateData": "link"
} ,
{
"image": {
"imageId": {
"id": "IMAGE_4"
},
"link": {
"target": {
"id": "IMAGE_1"
}
}
},
"updateData": "link"
}
]
}
I tried to solve it but I didn't get expected result.
# One entry is appended per (image, linked image) pair, both in sorted order.
result = {"data": []}
for image_id, links in sorted(image_dict.items()):
    for target_id in sorted(links.values()):
        entry = {
            "image": {
                "imageId": {"id": image_id},
                "link": {"target": {"id": target_id}},
            },
            "updateData": "link",
        }
        result["data"].append(entry)
print(json.dumps(result, indent=4))

In Python dictionaries you can't have 2 values with the same key. So you can't have multiple targets all called "target". So you can index them. Also I don't know what this question has to do with dynamic objects but here's the code I got working:
import re
# A dict cannot hold the same key twice, so images and targets are stored
# under numbered keys ("image0", "target0", ...) and the numbers are removed
# from the text representation afterwards.
dict_res = {}
for ind, (image, links) in enumerate(image_dict.items()):
    image_key = 'image' + str(ind)
    sub_dict = {image_key: {'imageId': {'id': image}, 'link': {}}}
    for lin_ind, sub in enumerate(links.values()):
        sub_dict[image_key]['link'].update({'target' + str(lin_ind): {'id': sub}})
    dict_res.update(sub_dict)
# Raw patterns with \d+ so that multi-digit suffixes are removed completely.
# From here on dict_res is a string, not a dictionary.
dict_res = re.sub(r'target\d+', 'target', re.sub(r'image\d+', 'image', str(dict_res)))
print(dict_res)

AWS DynamoDB Stream python convert native format

I've a Lambda function triggered by a DynamoDB Stream. My problem is the strange format of the event received(with type for each key/value).
Is there a workaround to convert a whole document into a native Python format (without any types)? I'm looking for a dynamic solution because in the future I want to use this Lambda with other DynamoDB table streams which have different formats (multiple dict/list levels).
Example:
{
"Records": [
{
"eventID": "1",
"eventVersion": "1.0",
"dynamodb": {
"Keys": {
"Id": {
"N": "101"
}
},
"NewImage": {
"Message": {
"S": "New item!"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES",
"SequenceNumber": "111",
"SizeBytes": 26
},
"awsRegion": "us-west-2",
"eventName": "INSERT",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
},
{
"eventID": "2",
"eventVersion": "1.0",
"dynamodb": {
"OldImage": {
"Message": {
"S": "New item!"
},
"Id": {
"N": "101"
}
},
"SequenceNumber": "222",
"Keys": {
"Id": {
"N": "101"
}
},
"SizeBytes": 59,
"NewImage": {
"Message": {
"S": "This item has changed"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES"
},
"awsRegion": "us-west-2",
"eventName": "MODIFY",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
},
{
"eventID": "3",
"eventVersion": "1.0",
"dynamodb": {
"Keys": {
"Id": {
"N": "101"
}
},
"SizeBytes": 38,
"SequenceNumber": "333",
"OldImage": {
"Message": {
"S": "This item has changed"
},
"Id": {
"N": "101"
}
},
"StreamViewType": "NEW_AND_OLD_IMAGES"
},
"awsRegion": "us-west-2",
"eventName": "REMOVE",
"eventSourceARN": "arn:aws:dynamodb:us-west-2:account-id:table/ExampleTableWithStream/stream/2015-06-27T00:48:05.899",
"eventSource": "aws:dynamodb"
}
]
}
Thanks

I've been using this, it served us well until now:
from boto3.dynamodb.types import TypeDeserializer
serializer = TypeDeserializer()
def deserialize(data):
    """Recursively convert DynamoDB-typed values in ``data`` to plain Python.

    Lists are converted element by element. A dict is first handed to the
    module-level ``serializer`` (a boto3 TypeDeserializer); if that raises
    TypeError the dict is rebuilt with each of its values converted
    recursively. Any other value is returned unchanged.

    NOTE(review): presumably TypeDeserializer raises TypeError for dicts
    that are not type-tagged values such as {"S": "..."} -- confirm against
    the boto3 documentation.
    """
    if isinstance(data, list):
        return [deserialize(item) for item in data]
    if not isinstance(data, dict):
        return data
    try:
        return serializer.deserialize(data)
    except TypeError:
        # Not something the deserializer accepts: descend into the values.
        return {key: deserialize(value) for key, value in data.items()}

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Merge Json with same key value pairs - python

Related

Merge json files in Python

How to extract data from complex JSON object?

How to create an automatic mapping of possible JSON data options to be collected?

Create dynamic json object in python

AWS DynamoDB Stream python convert native format

Categories

Resources