I have a python dictionary:
d = {
"config": {
"application": {
"payment": {
"dev": {
"modes": {"credit,debit,emi": {}},
"company": {
"address": {
"city": {"London": {}},
"pincode": {"LD568162": {}},
},
"country": {"United Kingdom": {}},
"phone": {"7865432765": {}},
},
"levels": {"0,1,2": {}},
},
"prod": {"modes": {"credit,debit": {}}, "levels": {"0,1": {}}},
}
}
}
}
I want to change it to something like this (if a value is an empty dict `{}`, then make its key the value of its parent):
d = {
"config": {
"application": {
"payment": {
"dev": {
"modes": "credit,debit,emi",
"company": {
"address": {
"city": "London",
"pincode": "LD568162"
},
"country": "United Kingdom",
"phone": "7865432765"
},
"levels": "0,1,2"
},
"prod": {
"modes": "credit,debit",
"levels": "0,1"
}
}
}
}
}
I tried to write code to traverse this deep dictionary, but couldn't modify it to produce the above output. Please help.
def recur(json_object):
    """Print every ``(key, value)`` pair of a nested dictionary, depth first.

    Args:
        json_object: The dictionary to walk. A value that is itself a
            dictionary is descended into right after its pair is printed.

    Returns:
        None. The function only prints.
    """
    for key, value in json_object.items():
        print((key, value))
        # Recurse on the value itself, and only when it is a dictionary:
        # looking the (key, value) tuple up in the dictionary can never
        # succeed, and non-dict leaves have no .items() to iterate.
        if isinstance(value, dict):
            recur(value)
d={'config': {'application': {'payment': {'dev': {'modes': {'credit,debit,emi': {}}, 'company': {'address': {'city': {'London': {}}, 'pincode': {'LD568162': {}}}, 'country': {'United Kingdom': {}}, 'phone': {'7865432765': {}}}, 'levels': {'0,1,2': {}}}, 'prod': {'modes': {'credit,debit': {}}, 'levels': {'0,1': {}}}}}}}
Solution 1
We can use a non-recursive approach with an explicit work list (used here as a stack) that holds each inner/nested element of the document, replacing a value with its key whenever the nested value is just {}:
# d = ...
# Iteratively collapse every {"leaf": {}} value into the plain key "leaf".
pending = [d]
while pending:
    node = pending.pop()
    for name in node:
        child = node[name]
        if not isinstance(child, dict):
            continue
        if list(child.values()) == [{}]:
            # Exactly one key mapped to an empty dict: that key is the value.
            node[name] = next(iter(child))
        else:
            # Anything else still has structure below it; visit it later.
            pending.append(child)
print(d)
Output
{
"config": {
"application": {
"payment": {
"dev": {
"modes": "credit,debit,emi",
"company": {
"address": {
"city": "London",
"pincode": "LD568162"
},
"country": "United Kingdom",
"phone": "7865432765"
},
"levels": "0,1,2"
},
"prod": {
"modes": "credit,debit",
"levels": "0,1"
}
}
}
}
}
Solution 2
Here's a recursive approach
# d = ...
def recur(data):
    """Collapse ``{key: {}}`` values into the bare ``key``, in place.

    Walks ``data`` recursively. Whenever a value is a dictionary holding
    exactly one entry whose own value equals ``{}``, that value is replaced
    by the entry's key. Every other dictionary value is descended into.

    Args:
        data: The (possibly nested) dictionary to rewrite in place.

    Returns:
        None. ``data`` is modified directly.
    """
    for name in data:
        child = data[name]
        if not isinstance(child, dict):
            continue
        if list(child.values()) == [{}]:
            (leaf,) = child
            data[name] = leaf
        else:
            recur(child)
recur(d)
print(d)
Output
Same as Solution 1
Related
i have json response like this :
r = {
"ID": "0001",
"ST": "87549937737",
"DT": "22062022",
"DATA": {
"0": {
"PC": "100",
"NM": "ALEX",
"AMOUNT": "210,912",
"ACT": "123123456"
},
"1": {
"PC": "100",
"NM": "MARCO",
"AMOUNT": "500,200",
"ACT": "123555456"
},
"2": {
"PC": "100",
"NM": "TIFFANY",
"AMOUNT": "712,111",
"ACT": "123666981"
}
},
"RCMSG": {
"0": {
"SEVERITY": "00"
}
},
"RC": "0000"
}
and I want to process every item in DATA in a loop. My script looks like this:
for items in r['DATA']:
jdata = {'PRODUCTCODE': items['PC'], 'NAME': items['NM'], 'AMOUNTBALANCE': items['AMOUNT'], 'ACCOUNT': items['ACT']}
return jsonify({'status':'success', 'code':'00', 'data': jdata})
but the response only contains item '0' of 'DATA'. How do I loop over every item inside 'DATA'?
{
"PRODUCTCODE": "100",
"NAME": "ALEX",
"AMOUNTBALANCE": "210,912",
"ACCOUNT": "123123456"
}
In your code, there is a return statement inside the for loop, so it will never traverse all elements in JSON data because it will end its execution at the first item. So, you need to traverse all elements accessing them from the original JSON response:
r = {
"ID": "0001",
"ST": "87549937737",
"DT": "22062022",
"DATA": {
"0": {
"PC": "100",
"NM": "ALEX",
"AMOUNT": "210,912",
"ACT": "123123456"
},
"1": {
"PC": "100",
"NM": "MARCO",
"AMOUNT": "500,200",
"ACT": "123555456"
},
"2": {
"PC": "100",
"NM": "TIFFANY",
"AMOUNT": "712,111",
"ACT": "123666981"
}
},
"RCMSG": {
"0": {
"SEVERITY": "00"
}
},
"RC": "0000"
}
# Collect every record stored under r['DATA'], in the dictionary's own order.
d = list(r['DATA'].values())
print(d)
Output:
[{'PC': '100', 'NM': 'ALEX', 'AMOUNT': '210,912', 'ACT': '123123456'}, {'PC': '100', 'NM': 'MARCO', 'AMOUNT': '500,200', 'ACT': '123555456'}, {'PC': '100', 'NM': 'TIFFANY', 'AMOUNT': '712,111', 'ACT': '123666981'}]
# Translate each record's short field names into the descriptive output keys.
results = [
    {
        "PRODUCTCODE": record["PC"],
        "NAME": record["NM"],
        "AMOUNTBALANCE": record["AMOUNT"],
        "ACCOUNT": record["ACT"],
    }
    for record in r['DATA'].values()
]
I got a resultant json from an API in the following format
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Since the Uid and Id are the same for multiple entries, can I club them together, with the Details key holding all of their key/value pairs? Something like the example below:
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}]
Please Guide me on this for the approach to be followed. Thanks
What you need is the dictionary function update(). Here's an example:
A = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
B = []
def find(uid, id_):
    """Return the index in ``B`` of the entry matching ``uid``/``id_``.

    Args:
        uid: Value compared against each entry's ``'Uid'``.
        id_: Value compared against each entry's ``'Id'``.

    Returns:
        The position of the first matching entry in ``B``, or ``-1`` when
        no entry matches.
    """
    matches = (
        index
        for index, entry in enumerate(B)
        if entry['Uid'] == uid and entry['Id'] == id_
    )
    return next(matches, -1)
# Fold the entries of A into B: the first entry seen for a (Uid, Id) pair is
# appended as-is; later ones only contribute their 'Details'.
for d in A:
    i = find(d['Uid'], d['Id'])
    if i >= 0:
        B[i]['Details'].update(d['Details'])
    else:
        B.append(d)
print(B)
Prettified output:
[
{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Note:
This could be very inefficient if your API response contains very large numbers of dictionaries. You might need a completely different approach
You should iterate over the list and merge with accumulator with (Uid, Id) as key:
from typing import Dict, List
l = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
def mergeItem(it: Dict, acc: Dict) -> Dict:
    """Fold one record into ``acc`` under its ``(Uid, Id)`` key.

    Args:
        it: A record with ``"Uid"``, ``"Id"`` and ``"Details"`` entries.
        acc: Accumulator mapping ``(Uid, Id)`` tuples to merged records.
            It is updated in place.

    Returns:
        The same ``acc`` object, after the update.
    """
    key = (it["Uid"], it["Id"])
    if key in acc:
        # Combine into a fresh dict; on a clash the incoming detail wins.
        details = {**acc[key]["Details"], **it["Details"]}
    else:
        details = it["Details"]
    acc[key] = {"Uid": key[0], "Id": key[1], "Details": details}
    return acc


def mergeList(a: List) -> Dict:
    """Merge all records of ``a`` that share the same ``(Uid, Id)`` pair.

    Args:
        a: Records accepted by ``mergeItem``.

    Returns:
        A dictionary mapping each ``(Uid, Id)`` tuple to its merged record,
        in order of first appearance.
    """
    merged: Dict = {}
    for record in a:
        merged = mergeItem(record, merged)
    return merged
print(list(mergeList(l).values()))
# [
# {
# 'Uid': '40cc6103-1cf0-4735-b882-d14d32018e58',
# 'Id': '9e1a0057-4570-4a6e-8ff5-88b2facbaf4e',
# 'Details': {'Name': 'Kiran', 'Age': '24'}},
# {
# 'Uid': '196f5865-e9fe-4847-86ae-97d0bf57b816',
# 'Id': '84909ecb-c92e-48a7-bcaa-d478bf3a9220',
# 'Details': {'Name': 'Shreyas'}
# }
# ]
I have the following json document:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data": {
"list": {
"name": "Sorji for QA",
"id": "5b0a2543b89acdbdb85f7b42"
},
"board": {
"shortLink": "iyCzZ5jx",
"name": "FlicksIO",
"id": "5b0a251f68a9e74b8ec3b3ac"
},
"card": {
"shortLink": "vOt2vO7v",
"idShort": 92,
"name": "New column in main for Storefront provider correlation.",
"id": "5b9c0023533f7c26424ea4ed",
"closed": true
},
"old": {
"closed": false
}
},
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator": {
"id": "5b203bc7e47d817a8138bc37",
"avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"fullName": "Marie Bond",
"idMemberReferrer": null,
"initials": "MB",
"username": "mb"
}
}
I would like to expand this out to be a single level with dot notation. That is, it should look like:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42"
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac"
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true
"data.old.closed": false
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
Would it be possible to do this with a generator object? I've been working a lot on recursion today, and have been trying to move from while loops to using generator objects and yields, etc.
You can keep a parameter in the signature of the recursive function to store the paths:
data = {'id': '5c26321bd8f4113d43b91141', 'idMemberCreator': '5b203bc7e47d817a8138bc37', 'data': {'list': {'name': 'Sorji for QA', 'id': '5b0a2543b89acdbdb85f7b42'}, 'board': {'shortLink': 'iyCzZ5jx', 'name': 'FlicksIO', 'id': '5b0a251f68a9e74b8ec3b3ac'}, 'card': {'shortLink': 'vOt2vO7v', 'idShort': 92, 'name': 'New column in main for Storefront provider correlation.', 'id': '5b9c0023533f7c26424ea4ed', 'closed': True}, 'old': {'closed': False}}, 'type': 'updateCard', 'date': '2018-12-28T14:24:27.455Z', 'limits': {}, 'memberCreator': {'id': '5b203bc7e47d817a8138bc37', 'avatarHash': '73bfa48c76c3c92615fe89ff79a6c5ae', 'avatarUrl': 'https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae', 'fullName': 'Marie Bond', 'idMemberReferrer': None, 'initials': 'MB', 'username': 'mb'}}
def dot_paths(d, _paths=None):
    """Yield ``[dotted_key, value]`` pairs for every leaf of a nested dict.

    A value counts as a leaf when it is falsy (so an empty dict is kept
    as-is) or is not a dictionary; any other dictionary is flattened
    recursively, with its keys joined to the parent path by ``"."``.

    Args:
        d: The dictionary to flatten. Keys must be strings, because they
            are combined with ``str.join``.
        _paths: Sequence of key names leading to ``d``. Used by the
            recursion; callers normally omit it.

    Yields:
        Two-item lists ``[path, value]``, suitable for passing to ``dict()``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    prefix = [] if _paths is None else list(_paths)
    for name, value in d.items():
        if not value or not isinstance(value, dict):
            yield ['.'.join(prefix + [name]), value]
        else:
            yield from dot_paths(value, prefix + [name])
import json
print(json.dumps(dict(dot_paths(data)), indent=4))
Output:
{
"id": "5c26321bd8f4113d43b91141",
"idMemberCreator": "5b203bc7e47d817a8138bc37",
"data.list.name": "Sorji for QA",
"data.list.id": "5b0a2543b89acdbdb85f7b42",
"data.board.shortLink": "iyCzZ5jx",
"data.board.name": "FlicksIO",
"data.board.id": "5b0a251f68a9e74b8ec3b3ac",
"data.card.shortLink": "vOt2vO7v",
"data.card.idShort": 92,
"data.card.name": "New column in main for Storefront provider correlation.",
"data.card.id": "5b9c0023533f7c26424ea4ed",
"data.card.closed": true,
"data.old.closed": false,
"type": "updateCard",
"date": "2018-12-28T14:24:27.455Z",
"limits": {},
"memberCreator.id": "5b203bc7e47d817a8138bc37",
"memberCreator.avatarHash": "73bfa48c76c3c92615fe89ff79a6c5ae",
"memberCreator.avatarUrl": "https://trello-avatars.s3.amazonaws.com/73bfa48f79a6c5ae",
"memberCreator.fullName": "Marie Bond",
"memberCreator.idMemberReferrer": null,
"memberCreator.initials": "MB",
"memberCreator.username": "mb"
}
I have the following function to remove keys with an empty value or that start with an underscore. It works on a non-nested dictionary:
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
return d
d = {"Name": "David",
"_Age": 50,
"Numbers": [1,2,3,4,5],
"Height": ""
}
>>> _remove_missing_and_underscored_keys(d)
{'Name': 'David', 'Numbers': [1, 2, 3, 4, 5]}
However, I would like to create the above so that it can remove nested items as well. I believe I need to use a yield statement for this, but I'm not having luck with implementing it properly. Here is an example of what I want it to do:
d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
}
}
}
}
}
>>> _remove_missing_and_underscored_keys(d)
{
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Offers": {
"HDBUY": {
"Currency": "GBP",
"Cost": "14.99",
},
"SDBUY": {
"Currency": "GBP",
"Cost": "14.99",
}
}
}
}
}
In other words, it will do the above operation on all nested levels of the dict.
You can use recursion with a dictionary comprehension:
d = {'PlatformID': 'B00EU7XL9Q', 'Platform': 'Amazon', 'Type': 'Collection', 'Products': {'UK': {'URL': 'http://www.amazon.co.uk/dp/B00EU7XL9Q', 'Rating': None, '_IsAudited': True, 'Offers': {'HDBUY': {'Currency': 'GBP', 'FutureReleaseStartDate': None, 'Cost': '14.99', 'IsFutureRelease': False}, 'SDBUY': {'Currency': 'GBP', 'FutureReleaseStartDate': None, 'Cost': '14.99', 'IsFutureRelease': False}}}}}
def _del(_d):
return {a:_del(b) if isinstance(b, dict) else b for a, b in _d.items() if b and not a.startswith('_')}
import json
print(json.dumps(_del(d), indent=4))
Output:
{
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Offers": {
"HDBUY": {
"Currency": "GBP",
"Cost": "14.99"
},
"SDBUY": {
"Currency": "GBP",
"Cost": "14.99"
}
}
}
}
}
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif type(d[key]) == dict:
d[key] = _remove_missing_and_underscored_keys(d[key])
return d
I think you meant to say, you needed to use recursion to solve this. I don't think using a generator quite solves your problem.
Another caveat is that you shouldn't change the size of a dictionary while you are iterating over it. That's why the loop below iterates over a separate snapshot rather than over the dictionary that is being modified.
import pprint
def _remove_missing_and_underscored_keys(d):
if not d: return d
_d = d.copy()
for key in _d.keys():
if not _d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif isinstance(_d[key], dict):
_remove_missing_and_underscored_keys(_d[key])
return _d
_d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
}
}
}
}
}
foo = _remove_missing_and_underscored_keys(_d)
pprint.pprint(foo)
Output:
{'Platform': 'Amazon',
'PlatformID': 'B00EU7XL9Q',
'Products': {'UK': {'Offers': {'HDBUY': {'Cost': '14.99', 'Currency': 'GBP'},
'SDBUY': {'Cost': '14.99', 'Currency': 'GBP'}},
'URL': 'http://www.amazon.co.uk/dp/B00EU7XL9Q'}},
'Type': 'Collection'}
Just go recursive.
Add another check to see if a value in primary dict is a dict, and call the same function on it.
# your code goes here
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif type(d[key]) is dict:
#print("key '{}' stores a dict '{}', need to cleanup recursively".format(key, d[key]))
d[key] = _remove_missing_and_underscored_keys(d[key])
# Keep below check if you want to treat empty dict as `empty` as well
if d[key] == None or d[key] == {}:
del d[key]
return d
d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"x" : {
"y":None
}
}
}
}
}
e = _remove_missing_and_underscored_keys(d)
print(e)
See it in action: https://ideone.com/5xDDZl
Above code also handles empty dicts stored at any key or any dict that became empty after recursively cleaning it. You can remove that check if needed.
I've tried ast.literal_eval and json.loads, but neither of them maintains the order of the JSON attributes when a string is provided. Please see the following example -
String before providing it to json.loads -
{
"type": "array",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
},
"strList": {
"type": "array",
"items": {
"type": "string"
}
},
"strMap": {
"type": "object"
},
"p2": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
},
"p3": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
},
"p4": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
}
}
}
}
}
}
}
},
"p3": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
},
"p4": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"i": {
"type": "integer"
}
}
}
}
}
},
"b": {
"type": "boolean",
"required": true
}
},
"classnames": {
"rootNode": {
"classname": "com.agent.Person"
},
"p2": {
"classname": "com.agent.Person2",
"p3": {
"classname": "com.agent.Person3",
"p4": {
"classname": "com.agent.Person4"
}
}
},
"p3": {
"classname": "com.agent.Person3",
"p4": {
"classname": "com.agent.Person4"
}
}
}
}
String after providing it to json.loads -
{
'classnames': {
'p2': {
'classname': 'com.agent.Person2',
'p3': {
'classname': 'com.agent.Person3',
'p4': {
'classname': 'com.agent.Person4'
}
}
},
'p3': {
'classname': 'com.agent.Person3',
'p4': {
'classname': 'com.agent.Person4'
}
},
'rootNode': {
'classname': 'com.agent.Person'
}
},
'properties': {
'b': {
'required': True,
'type': 'boolean'
},
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
},
'p2': {
'items': {
'properties': {
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
},
'p3': {
'properties': {
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
},
'p4': {
'properties': {
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
}
},
'type': 'object'
}
},
'type': 'object'
}
},
'type': 'object'
},
'type': 'array'
},
'p3': {
'items': {
'properties': {
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
},
'p4': {
'properties': {
'i': {
'type': 'integer'
},
'name': {
'type': 'string'
}
},
'type': 'object'
}
},
'type': 'object'
},
'type': 'array'
},
'strList': {
'items': {
'type': 'string'
},
'type': 'array'
},
'strMap': {
'type': 'object'
}
},
'type': 'array'
}
Can anyone please suggest an alternative or something in python which keeps the sequence of attributes as they are and convert the string into the python dictionary?
As tobias_k has said, Python dictionaries were unordered before Python 3.7 (insertion order is only guaranteed from 3.7 onward), so on older versions you'll lose any order information as soon as you load your data into one.
You can, however, load your JSON string into a OrderedDict:
from collections import OrderedDict
import json
json.loads(your_json_string, object_pairs_hook=OrderedDict)
This method is mentioned in the json module documentation