Get values from a nested dict (JSON) without hardcoding the intermediate keys - Python

I want to get the MetaEntry data from the dictionary, but one of the keys on the way to it has a different, random name each time.
For example: ['Client']
dictionary = {
"Tags":[],
"ObjectId":
"ab9c6448-85fe-eb11-b563-281878c3a7fe",
"Client": {
"MetaData": {
"MetaEntry": [
{
"Key": "status",
"Value": "Active"
},
{
"Key": "first_day_of_week",
"Value": "Monday"
},
{
"Key": "default_induction_expiry",
"Value": "0"
}
]
},
"RelatedLinks": [],
"Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics "
}
}
print(dictionary['Client']['MetaData']['MetaEntry'])
Here the ['Client'] key changes randomly, so the print call above will not work. Is there a way to get ['MetaData']['MetaEntry'] without hardcoding the ['Client'] key?

If you have a deeply nested dictionary, you can traverse it recursively to find all MetaEntry keys:
dictionary = {
"Tags": [],
"ObjectId": "ab9c6448-85fe-eb11-b563-281878c3a7fe",
"Client": {
"MetaData": {
"MetaEntry": [
{"Key": "status", "Value": "Active"},
{"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"},
]
},
"RelatedLinks": [],
"Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics ",
},
}
def find(d, *, key="MetaEntry"):
    """Yield the value stored under *key* for every dict found in *d*.

    *d* may be any nesting of dicts and lists; other values are ignored.
    When a dict contains *key*, its value is yielded and that dict is not
    searched any further (its other entries are skipped).

    Args:
        d: The (possibly nested) structure to search.
        key: The dictionary key to look for. Defaults to "MetaEntry".

    Yields:
        Each value found under *key*, in traversal order.
    """
    if isinstance(d, dict):
        if key in d:
            yield d[key]
        else:
            # Only the values matter here; the intermediate key names are
            # exactly what the caller does not know in advance.
            for v in d.values():
                yield from find(v, key=key)
    elif isinstance(d, list):
        for v in d:
            yield from find(v, key=key)
# Print every MetaEntry value found anywhere inside `dictionary`.
for meta_entry in find(dictionary):
    print(meta_entry)
Prints:
[
{"Key": "status", "Value": "Active"},
{"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"},
]
EDIT: To print current path:
def find(d, cur_path=None, *, key="MetaEntry"):
    """Yield ``(value, path)`` for every occurrence of *key* inside *d*.

    *d* may be any nesting of dicts and lists. ``path`` is the list of dict
    keys and list indexes leading from the root to the match, ending with
    *key* itself. When a dict contains *key*, that dict is not searched any
    further.

    Args:
        d: The (possibly nested) structure to search.
        cur_path: Path of the current node; callers normally omit it.
        key: The dictionary key to look for. Defaults to "MetaEntry".

    Yields:
        Tuples of the value found and the path to it.
    """
    if cur_path is None:
        cur_path = []
    if isinstance(d, dict):
        if key in d:
            yield d[key], cur_path + [key]
        else:
            for k, v in d.items():
                # `cur_path + [k]` builds a new list, so sibling branches
                # never share (and never corrupt) each other's path.
                yield from find(v, cur_path + [k], key=key)
    elif isinstance(d, list):
        for i, v in enumerate(d):
            yield from find(v, cur_path + [i], key=key)
# Print each match preceded by the path that leads to it.
for meta_entry, cur_path in find(dictionary):
    print(cur_path)
    print(meta_entry)
Prints:
['Client', 'MetaData', 'MetaEntry']
[{'Key': 'status', 'Value': 'Active'}, {'Key': 'first_day_of_week', 'Value': 'Monday'}, {'Key': 'default_induction_expiry', 'Value': '0'}]

The code below handles the situation where 'Client' is replaced by some other string:
data = {"Tags": [], "ObjectId": "ab9c6448-85fe-eb11-b563-281878c3a7fe", "kkk": {"MetaData": {
"MetaEntry": [{"Key": "status", "Value": "Active"}, {"Key": "first_day_of_week", "Value": "Monday"},
{"Key": "default_induction_expiry", "Value": "0"}]}, "RelatedLinks": [], "Tags": [],
"ObjectId": "6cf54386-d81a-eb11-9fb4-281878b13795",
"Type": "Artifice.Web.Data.Entities.Client",
"Name": "Amco Logictics "}}
# Report every top-level key whose value is a dict containing 'MetaData',
# whatever that key happens to be called.
for k, v in data.items():
    if isinstance(v, dict) and 'MetaData' in v:
        print(f'{k} points to metadata')
output
kkk points to metadata

Related

How to change value in a dict?

I have many dicts in different formats, and I want to set the value of the key "place" to None.
I wrote this function to read all the values, but I can't work out how to change them:
def test(T, data):
if T:
T.pop(0)
for cle, valeur in data.items():
if isinstance(valeur, dict):
T.append(valeur)
elif isinstance(valeur, list):
for idx, obj in enumerate(valeur):
if isinstance(obj, dict):
T.append(obj)
else:
print(cle, valeur)
if T:
test(T, T[0])
test(T=[], data=datas)
example data :
datas={
"first_name": "test",
"last_name": "test",
"cars": [
{"mark": "test", "type": "12"},
{"mark": "test2", "type": "7"},
],
"date_created": "2022-05-07",
"invoice_info": {
"price": 1233,
"currency": "EUR",
"total": {"product1": 12, "product2": 22},
"date": [
{"date_1": "2022-05-07", "info": {"comment": "test", "place": "France"}},
{"date_2": "2022-06-12", "info": {"comment": None, "place": "France"}},
]
}
}
You can do
key = 'yourkey'
dic[key] = new_value
Basically you just tell python that the value for the associated key is now new_value.

How to update values in a nested dictionary?

I have 2 dictionaries:
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "26366", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "45165", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "48834", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
new_val = {'26366': '11616', '45165': '11613', '48834': '11618'}
I want to update values in "data" dictionary with the values from "new_val" dictionary.
So that 26366(in "data" dict) becomes 11616(from "new_val" dict), 45165 becomes 11613, and 48834 becomes 11618.
"data" dictionary nesting can be different (both up and down)
The key in the "data" dictionary can be different, not only "key", it can be "skill_id", "filter_id" and so on.
And get this result:
{
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
To return an updated dict without modifying the old one:
def updated_in_depth(d, replace):
    """Return a copy of *d* with leaf values substituted via *replace*.

    Dicts and lists are rebuilt recursively; the input is not modified.
    Every other value is treated as a leaf and looked up in *replace*.

    Args:
        d: Any nesting of dicts and lists with arbitrary leaves.
        replace: Mapping of old leaf value -> new leaf value.

    Returns:
        A new structure of the same shape with matching leaves replaced.
    """
    if isinstance(d, dict):
        return {k: updated_in_depth(v, replace) for k, v in d.items()}
    if isinstance(d, list):
        return [updated_in_depth(x, replace) for x in d]
    try:
        return replace.get(d, d)
    except TypeError:
        # Unhashable leaf (e.g. a set): it cannot be a key of `replace`,
        # so there is nothing to substitute.
        return d
Testing with your data and new_val:
>>> updated_in_depth(data, new_val)
{'filter': {'and': [{'or': [{'and': [
{'category': 'profile', 'key': 'languages', 'operator': 'IN', 'value': 'EN'},
{'category': 'skill', 'key': '11616', 'value': 100, 'operator': 'EQ'}]}]},
{'or': [{'category': 'skill', 'key': '11613', 'operator': 'NE'}]},
{'or': [{'category': 'skill', 'key': '11618', 'value': 80, 'operator': 'GT'}]},
{'or': [{'category': 'profile', 'key': 'gender', 'operator': 'EQ', 'value': 'FEMALE'}]}]}}
Use something like this:
# "and" and "or" hold lists, so each needs an index before the next key.
data['filter']['and'][0]['or'][0]['and'][1]['key'] = '11616'
To search for the keys recursively you can do:
from copy import deepcopy
def replace(d, new_vals, *, key_names=("key",)):
    """Replace, in place, the values stored under given key names in *d*.

    Walks any nesting of dicts and lists. In each dict, for every name in
    *key_names* that is present, the value is swapped for
    ``new_vals[value]`` when such an entry exists and left alone otherwise.

    Args:
        d: Structure to update in place.
        new_vals: Mapping of old value -> new value.
        key_names: Dictionary keys whose values should be translated.
            Defaults to ``("key",)``; pass e.g. ``("key", "skill_id")`` when
            the identifier is stored under other names.
    """
    if isinstance(d, dict):
        for name in key_names:
            if name in d:
                current = d[name]
                try:
                    d[name] = new_vals.get(current, current)
                except TypeError:
                    # Unhashable value (a nested list/dict): it cannot be a
                    # key of `new_vals`; the recursion below still visits it.
                    pass
        for v in d.values():
            replace(v, new_vals, key_names=key_names)
    elif isinstance(d, list):
        for v in d:
            replace(v, new_vals, key_names=key_names)
new_data = deepcopy(data)
replace(new_data, new_val)
print(new_data)
Prints:
{
"filter": {
"and": [
{
"or": [
{
"and": [
{
"category": "profile",
"key": "languages",
"operator": "IN",
"value": "EN",
},
{
"category": "skill",
"key": "11616",
"value": 100,
"operator": "EQ",
},
]
}
]
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{
"or": [
{
"category": "skill",
"key": "11618",
"value": 80,
"operator": "GT",
}
]
},
{
"or": [
{
"category": "profile",
"key": "gender",
"operator": "EQ",
"value": "FEMALE",
}
]
},
]
}
}
If you don't need copy of data you can omit the deepcopy:
replace(data, new_val)
print(data)
You can build a recursive function like this
def walk_dict(d, mapping=None):
    """Translate, in place, every ``'key'`` value found in *d*.

    Walks any nesting of dicts and lists. In each dict that has a ``'key'``
    entry whose value is a key of *mapping*, the value is replaced by the
    mapped one.

    Args:
        d: Structure to update in place.
        mapping: Mapping of old value -> new value. When omitted, the
            module-level ``new_val`` is used, as in the original snippet.
    """
    if mapping is None:
        mapping = new_val
    if isinstance(d, list):
        for item in d:
            walk_dict(item, mapping)
    elif isinstance(d, dict):
        if 'key' in d:
            old = d['key']
            try:
                d['key'] = mapping.get(old, old)
            except TypeError:
                # Unhashable value under 'key': nothing to look up.
                pass
        for v in d.values():
            walk_dict(v, mapping)
walk_dict(data)
print(data)
As many have advised, a recursive function will do the trick:
def a(d, mapping=None):
    """Return a copy of *d* with leaf values translated through *mapping*.

    Args:
        d: Any nesting of dicts and lists with arbitrary leaves.
        mapping: Mapping of old leaf value -> new leaf value. When omitted,
            the module-level ``new_val`` is used, as in the original snippet.

    Returns:
        A new structure of the same shape; the input is not modified.
    """
    if mapping is None:
        mapping = new_val
    if isinstance(d, dict):  # if dictionary, apply a to all values
        return {k: a(v, mapping) for k, v in d.items()}
    if isinstance(d, list):  # if list, apply to all elements
        return [a(x, mapping) for x in d]
    # Leaf value (number, string, bool, ...): substitute when it is mapped.
    try:
        return mapping.get(d, d)
    except TypeError:
        # Unhashable leaf: it cannot be a key of `mapping`.
        return d
When a is called, it checks the type of the variable d:
if d is a list, it applies a to each element of the list and returns the updated list
if d is a dict, it applies a to all values and returns the updated dict
otherwise, it returns the mapped new value if d is found among the keys of new_val, and d itself if not
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
class Replace:
    """Replace string values inside a nested dict/list structure, in place."""

    def __init__(self, data):
        # The structure to operate on; it is modified by `start`.
        self.data = data

    def start(self, d):
        """Apply the replacements in *d* to ``self.data`` and return it.

        Every dict entry whose value is a string that is a key of *d* gets
        that value replaced by ``d[value]``. Nested dicts and lists are
        traversed recursively.

        Args:
            d: Mapping of old string value -> new value.

        Returns:
            ``self.data`` (the same object, updated in place).
        """
        def replace(node):
            if isinstance(node, list):
                for item in node:
                    replace(item)
            elif isinstance(node, dict):
                for k, v in node.items():
                    # Lists must be descended into as well as dicts;
                    # otherwise anything below e.g. "and": [...] is never
                    # reached and no replacement happens.
                    if isinstance(v, (dict, list)):
                        replace(v)
                    elif isinstance(v, str) and v in d:
                        # Assigning to an existing key does not change the
                        # dict's size, so it is safe during iteration.
                        node[k] = d[v]

        replace(self.data)
        return self.data
new_data = Replace(data).start({'26366': '11616',
'45165': '11613',
'48834': '11618'})
print(new_data)

How to group a json by a nested key using Python?

Lets say we have a json object in Python:
myJson = [
{
"id": "123",
"name": "alex",
"meta": {
"city": "boston"
}
},
{
"id": "234",
"name": "mike",
"meta": {
"city": "seattle"
}
},
{
"id": "345",
"name": "jess",
"meta": {
"city": "boston"
}
}
]
What is the most efficient way to group this data by city, so that we end up with a JSON structure like this:
myNewJson = [
{
"city": "boston",
"people": [ ... ... ]
},
{
"city": "seattle",
"people": [ ... ]
}
]
... in which the content of the people are included in "people" key.
Thanks!
Try:
myJson = [
    {"id": "123", "name": "alex", "meta": {"city": "boston"}},
    {"id": "234", "name": "mike", "meta": {"city": "seattle"}},
    {"id": "345", "name": "jess", "meta": {"city": "boston"}},
]

# Collect the names per city. Dicts preserve insertion order, so cities
# come out in the order they are first seen.
grouped = {}
for d in myJson:
    grouped.setdefault(d["meta"]["city"], []).append(d["name"])

# Reshape the mapping into the requested list of records.
out = [{"city": k, "people": v} for k, v in grouped.items()]
print(out)
Prints:
[
{"city": "boston", "people": ["alex", "jess"]},
{"city": "seattle", "people": ["mike"]},
]
Seems like a dictionary could work. Use city names as the keys, and a list as the value. Then at the end, go through the dictionary and convert it to a list.
myJson = [
{
"id": "123",
"name": "alex",
"meta": {
"city": "boston"
}
},
{
"id": "234",
"name": "mike",
"meta": {
"city": "seattle"
}
},
{
"id": "345",
"name": "jess",
"meta": {
"city": "boston"
}
}
]
# Map each city to the list of names seen for it.
d = {}
for e in myJson:
    d.setdefault(e['meta']['city'], []).append(e['name'])

# Convert the mapping into a list of {'city', 'people'} records.
result = [{'city': key, 'people': val} for key, val in d.items()]
print(result)

How to delete keys in a nested dictionary of lists based on the values

I am working on a file representing a tree-like structure very similar to flare.json which is known for D3.js community. What's the best way to delete all the leaves of the tree in python? In other words, I want to remove all the keys that don't have a 'children' key in their value.
example :
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
},
{
"name": "optimization",
"children": [
{"name": "AspectRatioBanker", "size": 7074}
] ...
which should become:
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
},
{
"name": "graph",
},
{
"name": "optimization",
] ...
In other words, I'm just cutting the leaves off the tree. If a children list ends up empty, it should be removed.
I tried this only to remove the keys and it did not work:
# Attempt from the question, kept as posted; it does not remove anything.
def deleteLeaves(pTree):
# dict.has_key() exists only in Python 2; Python 3 uses `'children' in pTree`.
if pTree.has_key('children'):
for child in pTree['children']:
deleteLeaves(child)
else:
# `del pTree` only unbinds this function's local name. The dict object is
# still referenced by its parent's 'children' list, so the tree is unchanged.
del pTree
This seems to approximate what you want:
def pruneLeaves(obj):
    """Remove, in place, every leaf node of a ``children``-style tree.

    A node is a dict; its child nodes are listed under the ``'children'``
    key. A dict without a ``'children'`` key is a leaf. Leaves are removed
    from their parent's list, and a ``'children'`` list left empty is
    deleted from its node. No other entry of a node is touched.

    Args:
        obj: A node (dict), a list of nodes, or any other value.

    Returns:
        True if *obj* should be removed by its owner (a leaf dict, or a
        list that ended up empty), False otherwise.
    """
    if isinstance(obj, dict):
        if 'children' not in obj:
            return True
        # Look the key up directly rather than deleting while iterating over
        # the dict, which raises RuntimeError on Python 3.
        if pruneLeaves(obj['children']):
            del obj['children']
        return False
    elif isinstance(obj, list):
        kept = []
        for element in obj:
            if not pruneLeaves(element):
                kept.append(element)
        # Slice assignment keeps the same list object, so the parent's
        # reference sees the pruned content.
        obj[:] = kept
        return not obj
    else:  # String values look like attributes in your dict, so never prune them
        return False
Tested with a truncated sample of your data:
data = {
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
{"name": "CommunityStructure", "size": 3812},
{"name": "HierarchicalCluster", "size": 6714},
{"name": "MergeEdge", "size": 743}
]
},
{
"name": "graph",
"children": [
{"name": "BetweennessCentrality", "size": 3534},
{"name": "LinkDistance", "size": 5731},
{"name": "MaxFlowMinCut", "size": 7840},
{"name": "ShortestPaths", "size": 5914},
{"name": "SpanningTree", "size": 3416}
]
}
]
}
]
}
pruneLeaves(data)
print(data)
And got these results:
{'name': 'flare', 'children': [{'name': 'analytics', 'children': [{'name': 'cluster'}, {'name': 'graph'}]}]}
I just edited @rchang's answer to fix the deletion of lists other than children.
def pruneLeaves(self, obj):
    """Remove, in place, every leaf node of a ``children``-style tree.

    A node is a dict; its child nodes are listed under the ``'children'``
    key. A dict without a ``'children'`` key is a leaf. Leaves are removed
    from their parent's list, and a ``'children'`` list left empty is
    deleted from its node. Lists stored under any other key are untouched.

    Args:
        obj: A node (dict), a list of nodes, or any other value.

    Returns:
        True if *obj* should be removed by its owner (a leaf dict, or a
        list that ended up empty), False otherwise.
    """
    if isinstance(obj, dict):
        if 'children' not in obj:
            return True
        # Look the key up directly rather than deleting while iterating over
        # the dict, which raises RuntimeError on Python 3.
        if self.pruneLeaves(obj['children']):
            del obj['children']
        return False
    elif isinstance(obj, list):
        kept = []
        for element in obj:
            if not self.pruneLeaves(element):
                kept.append(element)
        # Slice assignment keeps the same list object, so the parent's
        # reference sees the pruned content.
        obj[:] = kept
        return not obj
    else:  # String values look like attributes in your dict, so never prune them
        return False

Manipulating data structures in Python

I have data in JSON format:
data = {"outfit":{"shirt":"red","pants":{"jeans":"blue","trousers":"khaki"}}}
I'm attempting to plot this data into a decision tree using InfoVis, because it looks pretty and interactive. The problem is that their graph takes JSON data in this format:
data = {id:"nodeOutfit",
name:"outfit",
data:{},
children:[{
id:"nodeShirt",
name:"shirt",
data:{},
children:[{
id:"nodeRed",
name:"red",
data:{},
children:[]
}],
}, {
id:"nodePants",
name:"pants",
data:{},
children:[{
id:"nodeJeans",
name:"jeans",
data:{},
children:[{
id:"nodeBlue",
name:"blue",
data:{},
children[]
},{
id:"nodeTrousers",
name:"trousers",
data:{},
children:[{
id:"nodeKhaki",
name:"khaki",
data:{},
children:[]
}
}
Note the addition of 'id', 'data' and 'children' to every key and value and calling every key and value 'name'. I feel like I have to write a recursive function to add these extra values. Is there an easy way to do this?
Here's what I want to do but I'm not sure if it's the right way. Loop through all the keys and values and replace them with the appropriate:
for name, list in data.iteritems():
for dict in list:
for key, value in dict.items():
#Need something here which changes the value for each key and values
#Not sure about the syntax to change "outfit" to name:"outfit" as well as
#adding id:"nodeOutfit", data:{}, and 'children' before the value
Let me know if I'm way off.
Here is their example http://philogb.github.com/jit/static/v20/Jit/Examples/Spacetree/example1.html
And here's the data http://philogb.github.com/jit/static/v20/Jit/Examples/Spacetree/example1.code.html
A simple recursive solution:
data = {"outfit":{"shirt":"red","pants":{"jeans":"blue","trousers":"khaki"}}}
import json
from collections import OrderedDict
def node(name, children):
    """Build one tree node in the id/name/data/children layout.

    Args:
        name: Label of the node. Non-string labels (e.g. numbers) are
            accepted; they are converted to text only to build the id.
        children: List of child nodes (already in the same layout).

    Returns:
        An OrderedDict with the keys ``id``, ``name``, ``data`` and
        ``children``, in that order.
    """
    n = OrderedDict()
    # str() lets numeric or other non-string leaves through instead of
    # failing on a missing .capitalize() method.
    n['id'] = 'node' + str(name).capitalize()
    n['name'] = name
    n['data'] = {}
    n['children'] = children
    return n
def convert(d):
    """Convert a nested dict into a list of id/name/data/children nodes.

    Each key of a dict becomes a node whose children are the conversion of
    its value; any non-dict value becomes a single childless node.

    Args:
        d: A dict (possibly nested) or a leaf value.

    Returns:
        A list of nodes as produced by ``node``. A list rather than a single
        node, because a dict may have more than one key.
    """
    if isinstance(d, dict):
        return [node(k, convert(v)) for k, v in d.items()]
    return [node(d, [])]
print(json.dumps(convert(data), indent=True))
Note that convert returns a list, not a dict, as data could have more than one top-level key, not just 'outfit'.
output:
[
{
"id": "nodeOutfit",
"name": "outfit",
"data": {},
"children": [
{
"id": "nodeShirt",
"name": "shirt",
"data": {},
"children": [
{
"id": "nodeRed",
"name": "red",
"data": {},
"children": []
}
]
},
{
"id": "nodePants",
"name": "pants",
"data": {},
"children": [
{
"id": "nodeJeans",
"name": "jeans",
"data": {},
"children": [
{
"id": "nodeBlue",
"name": "blue",
"data": {},
"children": []
}
]
},
{
"id": "nodeTrousers",
"name": "trousers",
"data": {},
"children": [
{
"id": "nodeKhaki",
"name": "khaki",
"data": {},
"children": []
}
]
}
]
}
]
}
]

Categories