Find key in nested dictionary mixed with lists - python

I get JSON Data back from an API. The dataset is large and nested. I can access the Datenreihen key like this:
jsondata.get("Ergebnis")[0].get("Kontakte").get("Datenreihen")
As you can see, this is a mix of dictionaries and lists.
I tried the following, but with lists it does not work :-(.
def recursive_lookup(k, d):
if k in d:
return d[k]
for v in d.values():
if isinstance(v, dict):
return recursive_lookup(k, v)
return None
# Works
recursive_lookup("Ergebnis", jsondata)
# Returns None
recursive_lookup("Datenreihen", jsondata)
Is there an easy way to access and key in my dictionary, no matter how deeply my object is nested?
This is exampledata:
{
"Success":true,
"Ergebnis":[
{
"ErgA1a: KPI Zeitreihe":{
"Message":"",
"MitZielgruppe":true,
"Beschriftung":[
"2019 KW 27",
"2019 KW 28",
"2019 KW 29"
],
"Datenreihen":{
"Gesamt":{
"Name":"Sympathie [#4]\n(Sehr sympathisch, Sympathisch)",
"Werte":[
39.922142815641145,
37.751410794385762,
38.35504885993484
]
}
}
}
}
],
"rest":[
{
"test":"bla"
}
]
}
data.get("ErgebnisseAnalyse")[0].get("ErgA1a: KPI Zeitreihe")
recursive_lookup("ErgA1a: KPI Zeitreihe", data)

Recursive function to find value in nested dictionary based upon key field
Code
def find_item(obj, field):
"""
Takes a dict with nested lists and dicts,
and searches all dicts for a key of the field
provided.
"""
if isinstance(obj, dict):
for k, v in obj.items():
if k == field:
yield v
elif isinstance(v, dict) or isinstance(v, list):
yield from find_item(v, field)
elif isinstance(obj, list):
for v in obj:
yield from find_item(v, field)
Usage
value = next(find_item(dictionary_object, field), None)
Test
# Nested dictionary
dic = {
"a": [{"b": {"c": 1}},
{"d": 2}],
"e": 3}
# Values form various fields
print(next(find_item(dic, "a"), None)) # Output: [{'b': {'c': 1}}, {'d': 2}]
print(next(find_item(dic, "b"), None)) # Output: {'c': 1}
print(next(find_item(dic, "c"), None)) # Output: 1
print(next(find_item(dic, "d"), None)) # Output: 2
print(next(find_item(dic, "e"), None)) # Output: 3
print(next(find_item(dic, "h"), None)) # Output: None

Related

How can I get a value of a specific json key only on a certain level?

I have a huge nested json file and I want to get the values of "text" but only on a certain level as there are many "text" keys deeper in the json file. The level I mean would be the "text:"Hi" after "event":"user".
The file looks like this:
`
{
"_id":{
"$oid":"123"
},
"events":[
{
"event":"action",
"metadata":{
"model_id":"12"
},
"action_text":null,
"hide_rule_turn":false
},
{
"event":"user",
"text":"Hi",
"parse_data":{
"intent":{
"name":"greet",
"confidence":{
"$numberDouble":"0.9601748585700989"
}
},
"entities":[
],
"text":"Hi",
"metadata":{
},
"text_tokens":[
[
{
"$numberInt":"0"
},
{
"$numberInt":"2"
}
]
],
"selector":{
"ideas":{
"response":{
"responses":[
{
"text":"yeah"
},
{
"text":"No"
},
{
"text":"Goo"
}
]
},
`
First I uses this function to get the text data but of course if gave me all of them:
def json_extract(obj, key):
"""Recursively fetch values from nested JSON."""
arr = []
def extract(obj, arr, key):
"""Recursively search for values of key in JSON tree."""
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, (dict, list)):
extract(v, arr, key)
elif k == key:
arr.append(v)
elif isinstance(obj, list):
for item in obj:
extract(item, arr, key)
return arr
values = extract(obj, arr, key)
return values
I also tried to access only the second level through this text but it gave me a KeyNotFound Error:
for i in data["events"][0]:
print(i["text"])
Maybe because that key is not in every nested list? ... I really don't know what else I could do
Since events is a list, you can write a list comprehension (if there are multiple items you need), or you can use the next function to get an element that you need from the iterator:
event = next(e for e in data.get('events', list()) if e.get('event')=='user')
print(event.get('text', ''))
Using get method gives you the safety that it won't throw an exception if the key doesn't exist in the dictionary
Edit:
If you need this for all events:
all_events = [e for e in data.get('events', list()) if e.get('event')=='user']
for event in all_events:
print(event.get('text', ''))
Convert your JSON to a Python dictionary (e.g., json.load or json.loads depending on how you're accessing the JSON). Then just pass a reference to the dictionary to this:
def json_extract(jdata):
assert isinstance(jdata, dict)
arr = []
def _extract(d, arr):
if 'event' in d and (t := d.get('text')):
arr.append(t)
for k, v in d.items():
if k not in {'event', 'text'}:
if isinstance(v, list):
for e in v:
if isinstance(e, dict):
_extract(e, arr)
elif isinstance(v, dict):
_extract(v, arr)
return arr
return _extract(jdata, arr)
This will return a list of all values associated with the key 'text' providing that key is found in a dictionary that also has an 'event' key

Get key values for certain fields in JSON response

My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get values for certain fields in the response along with their keys. So from this response, the fields for which I expect values are a, c,e,g,i,j along with the respective keys.
Eg: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Which resulted in the error. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
if isinstance(d,dict):
get_key_value(d, res_dct, lst)
else:
lst.append(f'{k}:{v}')
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store key & value if only value not list or dict.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
get_key_value(d, res_dct, lst)
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']

Python: recursively append dictionary to another

I've searched and found this Append a dictionary to a dictionary but that clobbers keys from b if they exist in a..
I'd like to essentially recursively append 1 dictionary to another, where:
keys are unique (obviously, it's a dictionary), but each dictionary is fully represented in the result such that a.keys() and b.keys() are both subsets of c.keys()
if the same key is in both dictionaries, the resulting key contains a list of values from both, such that a[key] and b[key] are in c[key]
the values could be another dictionary, (but nothing deeper than 1 level), in which case the same logic should apply (append values) such that a[key1][key2] and b[key1][key2] are in c[key][key2]
The basic example is where 2 dictionary have keys that don't overlap, and I can accomplish that in multiple ways.. c = {**a, **b} for example, so I haven't covered that below
A trickier case:
a = {
"key1": "value_a1"
"key2": "value_a2"
}
b = {
"key1": "value_b1"
"key3": "value_b3"
}
c = combine(a, b)
c >> {
"key1": ["value_a1", "value_b1"],
"key2": "value_a2",
"key3": "value_b3"
}
An even trickier case
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
c >> {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2", "sub_value_b1"], #sub_value_a1 is not duplicated
"sub_key_2": ["sub_value_a3", "sub_value_b3"]
},
"key2": "value_a2",
"key3": "value_b3" # ["value_b3"] this would be okay, following from the code comment above
}
Caveats:
Python 3.6
The examples show lists being created as_needed, but I'm okay with every non-dict value being a list, as mentioned in the code comments
The values within the lists will always be strings
I tried to explain as best I could but can elaborate more if needed. Been working on this for a few days and keep getting stuck on the sub key part
There is no simple built-in way of doing this, but you can recreate the logic in python.
def combine_lists(a: list, b: list) -> list:
return a + [i for i in b if i not in a]
def combine_strs(a: str, b: str) -> str:
if a == b:
return a
return [a, b]
class EMPTY:
"A sentinel representing an empty value."
def combine_dicts(a: dict, b: dict) -> dict:
output = {}
keys = list(a) + [k for k in b if k not in a]
for key in keys:
aval = a.get(key, EMPTY)
bval = b.get(key, EMPTY)
if isinstance(aval, list) and isinstance(bval, list):
output[key] = combine_lists(aval, bval)
elif isinstance(aval, str) and isinstance(bval, str):
output[key] = combine_strs(aval, bval)
elif isinstance(aval, dict) and isinstance(bval, dict):
output[key] = combine_dicts(aval, bval)
elif bval is EMPTY:
output[key] = aval
elif aval is EMPTY:
output[key] = bval
else:
raise RuntimeError(
f"Cannot combine types: {type(aval)} and {type(bval)}"
)
return output
Sounds like you want a specialised version of dict. So, you could subclass it to give you the behaviour you want. Being a bit of a Python noob, I started with the answer here : Subclassing Python dictionary to override __setitem__
Then I added the behaviour in your couple of examples.
I also added a MultiValue class which is a subclass of list. This makes it easy to tell if a value in the dict already has multiple values. Also it removes duplicates, as it looks like you don't want them.
class MultiValue(list):
# Class to hold multiple values for a dictionary key. Prevents duplicates.
def append(self, value):
if isinstance(value, MultiValue):
for v in value:
if not v in self:
super(MultiValue, self).append(v)
else:
super(MultiValue, self).append(value)
class MultiValueDict(dict):
# dict which converts a key's value to a MultiValue when the key already exists.
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
# optional processing here
if key in self:
existing_value = self[key]
if isinstance(existing_value, MultiValueDict) and isinstance(value, dict):
existing_value.update(value)
return
if isinstance(existing_value, MultiValue):
existing_value.append(value)
value = existing_value
else:
value = MultiValue([existing_value, value])
super(MultiValueDict, self).__setitem__(key, value)
def update(self, *args, **kwargs):
if args:
if len(args) > 1:
raise TypeError("update expected at most 1 arguments, "
"got %d" % len(args))
other = dict(args[0])
for key in other:
self[key] = other[key]
for key in kwargs:
self[key] = kwargs[key]
def setdefault(self, key, value=None):
if key not in self:
self[key] = value
return self[key]
Example 1:
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
# combine by creating a MultiValueDict then using update to add b to it.
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': ['value_a1', 'value_b1'], 'key2': 'value_a2', 'key3': 'value_b3'}
Example 2: The value for key1 is created as a MultiValueDict and the value for the sub_key_1 is a MultiValue, so this may not fit what you're trying to do. It depends how you're building you data set.
a = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_a2"]),
"sub_key_2": "sub_value_a3"
}),
"key2": "value_a2"
}
b = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_b1"]),
"sub_key_2": "sub_value_b3"
}),
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': {'sub_key_1': ['sub_value_a1', 'sub_value_a2', 'sub_value_b1'], 'sub_key_2': ['sub_value_a3', 'sub_value_b3']}, 'key2': 'value_a2', 'key3': 'value_b3'}
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
def appendValues(ax,cx):
if type(ax)==list:#is key's value in a, a list?
cx.extend(ax)#if it is a list then extend
else:#key's value in a, os not a list
cx.append(ax)#so use append
cx=list(set(cx))#make values unique with set
return cx
def combine(a,b):
c={}
for x in b:#first copy b keys and values to c
c[x]=b[x]
for x in a:#now combine a with c
if not x in c:#this key is not in c
c[x]=a[x]#so add it
else:#key exists in c
if type(c[x])==list:#is key's value in c ,a list?
c[x]=appendValues(a[x],c[x])
elif type(c[x])==dict:#is key's value in c a dictionary?
c[x]=combine(c[x],a[x])#combine dictionaries
else:#so key';'s value is not list or dict
c[x]=[c[x]]#make value a list
c[x]=appendValues(a[x],c[x])
return c
c = combine(a, b)
print(c)
print("==========================")
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
print(c)

Python - iterate and update a nested dictionary & lists

Having the following dict, where some of the values can be list of dictionaries:
{
"A": [
{
"B": {
"C": "D",
"X": "CHNAGE ME"
}
},
{
"E": "F"
}
],
"G": {
"Y": "CHANGE ME"
}
}
I would like to recursively iterate over the items and change the pairs of key values where the value is "CHANGE ME", so the result would be:
{
"A": [
{
"B": {
"C": "D",
"X.CHANGED": "CHANGED"
}
},
{
"E": "F"
}
],
"G": {
"Y.CHANGED": "CHANGED"
}
}
Solutions I've found were not handling a case where the value is a list, for example:
import collections
def nested_dict_iter(nested):
for key, value in nested.iteritems():
if isinstance(value, collections.Mapping):
for inner_key, inner_value in nested_dict_iter(value):
yield inner_key, inner_value
else:
yield key, value
How can I achieve my goal?
Using recursion
Ex:
def update(data):
for k, v in data.copy().items():
if isinstance(v, dict): # For DICT
data[k] = update(v)
elif isinstance(v, list): # For LIST
data[k] = [update(i) for i in v]
elif v == 'CHANGE ME': # Update Key-Value
# data.pop(k)
# OR
del data[k]
data[f"{k}.CHANGED"] = 'CHANGED'
return data
print(update(data))
Output:
{
'A':[{'B': {'C': 'D', 'X.CHANGED': 'CHANGED'}}, {'E': 'F'}],
'G':{'Y.CHANGED': 'CHANGED'}
}
Note: I have not tested all corner cases

get list of all nested keys in a json

I have a huge json in the format something like :
{
"Name1": {
"NNum": "11",
"Node1": {
"SubNodeA": "Thomas",
"SubNodeB": "27"
},
"Node2": {
"SubNodeA": "ZZZ",
"SubNodeD": "XXX",
"SubNodeE": "yy"
},
"Node3": {
"child1": 11,
"child2": {
"grandchild": {
"greatgrandchild1": "Rita",
"greatgrandchild2": "US"
}
}
}
}
}
The format or keys are not defined and can go to any depth
I would like to get the list of keys like
keyList= ["Name1.NNum","Name1.Node1.SubNodeA",""Name1.Node1.SubNodeB","Name1.Node2.SubNodeA","Name1.Node2.SubNodeD","Name1.Node2.SubNodeE","Name1.Node3.child1","Name1.Node3.child2.grandchild.greatgrandchild1","Name1.Node3.child2.grandchild.greatgrandchild2"]
A snapshot of the code
def extract_values(obj):
"""Pull all values of specified key from nested JSON."""
arr = []
key_list = []
parent = ""
def extract(obj, arr,parent):
"""Recursively search for values of key in JSON tree."""
if isinstance(obj, dict):
grandparent = ""
for k, v in obj.items():
print ("k ............",k)
parent = grandparent
temp_parent = k
print ("parent >>>>> ",parent)
if isinstance(v, (dict, list)):
parent = temp_parent
print ("IF VALUE DICT .. parent ", parent)
extract(v, arr,parent)
else:
grandparent = parent
parent = parent + "_" + temp_parent
print ("!!!! NOT DICT :).... **** parent ... ", parent)
arr.append(parent)
elif isinstance(obj, list):
for item in obj:
extract(item, arr)
#print ("arr >>>>>>>>>> ", arr)
time.sleep(5)
return arr
results = extract(obj, arr,parent)
return results
but this does not give the expected output.
Expected Output:
keyList= ["Name1.NNum","Name1.Node1.SubNodeA",""Name1.Node1.SubNodeB","Name1.Node2.SubNodeA","Name1.Node2.SubNodeD","Name1.Node2.SubNodeE","Name1.Node3.child1","Name1.Node3.child2.grandchild.greatgrandchild1","Name1.Node3.child2.grandchild.greatgrandchild2"]
Can anybody help me with this.
Thanks in advance
You can use recursion:
d = {'Name1': {'NNum': '11', 'Node1': {'SubNodeA': 'Thomas', 'SubNodeB': '27'}, 'Node2': {'SubNodeA': 'ZZZ', 'SubNodeD': 'XXX', 'SubNodeE': 'yy'}, 'Node3': {'child1': 11, 'child2': {'grandchild': {'greatgrandchild1': 'Rita', 'greatgrandchild2': 'US'}}}}}
def keys(d, c = []):
return [i for a, b in d.items() for i in ([c+[a]] if not isinstance(b, dict) else keys(b, c+[a]))]
result = list(map('.'.join, keys(d)))
Output:
['Name1.NNum', 'Name1.Node1.SubNodeA', 'Name1.Node1.SubNodeB', 'Name1.Node2.SubNodeA', 'Name1.Node2.SubNodeD', 'Name1.Node2.SubNodeE', 'Name1.Node3.child1', 'Name1.Node3.child2.grandchild.greatgrandchild1', 'Name1.Node3.child2.grandchild.greatgrandchild2']
def getKeys(object, prev_key = None, keys = []):
if type(object) != type({}):
keys.append(prev_key)
return keys
new_keys = []
for k, v in object.items():
if prev_key != None:
new_key = "{}.{}".format(prev_key, k)
else:
new_key = k
new_keys.extend(getKeys(v, new_key, []))
return new_keys
This solution assumes that the inner types that might have children are dictionaries.
You can do simple recursion:
d = {
"Name1": {
"NNum": "11",
"Node1": {
"SubNodeA": "Thomas",
"SubNodeB": "27"
},
"Node2": {
"SubNodeA": "ZZZ",
"SubNodeD": "XXX",
"SubNodeE": "yy"
},
"Node3": {
"child1": 11,
"child2": {
"grandchild": {
"greatgrandchild1": "Rita",
"greatgrandchild2": "US"
}
}
}
}
}
def get_keys(d, curr_key=[]):
for k, v in d.items():
if isinstance(v, dict):
yield from get_keys(v, curr_key + [k])
elif isinstance(v, list):
for i in v:
yield from get_keys(i, curr_key + [k])
else:
yield '.'.join(curr_key + [k])
print([*get_keys(d)])
Prints:
['Name1.NNum', 'Name1.Node1.SubNodeA', 'Name1.Node1.SubNodeB', 'Name1.Node2.SubNodeA', 'Name1.Node2.SubNodeD', 'Name1.Node2.SubNodeE', 'Name1.Node3.child1', 'Name1.Node3.child2.grandchild.greatgrandchild1', 'Name1.Node3.child2.grandchild.greatgrandchild2']
What about this?
from collections import Mapping
def extract_paths(base_path, dd):
new_paths = []
for key, value in dd.items():
new_path = base_path + ('.' if base_path else '') + key
if isinstance(value, Mapping):
new_paths.extend(extract_paths(new_path, value))
else:
new_paths.append(new_path)
return new_paths
extract_paths('', your_dict)
Use isinstance to check the dict or not called by function recursively. If dict append to path recursively else print the path
def print_nested_keys(dic,path=''):
for k,v in dic.items():
if isinstance(v,dict):
path+=k+"."
yield from print_nested_keys(v,path)
else:
path+=k
yield path
Output:
>>> [*print_nested_keys(d)] # Here, d is your nested dictionary
['Name1.NNum',
'Name1.NNumNode1.SubNodeA',
'Name1.NNumNode1.SubNodeASubNodeB',
'Name1.NNumNode1.Node2.SubNodeA',
'Name1.NNumNode1.Node2.SubNodeASubNodeD',
'Name1.NNumNode1.Node2.SubNodeASubNodeDSubNodeE',
'Name1.NNumNode1.Node2.Node3.child1',
'Name1.NNumNode1.Node2.Node3.child1child2.grandchild.greatgrandchild1',
'Name1.NNumNode1.Node2.Node3.child1child2.grandchild.greatgrandchild1greatgrandchild2']

Categories