How to get the parent keypath in json file? - python

In python can we get the parent key path for a key, My json looks like following..
For example if i input a1: it should give: PROJ1/LOB1. All keys inside list[] are unique
{
'PROJ1': {
u'LOB1': [u'a1', u'a2'],
u'LOb2': [u'v1', u'v2'],
u'LOBA': [u'o1', u'oa', u'o2', u'o3'],
u'LOBX': [u'n1', u'n2'],
u'LOB': [u'b1', u'b2']
},
'PROJ12': {
u'LOBa': [u'aa1', u'aa2'],
u'LOBX': [u'vx1', u'vx2']
},
}
I found this on a different thread but its not working for the above data. failing with : unhashable type: 'list'
def keypaths(myDict):
for key, value in myDict.items():
if isinstance(value, collections.Mapping):
for subkey, subvalue in self.keypaths(value):
yield [key] + subkey, subvalue
else:
yield [key], value
reverse_dict = {value: keypath for keypath, value in keypaths(example_dict)}

You need to drill down the lists of values returned to get each of the a1, a2... etc. You can make a minor modification to the keypaths function to flip the keys/values being returned::
import collections
sample = {
'PROJ1': {
u'LOB1': [u'a1', u'a2'],
u'LOb2': [u'v1', u'v2'],
u'LOBA': [u'o1', u'oa', u'o2', u'o3'],
u'LOBX': [u'n1', u'n2'],
u'LOB': [u'b1', u'b2']
},
'PROJ12': {
u'LOBa': [u'aa1', u'aa2'],
u'LOBX': [u'vx1', u'vx2']
},
}
def keypaths(myDict):
for key, value in myDict.items():
if isinstance(value, collections.Mapping):
for subvalue, subkey in keypaths(value):
yield subvalue, [key] + subkey
elif isinstance(value,list):
for i in value:
yield i, [key]
else:
yield value, [key]
my_dict = {k:i for k, i in keypaths(sample)}
This assumes all your values are unique, otherwise the paths get overwritten.
Alternatively, you could have done the same without modifying keypaths function (if it's needed for other purposes):
def keypaths(myDict):
for key, value in myDict.items():
if isinstance(value, collections.Mapping):
for subkey, subvalue in keypaths(value):
yield [key] + subkey, subvalue
else:
yield [key], value
my_dict = {i: keypath for keypath, value in keypaths(sample) for i in value}
Note this alternative only works assuming all your values are lists (otherwise the dictionary comprehension would fail).
Output:
{'a1': ['PROJ1', 'LOB1'],
'a2': ['PROJ1', 'LOB1'],
'aa1': ['PROJ12', 'LOBa'],
'aa2': ['PROJ12', 'LOBa'],
'b1': ['PROJ1', 'LOB'],
'b2': ['PROJ1', 'LOB'],
'n1': ['PROJ1', 'LOBX'],
'n2': ['PROJ1', 'LOBX'],
'o1': ['PROJ1', 'LOBA'],
'o2': ['PROJ1', 'LOBA'],
'o3': ['PROJ1', 'LOBA'],
'oa': ['PROJ1', 'LOBA'],
'v1': ['PROJ1', 'LOb2'],
'v2': ['PROJ1', 'LOb2'],
'vx1': ['PROJ12', 'LOBX'],
'vx2': ['PROJ12', 'LOBX']}
Note this only works assuming all your values are lists (otherwise the dictionary comprehension would fail), and with unique values (otherwise the keypaths get overwritten for duplicate values).
Now all you have to do to get the keypath is:
my_dict.get('a1')
# ['PROJ1', 'LOB1']
my_dict.get('o1')
# ['PROJ1', 'LOBA']
my_dict.get('not exist')
# None

Related

Taking the innermost dict in nested dictionary with recursion

Here is my snippet:
info = {'k1': {
'k2': {
'k3': 'v3',
'k4': 'v4',
'k5': 'v5'}
},
}
So I want to use recursion to get the most inner dict {'k3': 'v3', 'k4': 'v4', k5': 'v5'}, because I will have dicts with different nested levels, but in any cases I will need to take this the most inner dictionary. How can I do that?
Here is my try:
def recursion(info):
store = dict()
for key in info.keys():
if isinstance(info[key], dict):
recursion(info[key])
store[key] = info[key]
return store
One option:
def inner_most(d):
def inner_most_helper(d, level = 0):
' Use recursion to find dictionary at highest inner level'
# Max level starts with current dictionary
level_max, d_max = level, d
for k, v in d.items():
if isinstance(v, dict):
# Found a higher level of nesting
level_, d_ = inner_most_helper(v, level + 1)
if level_ > level_max:
d_max = d_
return level_max, d_max
return inner_most_helper(d)[1]
Test
d = {'k1': {
'k2': {
'k3': 'v3',
'k4': 'v4',
'k5': 'v5'}
},
}
print(inner_most(d)) # print inner most dictionary
Output
{'k3': 'v3', 'k4': 'v4', 'k5': 'v5'}
If it would be sufficient to find a dictionary that doesn't have any dictionary nested inside it, you can do this:
def get_childless_dicts(d):
has_child = False
for v in d.values():
if isinstance(v, dict):
yield from get_childless_dicts(v)
has_child = True
if not has_child:
yield d
This yields any childless dicts, so if you know there is only one you can get only the first one using next:
print(next(get_childless_dicts(info)))

Get keys of all child dictionaries

I have a dictionary of this kind where the values are dictionaries as well the dictionaries can have nested dictionaries in them. like this:
data = {'key1': {
'keya':{
'keyc': None
}
'keyb': None
}
'key2': {
'keyi':None,
'keyii': None
}
}
The dictionaries can be many (we don't know how many dictionaries can be there inside the values). How can I get all keys in all values like this?
['key1', 'key2', 'keya', 'keyb', 'keyi', 'keyii']
you could get all the keys using recursion
def get_all_keys_rec(dic):
keys = [key for key in dic]
for val in dic.values():
if type(val)==dict:
inner_keys = get_all_keys_rec(val)
keys.extend(inner_keys)
return keys
print(get_all_keys_rec(data))
output:
['key1', 'key2', 'keya', 'keyb', 'keyc', 'keyi', 'keyii']
keys = []
for key, val in data.items():
keys.append(key)
if isinstance(val, dict):
item = val
while True:
for k, v in item.items():
keys.append(k)
if isinstance(v, dict):
item = v
break
else:
break
print(keys)
This outputs:
['key1', 'keya', 'keyc', 'key2', 'keyi', 'keyii']
Recursive generation, yield from should be your partner:
>>> data = {'key1': {
... 'keya': {
... 'keyc': None
... },
... 'keyb': None
...
... },
... 'key2': {
... 'keyi': None,
... 'keyii': None
... }
... }
>>> def get_all_keys(dct):
... def gen_all_keys(d):
... if isinstance(d, dict):
... yield from d
... for v in d.values():
... yield from gen_all_keys(v)
... return list(gen_all_keys(dct))
...
>>> get_all_keys(data)
['key1', 'key2', 'keya', 'keyb', 'keyc', 'keyi', 'keyii']

Find key in nested dictionary mixed with lists

I get JSON Data back from an API. The dataset is large and nested. I can access the Datenreihen key like this:
jsondata.get("Ergebnis")[0].get("Kontakte").get("Datenreihen")
As you can see, this is a mix of dictionaries and lists.
I tried the following, but with lists it does not work :-(.
def recursive_lookup(k, d):
if k in d:
return d[k]
for v in d.values():
if isinstance(v, dict):
return recursive_lookup(k, v)
return None
# Works
recursive_lookup("Ergebnis", jsondata)
# Returns None
recursive_lookup("Datenreihen", jsondata)
Is there an easy way to access and key in my dictionary, no matter how deeply my object is nested?
This is exampledata:
{
"Success":true,
"Ergebnis":[
{
"ErgA1a: KPI Zeitreihe":{
"Message":"",
"MitZielgruppe":true,
"Beschriftung":[
"2019 KW 27",
"2019 KW 28",
"2019 KW 29"
],
"Datenreihen":{
"Gesamt":{
"Name":"Sympathie [#4]\n(Sehr sympathisch, Sympathisch)",
"Werte":[
39.922142815641145,
37.751410794385762,
38.35504885993484
]
}
}
}
}
],
"rest":[
{
"test":"bla"
}
]
}
data.get("ErgebnisseAnalyse")[0].get("ErgA1a: KPI Zeitreihe")
recursive_lookup("ErgA1a: KPI Zeitreihe", data)
Recursive function to find value in nested dictionary based upon key field
Code
def find_item(obj, field):
"""
Takes a dict with nested lists and dicts,
and searches all dicts for a key of the field
provided.
"""
if isinstance(obj, dict):
for k, v in obj.items():
if k == field:
yield v
elif isinstance(v, dict) or isinstance(v, list):
yield from find_item(v, field)
elif isinstance(obj, list):
for v in obj:
yield from find_item(v, field)
Usage
value = next(find_item(dictionary_object, field), None)
Test
# Nested dictionary
dic = {
"a": [{"b": {"c": 1}},
{"d": 2}],
"e": 3}
# Values form various fields
print(next(find_item(dic, "a"), None)) # Output: [{'b': {'c': 1}}, {'d': 2}]
print(next(find_item(dic, "b"), None)) # Output: {'c': 1}
print(next(find_item(dic, "c"), None)) # Output: 1
print(next(find_item(dic, "d"), None)) # Output: 2
print(next(find_item(dic, "e"), None)) # Output: 3
print(next(find_item(dic, "h"), None)) # Output: None

Lift up all occurrences of a type in a nested dictionary to a top level key

I have a need in a project to find all of a given type in a nested dictionary and move them all to a top level key in the same dictionary.
So far I have the below code, which seems to work. In the example I'm looking for all the items that are integers and moving them to a 'numbers' key.
I'd prefer it if the lift_numbers_to_top function made and returned a copy of the dictionary rather than editing it in place, but I haven't been able to work out a nice way to pass the copy and the numbers back from the recursive function to itself, if that makes sense.
a_dictionary = {
"one": 1,
"two": 2,
"text": "Hello",
"more_text": "Hi",
"internal_dictionary": {
"three": 3,
"two": 2,
"even_more_text": "Hey",
"another_internal_dictionary": {
"four": 4,
"five": 5,
"last_text": "howdy"
}
}
}
def extract_integers(dictionary, level_key=None):
numbers = {}
for key in dictionary:
if type(dictionary[key]) == int:
numbers[level_key + "__" + key if level_key else key] = dictionary[key]
return numbers
def lift_numbers_to_top(dictionary, level_key=None):
numbers = {}
if type(dictionary) == dict:
numbers = extract_integers(dictionary, level_key)
for key in numbers:
keyNumber = key.split('__')[-1]
del dictionary[keyNumber]
for key in dictionary:
numbers = {**numbers, **lift_numbers_to_top(dictionary[key], key)}
return numbers
a_dictionary['numbers'] = lift_numbers_to_top(a_dictionary)
print(a_dictionary)
Result:
{
'text': 'Hello',
'more_text': 'Hi',
'internal_dictionary': {
'even_more_text': 'Hey',
'another_internal_dictionary': {
'last_text': 'howdy'
},
},
'numbers': {
'one': 1,
'two': 2,
'internal_dictionary__two': 2,
'internal_dictionary__three': 3,
'another_internal_dictionary__four': 4,
'another_internal_dictionary__five': 5,
}
}
Use a match function to determine what to lift, and pass along the target object where you move key-value pairs to to recursive calls. If that target is missing, you know the current call is for the top-level. The match function should return the new key for the new dictionary.
To produce a new dictionary, just produce a new dictionary and put recursion results into that object.
I prefer to use #singledispatch() to handle different types when recursing:
from functools import singledispatch
#singledispatch
def lift_values(obj, match, targetname=None, **kwargs):
"""Lift key-value pairs from a nested structure to the top
For key-value pairs anywhere in the nested structure, if
match(path, value) returns a value other than `None`, the
key-value pair is moved to the top-level dictionary when targetname
is None, or to a new dictionary stored under targetname is not None,
using the return value of the match function as the key. path
is the tuple of all keys and indices leading to the value.
For example, for an input
{'foo': True, 'bar': [{'spam': False, 'ham': 42}]}
and the match function lambda p, v: p if isinstance(v, bool) else None
and targetname "flags", this function returns
{'flags': {('foo',): True, ('bar', 0, 'spam'): False}, 'bar': [{'ham': 42}]}
"""
# leaf nodes, no match testing needed, no moving of values
return obj
#lift_values.register(list)
def _handle_list(obj, match, _path=(), **kwargs):
# list values, no lifting, just passing on the recursive call
return [lift_values(v, match, _path=_path + (i,), **kwargs)
for i, v in enumerate(obj)]
#lift_values.register(dict)
def _handle_list(obj, match, targetname=None, _path=(), _target=None):
result = {}
if _target is None:
# this is the top-level object, key-value pairs are lifted to
# a new dictionary stored at this level:
if targetname is not None:
_target = result[targetname] = {}
else:
# no target name? Lift key-value pairs into the top-level
# object rather than a separate sub-object.
_target = result
for key, value in obj.items():
new_path = _path + (key,)
new_key = match(new_path, value)
if new_key is not None:
_target[new_key] = value
else:
result[key] = lift_values(
value, match, _path=new_path, _target=_target)
return result
I included a dispatch function for lists; your sample doesn't use lists, but these are common in JSON data structures so I anticipate you probably want it anyway.
The match function must accept two arguments, the path to the object this key-value pair was found in, and the value. It should return a new key to use or None if not to lift the value.
For your case, the match function would be:
def lift_integers(path, value):
if isinstance(value, int):
return '__'.join(path[-2:])
result = lift_values(a_dictionary, lift_integers, 'numbers')
Demo on your sample input dictionary:
>>> from pprint import pprint
>>> def lift_integers(path, value):
... if isinstance(value, int):
... return '__'.join(path[-2:])
...
>>> lift_values(a_dictionary, lift_integers, 'numbers')
{'numbers': {'one': 1, 'two': 2, 'internal_dictionary__three': 3, 'internal_dictionary__two': 2, 'another_internal_dictionary__four': 4, 'another_internal_dictionary__five': 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}
Personally, I'd use the full path as the key in the lifted dictionary to avoid name clashes; either by joining the full path into a new string key with some unique delimiter, or just by making the path tuple itself the new key:
>>> lift_values(a_dictionary, lambda p, v: p if isinstance(v, int) else None, 'numbers')
{'numbers': {('one',): 1, ('two',): 2, ('internal_dictionary', 'three'): 3, ('internal_dictionary', 'two'): 2, ('internal_dictionary', 'another_internal_dictionary', 'four'): 4, ('internal_dictionary', 'another_internal_dictionary', 'five'): 5}, 'text': 'Hello', 'more_text': 'Hi', 'internal_dictionary': {'even_more_text': 'Hey', 'another_internal_dictionary': {'last_text': 'howdy'}}}
>>> pprint(_)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {('internal_dictionary', 'another_internal_dictionary', 'five'): 5,
('internal_dictionary', 'another_internal_dictionary', 'four'): 4,
('internal_dictionary', 'three'): 3,
('internal_dictionary', 'two'): 2,
('one',): 1,
('two',): 2},
'text': 'Hello'}
You can use walk through the dict recursively and pop all elements with values as an int to create a new dict
>>> def extract(d):
... new_d = {}
... for k,v in d.items():
... if type(v) == int:
... new_d[k] = d[k]
... elif type(v) == dict:
... for k2,v2 in extract(v).items():
... new_d[k2 if '__' in k2 else k+'__'+k2] = v2
... return new_d
...
>>> a_dictionary['numbers'] = extract(a_dictionary)
>>> pprint(a_dictionary)
{'internal_dictionary': {'another_internal_dictionary': {'last_text': 'howdy'},
'even_more_text': 'Hey'},
'more_text': 'Hi',
'numbers': {'another_internal_dictionary__five': 5,
'another_internal_dictionary__four': 4,
'internal_dictionary__three': 3,
'internal_dictionary__two': 2,
'one': 1,
'two': 2},
'text': 'Hello'}

Flattening a dictionary of dictionaries that contain lists

I have a dictionary of dictionaries that looks like this:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
I need to flatten this dict into a dataframe.I tried to use json_normalize() to flatten the dictionary and the output I got looked like this:
My desired output is something like the one given below.
This can be done in R by using as.data.frame(unlist(data)) but I want to do the same flattening in Python. I am a novice in python so I dont have much idea about doing this.
I have made an attempt to normalize your json object by writing a recursive function as follows:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
sequence = ""
subDicts = []
def findAllSubDicts(data):
global subDicts
global sequence
for key, value in data.items():
sequence += key
#print(sequence)
if isinstance(value, str):
subDicts.append([sequence,value])
sequence = sequence[:sequence.rfind(".")+1]
#print(sequence)
elif isinstance(value, dict):
tempSequence = sequence[:sequence.rfind(".")+1]
sequence += "."
#print(sequence)
findAllSubDicts(value)
sequence = tempSequence
elif isinstance(value, list) and isinstance(value[0], dict):
sequence += "."
tempSequence = sequence[:sequence.rfind(".")+1]
#print(sequence)
findAllSubDicts(value[0])
sequence = tempSequence
elif isinstance(value, list) and len(value)==1:
tempSequence = sequence[:sequence.rfind(".")+1]
subDicts.append([sequence,value[0]])
sequence = tempSequence
return subDicts
outDict = findAllSubDicts(data)
for i in outDict:
print(i[0].ljust(40," "), end=" ")
print(i[1])
Printing the results will give you:
data input
test.and.range.month.start Jan
test.and.range.month.end July
test.and.Student.Name ABC
test.and.Student.Class 10
Notify me if you need any clarification or any modification in my code.

Categories