I want to get a list of all keys in a nested dictionary that contains lists and dictionaries.
I currently have this code, but it seems to miss some keys and to add other keys more than once.
# Accumulator handed to get_keys(); the function appends to it in place.
keys_list = []
def get_keys(d_or_l, keys_list):
    """Walk a nested dict/list structure and append dictionary keys to keys_list.

    Python 2 code (dict.iteritems and the print statement).
    Returns the same keys_list object that was passed in.
    """
    if isinstance(d_or_l, dict):
        # Key/value pairs are visited in sorted order.
        for k, v in iter(sorted(d_or_l.iteritems())):
            if isinstance(v, list):
                get_keys(v, keys_list)
            elif isinstance(v, dict):
                get_keys(v, keys_list)
            else:
                # Only reached for values that are neither list nor dict, so a
                # key whose value is a container is never recorded.
                keys_list.append(k)
    elif isinstance(d_or_l, list):
        # Only container elements are followed; scalar elements are ignored.
        for i in d_or_l:
            if isinstance(i, list):
                get_keys(i, keys_list)
            elif isinstance(i, dict):
                get_keys(i, keys_list)
    else:
        # Reached only when the argument itself is neither a dict nor a list.
        print "** Skipping item of type: {}".format(type(d_or_l))
    return keys_list
This just takes an empty list and populates it with the keys. d_or_l is a variable and takes the original dict to compare it against.
This should do the job:
def get_keys(dl, keys_list):
    """Append every dictionary key found in a nested dict/list to keys_list.

    Keys of a dict are added first, then its values are searched in order.
    Lists are searched element by element; any other value is ignored.
    Duplicates are kept. The list is modified in place and nothing is
    returned.
    """
    if isinstance(dl, dict):
        keys_list += dl.keys()
        children = dl.values()
    elif isinstance(dl, list):
        children = dl
    else:
        return
    # Explicit loop instead of map(): map() is lazy in Python 3, so using it
    # purely for side effects would never perform the recursive calls.
    for child in children:
        get_keys(child, keys_list)
To avoid duplicates you can use set, e.g.:
keys_list = list( set( keys_list ) )
Example test case:
keys_list = []
d = {1: 2, 3: 4, 5: [{7: {9: 1}}]}
get_keys(d, keys_list)
print keys_list
>>>> [1, 3, 5, 7, 9]
As it stands, your code ignores keys that lead to list or dict values. Remove the else block in your first for loop, you want to add the key no matter what the value is.
# Accumulator handed to get_keys(); the function appends to it in place.
keys_list = []
def get_keys(d_or_l, keys_list):
    """Append every dictionary key of a nested dict/list structure to keys_list.

    Python 2 code (dict.iteritems and the print statement).
    Keys of nested containers are appended before the key that leads to them.
    Returns the same keys_list object that was passed in.
    """
    if isinstance(d_or_l, dict):
        # Key/value pairs are visited in sorted order.
        for k, v in iter(sorted(d_or_l.iteritems())):
            if isinstance(v, list):
                get_keys(v, keys_list)
            elif isinstance(v, dict):
                get_keys(v, keys_list)
            # Runs for every key, whatever the type of its value.
            keys_list.append(k) # Altered line
    elif isinstance(d_or_l, list):
        # Only container elements are followed; scalar elements are ignored.
        for i in d_or_l:
            if isinstance(i, list):
                get_keys(i, keys_list)
            elif isinstance(i, dict):
                get_keys(i, keys_list)
    else:
        # Reached only when the argument itself is neither a dict nor a list.
        print "** Skipping item of type: {}".format(type(d_or_l))
    return keys_list
get_keys({1: 2, 3: 4, 5: [{7: {9: 1}}]}, keys_list) returns [1, 3, 9, 7, 5]
To avoid duplication, you could use a set datatype instead of a list.
Here is a simple solution:
def get_nested_keys(d, keys):
    """Append every key of a nested dict/list structure to ``keys``.

    A key is recorded whether its value is a scalar, a dict or a list, and
    dicts stored inside lists are searched as well. Values that are neither
    dict nor list are ignored. ``keys`` is modified in place; duplicates are
    kept.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            # Record the key itself before looking inside its value.
            keys.append(k)
            get_nested_keys(v, keys)
    elif isinstance(d, list):
        for item in d:
            get_nested_keys(item, keys)
keys_list = []
get_nested_keys(test_listing, keys_list)
print(keys_list)
If you want to know the hierarchy of the keys as well, you can modify the function like so:
def get_nested_keys(d, keys, prefix=''):
    """Append the ':'-separated path of every key in ``d`` to ``keys``.

    ``prefix`` is the path of the container being visited; leave it empty for
    the top-level call. An empty prefix adds no leading separator. Keys that
    lead to dicts or lists are recorded too, and dicts inside lists are
    searched with the path of the list's key. ``keys`` is modified in place;
    paths reached through several list elements appear once per element.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            path = f'{prefix}:{k}' if prefix else f'{k}'
            keys.append(path)
            get_nested_keys(v, keys, path)
    elif isinstance(d, list):
        for item in d:
            get_nested_keys(item, keys, prefix)
I would extend @pm007's answer with a Python 2 & 3 friendly version:
def get_keys(dl, keys=None):
    """Return the unique dictionary keys found anywhere in a nested dict/list.

    ``keys`` is an optional list used as the accumulator; when given, every
    key found (duplicates included) is appended to it in place. The return
    value is a new de-duplicated list whose order is unspecified.
    """
    # Compare against None explicitly: `keys or []` would silently swap a
    # caller-supplied empty list for a fresh one and lose the results.
    if keys is None:
        keys = []
    _collect_keys(dl, keys)
    return list(set(keys))


def _collect_keys(dl, keys):
    """Append every dict key reachable from ``dl`` to ``keys`` in place."""
    if isinstance(dl, dict):
        keys.extend(dl)
        for value in dl.values():
            _collect_keys(value, keys)
    elif isinstance(dl, list):
        for item in dl:
            _collect_keys(item, keys)


d = {1: 2, 3: 4, 5: {7: {1: 1}}}
get_keys(d)
Furthermore this feels more convenient as you get a function that returns the desired keys instead of magically altering a list
Updating @MackM's response to Python 3, as dict.iteritems is not available there (and I prefer f-strings over the .format() styling):
# Accumulator handed to get_keys(); the function appends to it in place.
keys_list = []
def get_keys(d_or_l, keys_list):
    """Collect every dictionary key of a nested dict/list into keys_list.

    Keys found inside a nested container are appended before the key that
    leads to that container. Scalar list elements are ignored. A top-level
    argument that is neither dict nor list only triggers a message.
    Returns the keys_list object that was passed in.
    """
    if isinstance(d_or_l, dict):
        # dict.items() replaces the Python 2 iteritems(); pairs are visited
        # in sorted order.
        for key, value in sorted(d_or_l.items()):
            if isinstance(value, (list, dict)):
                get_keys(value, keys_list)
            keys_list.append(key)
        return keys_list
    if isinstance(d_or_l, list):
        for element in d_or_l:
            if isinstance(element, (list, dict)):
                get_keys(element, keys_list)
        return keys_list
    # f-string instead of str.format for the diagnostic.
    print(f'** Skipping item of type: {type(d_or_l)}')
    return keys_list
unique_keys = list(set(get_keys(my_json_dict, keys_list))) # Added line as example use case
Related
I have an input dict like so:
input={'boo': 'its', 'soo': 'your', 'roo': 'choice', 'qoo': 'this', 'fizz': 'is', 'buzz': 'very', 'yoyo': 'rambling', 'wazzw': 'lorem', 'bnn': 'ipsum', 'cc': [{'boo': 'fill', 'soo': 'ing', 'roo': 'in', 'qoo': 'the', 'fizz': 'words', 'buzz': 'here', 'yoyo': 'we', 'wazzw': 'go', 'nummm': 2, 'bsdfff': 3, 'hgdjgkk': 4, 'opu': 1, 'mnb': True}, {'boo': 'again', 'soo': 'loop', 'roo': 'de', 'qoo': 'loop', 'fizz': 'wowzers', 'buzz': 'try', 'yoyo': 'again', 'wazzw': 'how', 'nummm': 1, 'bsdfff': 7, 'hgdjgkk': 0, 'opu': 1, 'mnb': True}], 'soos': ['ya'], 'tyu': 'doin', 'dddd3': 'today'}
Using python builtin libraries how to get hierarchy (dot separated) of each key. ie:
expected_output=['boo','soo','roo','qoo','fizz','buzz','yoyo','wazzw','bnn','cc','cc.boo','cc.soo','cc.roo','cc.qoo','cc.fizz','cc.buzz','cc.yoyo','cc.wazzw','cc.nummm','cc.bsdfff','cc.hgdjgkk','cc.opu','cc.mnb','soos','tyu','dddd3']
First attempt is not handling lists:
def getKeys(object, prev_key=None, keys=None):
    """Return the dot-separated paths of all leaf values in a nested dict.

    ``object`` is the value being inspected and ``prev_key`` the path that
    led to it (None at the top level). For a non-dict value, ``prev_key`` is
    appended to ``keys`` (a fresh list when omitted) and that list is
    returned. For a dict, a new list with the paths of all its leaves is
    returned. Lists are treated as leaves, so dicts inside them are not
    searched.
    """
    # A literal [] default would be shared between calls and keep growing.
    if keys is None:
        keys = []
    # isinstance also accepts dict subclasses such as OrderedDict.
    if not isinstance(object, dict):
        keys.append(prev_key)
        return keys
    new_keys = []
    for k, v in object.items():
        new_key = k if prev_key is None else "{}.{}".format(prev_key, k)
        new_keys.extend(getKeys(v, new_key, []))
    return new_keys
Modification of mozway's answer; https://www.mycompiler.io/view/6LB7k4TVOuj
# Includes $ for root node, and [] where access is through an array
def hierarchy(struct, path=None):
    """Return the set of paths present in a nested dict/list structure.

    The root is written as ``$``, dict members as ``parent.key`` and the
    elements of a list as ``parent[]``. ``path`` is the path of ``struct``
    itself and is normally omitted by the caller. A set is returned for every
    kind of input, so results can always be combined with set operations.
    """
    if isinstance(struct, dict):
        path = path if path else '$'
        children = (
            hierarchy(obj, f'{path}.{key}') for key, obj in struct.items()
        )
    elif isinstance(struct, list):
        # All elements of a list share the same "[]" path.
        path = f'{path}[]' if path else '$[]'
        children = (hierarchy(obj, path) for obj in struct)
    else:
        # Leaf value: its own path is the only entry.
        return {path}
    return {path}.union(*children)
Or...
from itertools import chain
# Excludes those $ and [] markers
def hierarchy2(d):
    """Return the set of dot-separated key paths found in a nested dict/list.

    Each dict key contributes itself plus ``key.sub`` for every path found
    below it. A list contributes the paths of its elements without adding a
    path component of its own. Any other value yields an empty set.
    """
    paths = set()
    if isinstance(d, dict):
        for key, value in d.items():
            paths.add(key)
            for sub in hierarchy2(value):
                # A falsy sub-path collapses to the key itself.
                paths.add(f'{key}.{sub}' if sub else key)
    elif isinstance(d, list):
        for element in d:
            paths.update(sub for sub in hierarchy2(element) if sub)
    return paths
Using a recursive generator:
def hierarchy(d, prefix=None):
    """Yield each dot-separated key path of a nested dict/list exactly once.

    Paths are produced in first-seen order. A list adds no path component:
    dicts inside it (at any list nesting depth) are reported under the path
    of the key that holds the list. ``prefix`` is prepended to every path
    when given.
    """
    yield from _walk_paths(d, prefix, set())


def _walk_paths(node, prefix, seen):
    """Yield paths below ``node`` that are not yet in ``seen``, recording them."""
    if isinstance(node, dict):
        for key, value in node.items():
            path = f'{prefix}.{key}' if prefix else key
            if path not in seen:
                seen.add(path)
                yield path
            # Always descend: a repeated key may still hold new nested keys.
            yield from _walk_paths(value, path, seen)
    elif isinstance(node, list):
        for item in node:
            yield from _walk_paths(item, prefix, seen)
out = list(hierarchy(inpt))
# validation
assert out == expected_output
Output:
['boo', 'soo', 'roo', 'qoo', 'fizz', 'buzz', 'yoyo', 'wazzw', 'bnn',
'cc', 'cc.boo', 'cc.soo', 'cc.roo', 'cc.qoo', 'cc.fizz', 'cc.buzz',
'cc.yoyo', 'cc.wazzw', 'cc.nummm', 'cc.bsdfff', 'cc.hgdjgkk', 'cc.opu', 'cc.mnb',
'soos', 'tyu', 'dddd3']
Different example:
list(hierarchy({'l1': {'l2': {'l3': 'test', 'l4': [['abc'], {'l5': 'def'}]}}}))
# ['l1', 'l1.l2', 'l1.l2.l3', 'l1.l2.l4', 'l1.l2.l4.l5']
To deal with a sub-list, you can iteratively check if each sub-item is a dict, and if it is, recursively concatenate the key paths of the sub-dict to the current key. Use a dict instead of a list to keep track of the keys in order to avoid duplicates:
from itertools import chain
def get_keys(container):
    """Return the dot-separated key paths of a nested dict/list, without repeats.

    Paths are listed in the order they are first met. A list adds no path
    component of its own; the paths of its elements are merged. Values that
    are neither dict nor list produce an empty list.
    """
    # A dict serves as an insertion-ordered set of paths.
    ordered = {}
    if isinstance(container, dict):
        for name, child in container.items():
            ordered.setdefault(name)
            for sub_path in get_keys(child):
                ordered[f'{name}.{sub_path}'] = None
    elif isinstance(container, list):
        for element in container:
            for sub_path in get_keys(element):
                ordered[sub_path] = None
    return list(ordered)
Demo: https://replit.com/#blhsing/OpenGoldenIntegrationtesting
I have the following dict:
{
"a": "b",
"c": {'d':'e', 'g':'f'}
}
and I want to flatten the dict, but in this way:
{
"a": "b",
'd':'e',
'g':'f'
}
You can assume there is no duplicate key.
I read about the flatten_dict library, but it looks like it does not support this.
How can I do this?
Thank you!
For one level of nesting, a nice loop does the trick:
# Flatten a single level of nesting from my_dict into result.
result = {}
for key, value in my_dict.items():
    if not isinstance(value, dict):
        # Plain values are copied under their own key.
        result[key] = value
        continue
    # The members of a sub-dict are merged straight into the result.
    result.update(value)
If you have more nesting, the if/else should be executed recursively:
def flatten(my_dict):
    """Return a new flat dict holding the non-dict values of a nested dict.

    Keys that map to a dict are dropped and replaced by the (recursively
    flattened) members of that dict.
    """
    flat = {}
    for name, item in my_dict.items():
        flat.update(flatten(item) if isinstance(item, dict) else {name: item})
    return flat
You can use a modified version of this answer:
from collections.abc import MutableMapping
def flatten(d):
    """Return a flat dict with the leaf key/value pairs of a nested mapping.

    Values that are MutableMapping instances are replaced by their own
    (recursively flattened) members; every other value is kept under its key.
    """
    def leaf_pairs(mapping):
        # Produce (key, value) for each non-mapping value, depth first.
        for name, item in mapping.items():
            if isinstance(item, MutableMapping):
                yield from leaf_pairs(item)
            else:
                yield name, item

    return dict(leaf_pairs(d))
>>> x = {
... "a": "b",
... "c": {'d': 'e', 'g': 'f'}
... }
>>> flatten(x)
{'a': 'b', 'd': 'e', 'g': 'f'}
This will work for all forms of any dictionary
I can't wrap my head around this problem! I'm using the following code to get a JSON file into a readable format in python:
jsonDict = json.loads(jsonFile.read())
x = self.searchJSON('search_term', jsonDict)
And then using this recursive function to loop through the dictionary to find the key I need.
def searchJSON(self, searchTerm, di=None):
    """Look for searchTerm while walking the nested structure di.

    Only a match found directly in di is returned; the results of the
    recursive calls below are discarded.
    """
    for k in di:
        if k == searchTerm:
            return di[k]
        # k itself is a container only when di is a list.
        if isinstance(k, dict):
            self.searchJSON(searchTerm, k)
        elif isinstance(k, list):
            # Only the first element of a nested list is inspected.
            self.searchJSON(searchTerm, k[0])
        elif isinstance(di[k], list) or isinstance(di[k], dict):
            self.searchJSON(searchTerm, di[k])
Using this, I can find the value that I'm looking for - no problem.. but I need to return the value back outside the function.
When I add returns:
def searchJSON(self, searchTerm, di=None):
    """Look for searchTerm while walking the nested structure di.

    The result of the first recursive call is returned as is, so the loop
    never moves on to later entries once a container has been entered.
    """
    for k in di:
        if k == searchTerm:
            return di[k]
        # k itself is a container only when di is a list.
        if isinstance(k, dict):
            return self.searchJSON(searchTerm, k)
        elif isinstance(k, list):
            # k[0] raises IndexError when the list is empty.
            return self.searchJSON(searchTerm, k[0])
        elif isinstance(di[k], list) or isinstance(di[k], dict):
            return self.searchJSON(searchTerm, di[k])
The function stops running when it runs into an empty list. I spent a full day trying to figure this out, and this is the closest I've been able to get to returning the value.
Try this out (Updated):
def searchJSON(self, searchTerm, di=None):
    """Return the value stored under key ``searchTerm`` in a nested structure.

    ``di`` may be any nesting of dicts and lists (for example the result of
    json.loads). Entries are visited depth first in iteration order and the
    first match wins. Empty containers and branches without a match are
    skipped, and the search continues with the next entry. Returns None when
    the key is not present or ``di`` is not a dict or list.
    """
    # Private marker so that a stored value of None still counts as a hit.
    missing = object()

    def search(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key == searchTerm:
                    return value
                found = search(value)
                if found is not missing:
                    return found
        elif isinstance(node, list):
            # Every element is searched, not just the first one.
            for item in node:
                found = search(item)
                if found is not missing:
                    return found
        return missing

    result = search(di)
    return None if result is missing else result
This is Python 3.5 environment, I think the code is self explanatory, here it is, I am expecting both functions to work, but only one is correct.
TLDR:
Assigning out = {**out, **answer} makes out variable containing right keys, but they are lost during the next 2 steps; while assigning c = {**c,**a} works perfectly in test function, and new keys are not lost.
Could someone please explain what I am doing wrong?
def flatify_works(d, out, fhook=None):
    """Flatten the nested dict ``d`` into ``out`` by mutating ``out`` in place.

    Nested dicts are flattened recursively. A list value is passed, together
    with its key, to ``fhook``, whose returned items are stored in ``out``.
    Raises AssertionError when a list is found and no ``fhook`` was given.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            flatify_works(v, out, fhook)
        elif isinstance(v, list):
            if fhook is None:
                raise AssertionError("an array with more than 1 elment found.")
            # Item assignment keeps writing into the caller's dict.
            for hook_key, hook_value in fhook(k, v).items():
                out[hook_key] = hook_value
        else:
            out[k] = v
def flatify_doesnt_work(d, out, fhook=None):
    """Variant of the flattening routine that rebinds ``out`` for list values.

    Scalar values are stored in ``out`` by item assignment, nested dicts are
    handled recursively, and list values are passed to ``fhook``.
    Raises AssertionError when a list is found and no ``fhook`` was given.
    """
    for k, v in d.items():
        if not isinstance(v, dict) and not isinstance(v, list):
            out[k] = v
        elif isinstance(v, dict):
            flatify_doesnt_work(v, out, fhook)
        elif isinstance(v, list):
            if fhook is None:
                raise AssertionError("an array with more than 1 elment found.")
            answer = fhook(k, v)
            # This builds a new dict and rebinds the local name only; the dict
            # object the caller passed in does not receive the hook's keys.
            out = {**out, **answer} # put a breakpoint here, and go 2 steps further
def hook(k, v):
    """Summarise a list of dicts: sum their "c" entries and add a fixed "d"."""
    total = sum(entry["c"] for entry in v)
    return {"c": total, "d": "blabla"}
def test_merge_dicts():
    """Show that merging two dicts by unpacking keeps the keys of both."""
    a = {"a": 1, "b": 2}
    c = {"c": 3}
    c = {**c, **a}  # merging works perfectly here
    print(c)
    assert all(name in c for name in ("a", "b", "c"))  # ok
def test_nested_works():
    """Run flatify_works on a sample containing a list and print the result."""
    sample = {"a": 1, "b": [{"c": 0.6, "d": 4}, {"c": 0.4, "d": 4}]}
    out = {}
    flatify_works(sample, out, hook)
    print("working exemple: {}".format(str(out)))
def test_nested_doesnt_work():
    """Run flatify_doesnt_work on the same sample and print the result."""
    sample = {"a": 1, "b": [{"c": 0.6, "d": 4}, {"c": 0.4, "d": 4}]}
    out = {}
    flatify_doesnt_work(sample, out, hook)
    print("not working exemple: {}".format(str(out)))
def main():
    """Run the three demonstrations in order."""
    test_merge_dicts() # ok
    test_nested_works() # ok
    test_nested_doesnt_work() # why out = {**out, **answer} is not working as expected?
# Run the demonstrations only when the file is executed as a script.
if __name__ == '__main__':
    main()
I think the difference is that in the second function, you reassign the out variable, after which it is detached from the dict that you pass into the function:
out = {**out, **answer} # out is now a different object
Since the function is recursive, the subsequent modifications do not affect the original dict.
In the first function, though, you only do item assignment, which modifies the dict in place, as expected:
out[k] = v
Assuming that I have the following dictionary:
a={'brother': {'name': 'paskal', 'surname': 'leonis'},
'family': {'parents': {'father': 'telis',
'mother': 'xrisanthi',
'name': 'dimitris'}},
'name': 'alekos'}
And the desired output is to get a list:
[['paskal',1],['dimitris',2],['alekos',0]]
So what I want is to get the values of all the keys called name, together with the level at which each key was found (starting with zero).
So far I have successfully got the values, but the levels that I get are incorrect. I am using the following code:
from six import iteritems
def findKey_printValue(key, document,c=0):
    """Yield [value, c] for every occurrence of ``key`` in a nested dict.

    ``c`` is the counter reported next to each value; it starts at 0.
    """
    if isinstance(document, dict):
        for k, v in iteritems(document):
            if k == key:
                yield [v,c]
            elif isinstance(v, dict):
                # c is changed in place, so the increment is still in effect
                # for the entries of this dict that are visited afterwards.
                c+=1
                for result in findKey_printValue(key, v,c):
                    yield result
In [125]:list(findKey_printValue('name',a))
Out[125]:[['dimitris', 2], ['paskal', 2], ['alekos', 2]]
Any help?
You need to make sure the c variable only increases as you go down a level. When you run back up the stack, c should not have changed.
Modify this :
c+=1
for result in findKey_printValue(key, v,c):
yield result
to this:
for result in findKey_printValue(key, v,c+1):
yield result
Don't update the value of c in the current function itself, just do it in the call to the recursion:
elif isinstance(v, dict):
for result in findKey_printValue(key, v, c+1):
yield result
Do not pass c, but the increment of the value (else all your results have a reference to c):
from six import iteritems
def findKey_printValue(key, document, c=0):
    """Yield ``(value, level)`` for every occurrence of ``key`` in a nested dict.

    ``c`` is the nesting level of ``document`` (0 for the top level). Each
    recursive call receives ``c + 1`` while the local ``c`` stays unchanged,
    so sibling entries are reported at the correct level. Anything that is
    not a dict yields nothing. A dict stored under a matching key is yielded
    as a value and not searched further.
    """
    if not isinstance(document, dict):
        return
    # dict.items() exists on both Python 2 and 3, so no helper library is
    # needed for the iteration.
    for k, v in document.items():
        if k == key:
            yield (v, c)
        elif isinstance(v, dict):
            for result in findKey_printValue(key, v, c + 1):
                yield result
a = {
    'brother': {'name': 'paskal', 'surname': 'leonis'},
    'family': {
        'parents': {
            'father': 'telis',
            'mother': 'xrisanthi',
            'name': 'dimitris',
        },
    },
    'name': 'alekos',
}
# Maps each value found under a 'name' key to the level where it was found.
holder = {}
def get(d, count):
    """Record in ``holder`` the level of every non-dict 'name' value below ``d``.

    ``count`` is the nesting level of ``d``; nested dicts are visited with
    ``count + 1``. Arguments that are not dicts are ignored.
    """
    if not isinstance(d, dict):
        return
    for key, val in d.items():
        if isinstance(val, dict):
            get(val, count + 1)
            continue
        if key == 'name':
            holder[val] = count
get(a, 0)