This is a Python 3.5 environment. I think the code is self-explanatory; here it is. I expect both functions to work, but only one gives the correct result.
TLDR:
Assigning out = {**out, **answer} gives the out variable the right keys, but they are lost within the next two steps, whereas assigning c = {**c, **a} works perfectly in the test function and the new keys are not lost.
Could someone please explain what I am doing wrong?
def flatify_works(d, out, fhook=None):
    """Flatten the nested dict ``d`` into ``out``, mutating ``out`` in place.

    Values that are neither dicts nor lists are copied under their own key.
    Nested dicts are flattened recursively into the same ``out``. Each list
    value is replaced by the key/value pairs returned by
    ``fhook(key, list_value)``.

    Raises:
        AssertionError: if a list value is met and no ``fhook`` was supplied.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            flatify_works(value, out, fhook)
        elif isinstance(value, list):
            if fhook is None:
                raise AssertionError("an array with more than 1 elment found.")
            # update() mutates the dict object the caller passed in; rebinding
            # ``out`` to a new dict here would leave the caller's dict unchanged.
            out.update(fhook(key, value))
        else:
            out[key] = value
def flatify_doesnt_work(d, out, fhook=None):
for k, v in d.items():
if not isinstance(v, dict) and not isinstance(v, list):
out[k] = v
elif isinstance(v, dict):
flatify_doesnt_work(v, out, fhook)
elif isinstance(v, list):
if fhook is None:
raise AssertionError("an array with more than 1 elment found.")
answer = fhook(k, v)
out = {**out, **answer} # put a breakpoint here, and go 2 steps further
def hook(k, v):
l = [d["c"] for d in v]
return {"c": sum(l), "d": "blabla"}
def test_merge_dicts():
a = {"a": 1, "b": 2}
c = {"c": 3}
c = {**c, **a} # merging works perfectly here
print(c)
assert "a" in c and "b" in c and "c" in c # ok
def test_nested_works():
out = {}
flatify_works({"a": 1, "b": [{"c": 0.6, "d": 4}, {"c": 0.4, "d": 4}]}, out, hook)
print("working exemple: {}".format(str(out)))
def test_nested_doesnt_work():
out = {}
flatify_doesnt_work({"a": 1, "b": [{"c": 0.6, "d": 4}, {"c": 0.4, "d": 4}]}, out, hook)
print("not working exemple: {}".format(str(out)))
def main():
test_merge_dicts() # ok
test_nested_works() # ok
test_nested_doesnt_work() # why out = {**out, **answer} is not working as expected?
if __name__ == '__main__':
main()
I think the difference is that in the second function, you reassign the out variable, after which it is detached from the dict that you pass into the function:
out = {**out, **answer} # out is now a different object
Since the function is recursive, the subsequent modifications do not affect the original dict.
In the first function, though, you only do item assignment, which modifies the dict in place, as expected:
out[k] = v
Related
I have the following dict:
{
"a": "b",
"c": {'d':'e', 'g':'f'}
}
and I want to flatten the dict, but in this way:
{
"a": "b",
'd':'e',
'g':'f'
}
You can assume there is no duplicate key.
I read about the flatten_dict library, but it looks like it does not support this.
How can I do this?
Thank you!
For one level of nesting, a nice loop does the trick:
result = {}
for key, value in my_dict.items():
if isinstance(value, dict):
result.update(value) # add subdict directly into the dict
else:
result[key] = value # non-subdict elements are just copied
If you have more nesting, the if/else should be executed recursively:
def flatten(my_dict):
    """Return a new flat dict holding the non-dict entries of ``my_dict``.

    Nested dicts are merged in recursively and their own keys are dropped.
    If the same key occurs more than once, the entry met later in iteration
    order overwrites the earlier one.
    """
    result = {}
    for key, value in my_dict.items():
        if isinstance(value, dict):
            # Recurse so that arbitrarily deep sub-dicts are merged as well.
            result.update(flatten(value))
        else:
            result[key] = value
    return result
You can use a modified version of this answer:
from collections.abc import MutableMapping
def flatten(d):
items = []
for k, v in d.items():
if isinstance(v, MutableMapping):
items.extend(flatten(v).items())
else:
items.append((k, v))
return dict(items)
>>> x = {
... "a": "b",
... "c": {'d': 'e', 'g': 'f'}
... }
>>> flatten(x)
{'a': 'b', 'd': 'e', 'g': 'f'}
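This works for dictionaries nested to any depth. Note that values inside lists are not descended into, and if the same key appears more than once, the last occurrence wins.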
I have a dictionary
# Sample input: the wanted key may sit at the top level ('c', 'd') or inside
# a dict nested within a list value ('b').
myDict = {'a': [1, 2, 3, 'cow', {'b': 23}],
          'c': 25,
          'd': [4, 5, 6]}
And I want to create a function that would get the values of the keys. This is my code so far:
def getValueOf(key, myDict):
for d in myDict:
if key in d:
return d[key]
This code works only for some dictionaries. I can't seem to wrap my head around another solution.
This is my expected output:
getValueOf('b', myDict) ==> 23
getValueOf('d', myDict) ==> [4,5,6]
getValueOf('a', myDict) ==> [1, 2, 3, 'cow', {'b': 23}]
Any help would be greatly appreciated!
Here's an inefficient solution using recursion that goes into lists and dicts:
def find(d, needle):
if isinstance(d, dict):
for key in d:
value = d[key]
if key == needle:
return value
else:
ret = find(value, needle)
if ret is not None:
return ret
elif isinstance(d, list):
for value in d:
ret = find(value, needle)
if ret is not None:
return ret
return None
Here's a simple example:
D = {
"a": 2,
"b": [1,5,9,2,4, [5, 6, {"g": "pouet"}]],
"c": {"d": "hello", "e": {"f": "world"}}}
print(find(D, "d"), find(D, "f"))
print(find(D, "g"))
which returns:
hello world
pouet
This can be considered incorrect, since None could be a valid value.
To correctly deal with this possibility, consider this instead:
class _KeyNotFound(RuntimeError):
    """Internal "needle is absent" signal.

    Subclasses RuntimeError so callers that catch RuntimeError keep working.
    """


def find(d, needle):
    """Return the value stored under the first key equal to ``needle``.

    Dicts and lists are searched depth-first in iteration order. A stored
    ``None`` is a legitimate result, which is why absence is reported with an
    exception rather than a ``None`` return value.

    Raises:
        RuntimeError: if ``needle`` is not a key anywhere inside ``d``.
    """
    if isinstance(d, dict):
        for key, value in d.items():
            if key == needle:
                return value
            try:
                return find(value, needle)
            except _KeyNotFound:
                pass  # not in this branch; try the next key
    elif isinstance(d, list):
        for value in d:
            try:
                return find(value, needle)
            except _KeyNotFound:
                pass  # not in this element; try the next one
    # Only the private subclass is caught above, so unrelated RuntimeErrors
    # (notably RecursionError on self-referencing input) propagate instead of
    # being swallowed and misreported as "not found".
    raise _KeyNotFound(needle)
# Private marker for "key not present"; lets a stored None be told apart from
# a failed search while the recursion is still in progress.
_NOT_FOUND = object()


def getValueOf(key, input_dict):
    """Return the value stored under ``key`` anywhere inside ``input_dict``.

    A key present in the current dict wins over the same key nested deeper.
    Otherwise every dict or list value is searched in turn, and the search
    moves on to the next sibling when a branch does not contain the key.

    Returns:
        The matching value, or ``None`` when the key does not occur.
    """
    found = _lookup(key, input_dict)
    return None if found is _NOT_FOUND else found


def _lookup(key, node):
    """Return the value under ``key`` inside ``node``, or ``_NOT_FOUND``."""
    if isinstance(node, dict):
        if key in node:
            return node[key]
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return _NOT_FOUND
    for child in children:
        found = _lookup(key, child)
        # Return only on a real hit; returning the first branch's result
        # unconditionally would skip every later sibling.
        if found is not _NOT_FOUND:
            return found
    return _NOT_FOUND
I made this slightly different version; maybe it's easier to understand.
Yet another, arguably, simple version.
# Private marker for "no match"; a plain truthiness test on the result would
# throw away legitimate falsy values such as 0, "" or False.
_ABSENT = object()


def get_value(findkey, elements):
    """Return the value of the first key equal to ``findkey`` in ``elements``.

    ``elements`` is a dict or list; nested dicts and lists are searched
    depth-first in iteration order.

    Returns:
        The matching value, or ``None`` when the key does not occur.
    """
    hit = _find_value(findkey, elements)
    return None if hit is _ABSENT else hit


def _find_value(findkey, elements):
    """Return the value for ``findkey`` inside ``elements``, or ``_ABSENT``."""
    is_dict = isinstance(elements, dict)
    for item in elements:
        if is_dict:
            if item == findkey:
                return elements[item]
            # Iterating a dict yields keys; switch to the value to descend.
            item = elements[item]
        if isinstance(item, (list, dict)):
            hit = _find_value(findkey, item)
            if hit is not _ABSENT:
                return hit
    return _ABSENT
Results
myDict = {
"a": 2,
"b": [1, 5, 9, 2, 4, [5, 6, {"g": "pouet"}]],
"c": {"d": "hello", "e": {"f": "world"}},
"h": None,
}
{c: get_value(c, myDict) for c in list('abcdefgh')}
>>>
{'a': 2,
'b': [1, 5, 9, 2, 4, [5, 6, {'g': 'pouet'}]],
'c': {'d': 'hello', 'e': {'f': 'world'}},
'd': 'hello',
'e': {'f': 'world'},
'f': 'world',
'g': 'pouet',
'h': None}
There are many questions about this problem, but none of the answers work in my case. I'm trying to find a nested dictionary given a target key-value pair. My recursive function returned None (and, after an attempted fix, hit a maximum recursion depth error).
def recursive_lookup(k, sv, d):
if k in d: return d[k]
for v in d.values():
if isinstance(v, dict):
a = recursive_lookup(k, sv, v)
if a == sv:
if a is not None:
return d
return None
def run():
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
returned_value = recursive_lookup("FocusPosition", "+000000002097.2550", maly)
print(returned_value)
run()
If I change return d to recursive_lookup(k, sv, d) it is also not working.
It should return the maly dictionary, but it returned None.
How can I fix that problem?
This is the right idea, but a matched result isn't being passed up the call stack correctly. You can also simplify logic by checking key and value on the same call frame--this should also eliminate a bug where the target key-value are on the top level of the dict (there's no previous frame to fall back on to check the value).
def recursive_lookup(target_key, target_val, dictionary):
if target_key in dictionary and dictionary[target_key] == target_val:
return dictionary
for value in dictionary.values():
if isinstance(value, dict):
if result := recursive_lookup(target_key, target_val, value):
return result
if __name__ == "__main__":
maly = {'_id': "ObjectId('5def7e8c4802b906dd067f97')", 'METADATA': {'Tags': {'AcquisitionTime': '2019-02-05T15:59:37.5862118Z', 'ImageScaling': {'ImageScaling': {'ImagePixelSize': '4.54,4.54'}}, 'DetectorState': {'CameraState': {'ApplyCameraProfile': 'false', 'ApplyImageOrientation': 'true', 'ExposureTime': '2200000', 'Frame': '0,0,2752,2208', 'ImageOrientation': '3'}}, 'StageXPosition': '+000000141526.5820', 'StageYPosition': '+000000189329.5000', 'FocusPosition': '+000000002097.2550', 'RoiCenterOffsetX': '+000000000000.0000', 'RoiCenterOffsetY': '+000000000000.0000'}, 'DataSchema': None, 'AttachmentSchema': None}}
print(recursive_lookup("FocusPosition", "+000000002097.2550", maly))
Here's a more-easily verifiable version that uses a simple dictionary and doesn't use the 3.8 assignment expression:
def recursive_lookup(target_key, target_val, dictionary):
    """Return the first dict that maps ``target_key`` to ``target_val``.

    ``dictionary`` itself is checked before its nested dict values, which are
    then searched depth-first in iteration order. Only dict values are
    descended into.

    Returns:
        The matching dict, or ``None`` when no dict holds the pair.
    """
    # Key and value are checked in the same frame, so a match at the top
    # level is found too.
    if target_key in dictionary and dictionary[target_key] == target_val:
        return dictionary
    for value in dictionary.values():
        if isinstance(value, dict):
            result = recursive_lookup(target_key, target_val, value)
            if result is not None:
                return result
    return None
if __name__ == "__main__":
dictionary = {
"a": "foo",
"b": {
"c": "bar",
"d": "baz",
"e": {
"f": "quux",
"g": "garply"
}
}
}
print(recursive_lookup("c", "bar", dictionary)) # => {'c': 'bar', 'd': 'baz', 'e': {'f': 'quux', 'g': 'garply'}}
print(recursive_lookup("g", "garply", dictionary)) # => {'f': 'quux', 'g': 'garply'}
This sample code performs a recursive search in a hierarchy of dictionaries. So I guess it may correspond to what you are looking for:
def rec_search(key, dic):
if key in dic:
return dic[key]
for d in dic.values():
if isinstance(d, dict):
val = rec_search(key, d)
if val is not None: return val
return None
maly = {1:'a',
2:'b',
3:{4:'d',
5:'e',
6:{7:'g',
8:'h'}
},
9:{10:'i',
11:'j',
12:{13:'l',
14:'m'}
}
}
print(rec_search(2,maly)) # --> 'b'
print(rec_search(7,maly)) # --> 'g'
print(rec_search(10,maly)) # --> 'i'
print(rec_search(15,maly)) # --> None
EDIT: Corrected code after Sylwester's comment
I think the problem occurs when recursive_lookup() is called for the second time:
it just keeps looking for sv in the same dictionary (maly) and doesn't search deeper inside the nested ones, which is why it returns None.
I want to get a list of all keys in a nested dictionary that contains lists and dictionaries.
I currently have this code, but it seems to be missing adding some keys to the list and also duplicate adds some keys.
keys_list = []
def get_keys(d_or_l, keys_list):
if isinstance(d_or_l, dict):
for k, v in iter(sorted(d_or_l.iteritems())):
if isinstance(v, list):
get_keys(v, keys_list)
elif isinstance(v, dict):
get_keys(v, keys_list)
else:
keys_list.append(k)
elif isinstance(d_or_l, list):
for i in d_or_l:
if isinstance(i, list):
get_keys(i, keys_list)
elif isinstance(i, dict):
get_keys(i, keys_list)
else:
print "** Skipping item of type: {}".format(type(d_or_l))
return keys_list
This just takes an empty list and populates it with the keys. d_or_l is a variable and takes the original dict to compare it against.
This should do the job:
def get_keys(dl, keys_list):
    """Append every dict key found anywhere inside ``dl`` to ``keys_list``.

    ``dl`` may be a dict or a list; nested dicts and lists are walked
    recursively and anything else is ignored. ``keys_list`` is extended in
    place (duplicates are kept) and nothing is returned.
    """
    if isinstance(dl, dict):
        keys_list += dl.keys()
        children = dl.values()
    elif isinstance(dl, list):
        children = dl
    else:
        return
    # A plain loop rather than map(): on Python 3 map() is lazy, so an
    # unconsumed map object would never perform the recursive calls.
    for child in children:
        get_keys(child, keys_list)
To avoid duplicates you can use set, e.g.:
keys_list = list( set( keys_list ) )
Example test case:
keys_list = []
d = {1: 2, 3: 4, 5: [{7: {9: 1}}]}
get_keys(d, keys_list)
print keys_list
>>>> [1, 3, 5, 7, 9]
As it stands, your code ignores keys that lead to list or dict values. Remove the else block in your first for loop, you want to add the key no matter what the value is.
keys_list = []
def get_keys(d_or_l, keys_list):
if isinstance(d_or_l, dict):
for k, v in iter(sorted(d_or_l.iteritems())):
if isinstance(v, list):
get_keys(v, keys_list)
elif isinstance(v, dict):
get_keys(v, keys_list)
keys_list.append(k) # Altered line
elif isinstance(d_or_l, list):
for i in d_or_l:
if isinstance(i, list):
get_keys(i, keys_list)
elif isinstance(i, dict):
get_keys(i, keys_list)
else:
print "** Skipping item of type: {}".format(type(d_or_l))
return keys_list
get_keys({1: 2, 3: 4, 5: [{7: {9: 1}}]}, keys_list) returns [1, 3, 9, 7, 5]
To avoid duplication, you could use a set datatype instead of a list.
Here is a simple solution:
def get_nested_keys(d, keys):
for k, v in d.items():
if isinstance(v, dict):
get_nested_keys(v, keys)
else:
keys.append(k)
keys_list = []
get_nested_keys(test_listing, keys_list)
print(keys_list)
If you want to know the hierarchy of the keys as well, you can modify the function like so:
def get_nested_keys(d, keys, prefix):
for k, v in d.items():
if isinstance(v, dict):
get_nested_keys(v, keys, f'{prefix}:{k}')
else:
keys.append(f'{prefix}:{k}')
I would extend @pm007's answer with a Python 2 & 3 friendly version:
def get_keys(dl, keys=None):
    """Return the distinct dict keys found anywhere inside ``dl``.

    ``dl`` may be a dict or a list; nested dicts and lists are walked
    recursively. When ``keys`` is given, the keys found are appended to it
    and its existing entries are included in the result.

    Returns:
        A list of unique keys, in no particular order.
    """
    # ``keys or []`` would replace an *empty* accumulator with a fresh list,
    # losing every key collected below a top-level list; test for None.
    if keys is None:
        keys = []
    _collect_keys(dl, keys)
    return list(set(keys))


def _collect_keys(dl, keys):
    """Append the keys of every dict nested inside ``dl`` to ``keys``."""
    if isinstance(dl, dict):
        keys += dl.keys()
        children = dl.values()
    elif isinstance(dl, list):
        children = dl
    else:
        return
    for child in children:
        _collect_keys(child, keys)
d = {1: 2, 3: 4, 5: {7: {1: 1}}}
get_keys(d)
Furthermore this feels more convenient as you get a function that returns the desired keys instead of magically altering a list
Updating @MackM's response to Python 3, as dict.iteritems() was removed in Python 3 (and I prefer f-strings over the .format() style):
keys_list = []
def get_keys(d_or_l, keys_list):
    """Append every dict key found inside ``d_or_l`` to ``keys_list``.

    Dict items are visited in sorted order. A key is appended after the keys
    nested inside its value, so deeper keys come first. Lists are walked for
    nested dicts and lists; other list elements are ignored.

    Returns:
        The same ``keys_list`` object, extended in place (duplicates kept).
    """
    if isinstance(d_or_l, dict):
        for k, v in sorted(d_or_l.items()):
            if isinstance(v, (list, dict)):
                get_keys(v, keys_list)
            # Record the key whatever kind of value it leads to.
            keys_list.append(k)
    elif isinstance(d_or_l, list):
        for i in d_or_l:
            if isinstance(i, (list, dict)):
                get_keys(i, keys_list)
    else:
        print(f'** Skipping item of type: {type(d_or_l)}')
    return keys_list
unique_keys = list(set(get_keys(my_json_dict, keys_list))) # Added line as example use case
Supposing we have this dict:
d = {'a':1, 'b': {'c':{}}}
What would be the most straightforward way of knowing the nesting depth of it?
You need to create a recursive function:
>>> def depth(d):
... if isinstance(d, dict):
... return 1 + (max(map(depth, d.values())) if d else 0)
... return 0
...
>>> d = {'a':1, 'b': {'c':{}}}
>>> depth(d)
3
You'll have to traverse the dictionary. You could do so with a queue; the following should be safe from circular references:
from collections import deque
def depth(d):
queue = deque([(id(d), d, 1)])
memo = set()
while queue:
id_, o, level = queue.popleft()
if id_ in memo:
continue
memo.add(id_)
if isinstance(o, dict):
queue += ((id(v), v, level + 1) for v in o.values())
return level
Note that because we visit all dictionary values in breadth-first order, the level value only ever goes up. The memo set ensures we don't endlessly traverse a circular reference.
Or you could traverse the tree with recursion (which effectively uses function calls as a stack). I've used functools.singledispatch() for easy expansion to other container types:
from functools import singledispatch, wraps


@singledispatch
def depth(_, _level=1, _memo=None):
    """Return the nesting depth of the given object.

    This default implementation handles non-container objects: they add no
    further nesting, so the level reached so far is returned. Container
    types are handled by implementations registered via ``depth.register``.
    """
    return _level


def _protect(f):
    """Protect against circular references"""
    @wraps(f)
    def wrapper(o, _level=1, _memo=None, **kwargs):
        # _memo holds the id() of every container already being measured.
        if _memo is None:
            _memo = set()
        id_ = id(o)
        if id_ in _memo:
            # Seen before: stop here rather than recursing forever.
            return _level
        _memo.add(id_)
        return f(o, _level=_level, _memo=_memo, **kwargs)
    return wrapper


def _protected_register(cls, func=None, _orig=depth.register):
    """Include the _protect decorator when registering"""
    if func is None and isinstance(cls, type):
        # Used as @depth.register(SomeType): return the actual decorator.
        return lambda f: _orig(cls, _protect(f))
    # Either register(SomeType, func), or a bare @depth.register where
    # ``cls`` is really the annotated function itself.
    return _orig(cls, _protect(func)) if func is not None else _orig(_protect(cls))


# Swap in the protecting variant so every registered implementation gets the
# circular-reference check.
depth.register = _protected_register


@depth.register
def _dict_depth(d: dict, _level=1, **kw):
    """Return the greatest depth reached by any value of the dict ``d``."""
    # default=_level: an empty dict has no values, and max() of an empty
    # iterable would otherwise raise ValueError.
    return max(
        (depth(v, _level=_level + 1, **kw) for v in d.values()),
        default=_level,
    )
This is a depth-first search, so max() is now needed to pick the greatest depth for the current object under scrutiny at each level. A dictionary with 3 keys, each of a different depth, should report the greatest depth at that level.
The memo set used in either version tracks object ids, so we don't run in circles if you did something like foo = {}; foo["bar"] = foo.
Demo:
>>> d = {'a':1, 'b': {'c':{}}}
>>> depth(d)
3
>>> d = {'foo': {'bar': {'baz': 0}, 'spam': {'ham': {'monty': 1}, 'eric': 'idle'}}, 'john': 'cleese'}
>>> depth(d)
5
>>> circular = {}
>>> circular["self"] = circular
>>> depth(circular)
2
The recursive singledispatch version can be expanded to cover more containers, such as lists:
@depth.register
def _list_depth(l: list, _level=1, **kw):
    """Return the greatest depth reached by any element of the list ``l``."""
    # default=_level: an empty list has no elements, and max() of an empty
    # iterable would otherwise raise ValueError.
    return max((depth(v, _level=_level + 1, **kw) for v in l), default=_level)
Because I've augmented the standard .register decorator to handle circular-reference testing, implementing additional container support is relatively trivial. Just remember to pass along any extra keyword arguments to the recursive call!
A non-recursive solution:
def depth(d):
depth=0
q = [(i, depth+1) for i in d.values() if isinstance(i, dict)]
max_depth = 0
while (q):
n, depth = q.pop()
max_depth = max(max_depth, depth)
q = q + [(i, depth+1) for i in n.values() if isinstance(i, dict)]
print max_depth
Iterative solution:
from collections import deque
def depth(d):
    """Return the number of nesting levels in ``d``, walked breadth-first.

    The top-level object counts as level 1 whether or not it is a dict, and
    the values of a dict form the next level, non-dict leaf values included.
    """
    q = deque([d])      # objects on the level currently being processed
    q2 = deque()        # objects collected for the next level
    max_depth = 0
    while q:
        curr_dict = q.popleft()
        if isinstance(curr_dict, dict):
            # values() works on Python 2 and 3; itervalues() is Python 2 only.
            q2.extend(curr_dict.values())
        if not q:
            # Current level exhausted: count it and move on to the next one.
            q, q2 = q2, q
            max_depth += 1
    return max_depth
print depth(None)
print depth({})
print depth({"a": "b"})
print depth({"a": "b", "c": {"d": "e"}, "f": {"g": "h"}, "i": {"j": "k"}, "x": {}, "z": {} })
print depth({'a':1, 'b': {'c':{}}})
print depth({'foo': {'bar': {'baz': 0}, 'spam': {'ham': {'monty': 1}, 'eric': 'idle'}}, 'john': 'cleese'})