I have a nested dictionary object and I want to be able to retrieve values of keys with an arbitrary depth. I'm able to do this by subclassing dict:
>>> class MyDict(dict):
... def recursive_get(self, *args, **kwargs):
... default = kwargs.get('default')
... cursor = self
... for a in args:
... if cursor is default: break
... cursor = cursor.get(a, default)
... return cursor
...
>>> d = MyDict(foo={'bar': 'baz'})
>>> d
{'foo': {'bar': 'baz'}}
>>> d.get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo', 'bar')
'baz'
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
However, I don't want to have to subclass dict to get this behavior. Is there some built-in method that has equivalent or similar behavior? If not, are there any standard or external modules that provide this behavior?
I'm using Python 2.7 at the moment, though I would be curious to hear about 3.x solutions as well.
A very common pattern to do this is to use an empty dict as your default:
d.get('foo', {}).get('bar')
If you have more than a couple of keys, you could use reduce (note that in Python 3 reduce must be imported: from functools import reduce) to apply the operation multiple times
reduce(lambda c, k: c.get(k, {}), ['foo', 'bar'], d)
Of course, you should consider wrapping this into a function (or a method):
def recursive_get(d, *keys):
return reduce(lambda c, k: c.get(k, {}), keys, d)
#ThomasOrozco's solution is correct, but resorts to a lambda function, which is only necessary to avoid TypeError if an intermediary key does not exist. If this isn't a concern, you can use dict.get directly:
from functools import reduce
def get_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
return reduce(dict.get, mapList, dataDict)
Here's a demo:
a = {'Alice': {'Car': {'Color': 'Blue'}}}
path = ['Alice', 'Car', 'Color']
get_from_dict(a, path) # 'Blue'
If you wish to be more explicit than using lambda while still avoiding TypeError, you can wrap in a try / except clause:
def get_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
try:
return reduce(dict.get, mapList, dataDict)
except TypeError:
return None # or some other default value
Finally, if you wish to raise KeyError when a key does not exist at any level, use operator.getitem or dict.__getitem__:
from functools import reduce
from operator import getitem
def getitem_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
return reduce(getitem, mapList, dataDict)
# or reduce(dict.__getitem__, mapList, dataDict)
Note that [] is syntactic sugar for the __getitem__ method. So this relates precisely how you would ordinarily access a dictionary value. The operator module just provides a more readable means of accessing this method.
You can actually achieve this really neatly in Python 3, given its handling of default keyword arguments and tuple decomposition:
In [1]: def recursive_get(d, *args, default=None):
...: if not args:
...: return d
...: key, *args = args
...: return recursive_get(d.get(key, default), *args, default=default)
...:
Similar code will also work in python 2, but you'd need to revert to using **kwargs, as you did in your example. You'd also need to use indexing to decompose *args.
In any case, there's no need for a loop if you're going to make the function recursive anyway.
You can see that the above code demonstrates the same functionality as your existing method:
In [2]: d = {'foo': {'bar': 'baz'}}
In [3]: recursive_get(d, 'foo')
Out[3]: {'bar': 'baz'}
In [4]: recursive_get(d, 'foo', 'bar')
Out[4]: 'baz'
In [5]: recursive_get(d, 'bogus key', default='nonexistent key')
Out[5]: 'nonexistent key'
You can use a defaultdict to give you an empty dict on missing keys:
from collections import defaultdict
mydict = defaultdict(dict)
This only goes one level deep - mydict[missingkey] is an empty dict, mydict[missingkey][missing key] is a KeyError. You can add as many levels as needed by wrapping it in more defaultdicts, eg defaultdict(defaultdict(dict)). You could also have the innermost one as another defaultdict with a sensible factory function for your use case, eg
mydict = defaultdict(defaultdict(lambda: 'big summer blowout'))
If you need it to go to arbitrary depth, you can do that like so:
def insanity():
return defaultdict(insanity)
print(insanity()[0][0][0][0])
There is none that I am aware of. However, you don't need to subclass dict at all, you can just write a function that takes a dictionary, args and kwargs and does the same thing:
def recursive_get(d, *args, **kwargs):
default = kwargs.get('default')
cursor = d
for a in args:
if cursor is default: break
cursor = recursive_get(cursor, a, default)
return cursor
use it like this
recursive_get(d, 'foo', 'bar')
The OP requested the following behavior
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
(As of June 15, 22022) none of the up-voted answers accomplish this, so I have modified #ThomasOrozco's solution to resolve this
from functools import reduce
def rget(d, *keys, default=None):
"""Use a sentinel to handle both missing keys AND alternate default values"""
sentinel = {}
v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
if v is sentinel:
return default
return v
Below is a complete, unit-test-like demonstration of where the other answers have issues. I've named each approach according to its author. Note that this answer is the only one which passes all 4 test cases, namely
Basic retrieval when key-tree exists
Non-existent key-tree returns None
Option to specify a default aside from None
Values which are an empty dict should return as themselves rather than the default
from functools import reduce
def thomas_orozco(d, *keys):
return reduce(lambda c, k: c.get(k, {}), keys, d)
def jpp(dataDict, *mapList):
"""Same logic as thomas_orozco but exits at the first missing key instead of last"""
try:
return reduce(dict.get, *mapList, dataDict)
except TypeError:
return None
def sapi(d, *args, default=None):
if not args:
return d
key, *args = args
return sapi(d.get(key, default), *args, default=default)
def rget(d, *keys, default=None):
sentinel = {}
v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
if v is sentinel:
return default
return v
def assert_rget_behavior(func):
"""Unit tests for desired behavior of recursive dict.get()"""
fail_count = 0
# Basic retrieval when key-tree exists
d = {'foo': {'bar': 'baz', 'empty': {}}}
try:
v = func(d, 'foo', 'bar')
assert v == 'baz', f'Unexpected value {v} retrieved'
except Exception as e:
print(f'Case 1: Failed basic retrieval with {repr(e)}')
fail_count += 1
# Non-existent key-tree returns None
try:
v = func(d, 'bogus', 'key')
assert v is None, f'Missing key retrieved as {v} instead of None'
except Exception as e:
print(f'Case 2: Failed missing retrieval with {repr(e)}')
fail_count += 1
# Option to specify a default aside from None
default = 'alternate'
try:
v = func(d, 'bogus', 'key', default=default)
assert v == default, f'Missing key retrieved as {v} instead of {default}'
except Exception as e:
print(f'Case 3: Failed default retrieval with {repr(e)}')
fail_count += 1
# Values which are an empty dict should return as themselves rather than the default
try:
v = func(d, 'foo', 'empty')
assert v == {}, f'Empty dict value retrieved as {v} instead of {{}}'
except Exception as e:
print(f'Case 4: Failed retrieval of empty dict value with {repr(e)}')
fail_count += 1
# Success only if all pass
if fail_count == 0:
print('Passed all tests!')
if __name__ == '__main__':
assert_rget_behavior(thomas_orozco) # Fails cases 2 and 3
assert_rget_behavior(jpp) # Fails cases 1, 3, and 4
assert_rget_behavior(sapi) # Fails cases 2 and 3
assert_rget_behavior(rget) # Only one to pass all 3
collections.default_dict will handle the providing of default values for nonexistent keys at least.
The Iterative Solution
def deep_get(d:dict, keys, default=None, create=True):
if not keys:
return default
for key in keys[:-1]:
if key in d:
d = d[key]
elif create:
d[key] = {}
d = d[key]
else:
return default
key = keys[-1]
if key in d:
return d[key]
elif create:
d[key] = default
return default
def deep_set(d:dict, keys, value, create=True):
assert(keys)
for key in keys[:-1]:
if key in d:
d = d[key]
elif create:
d[key] = {}
d = d[key]
d[keys[-1]] = value
return value
I am about to test it inside of a Django project with a line such as:
keys = ('options', 'style', 'body', 'name')
val = deep_set(d, keys, deep_get(s, keys, 'dotted'))
Related
There are times when it's necessary to override a dictionary member that might already exist, execute arbitrary code (such as a callback, which could fail) then set the value back to it's previous state (which includes not being present).
Keeping a copy of the dictionary isn't an option since this dictionary might have other members modified by the callback (which I want to keep).
How should a dictionary item be overridden temporarily?
Here is a very straight-forward implementation:
import contextlib
#contextlib.contextmanager
def temp_item(dictionary, key, value):
empty = object()
original = dictionary.get(key, empty)
dictionary[key] = value
try:
yield dictionary
finally:
if original is empty:
dictionary.pop(key, None)
else:
dictionary[key] = original
This can be used as follows:
d = {'a':1, 'b':2}
with temp_item(d, 'c', '3') as d:
d['d'] = 4
d['a'] = 10
print(d['c'])
print(d)
Which outputs:
3
{'a': 10, 'b': 2, 'd': 4}
unittest.mock provides patch.dict
It can patch not only dictionaries, but objects that behave like them and also clear out the mock with or without mocking its contents
However, it's probably best to only rely on unittest in a test context
import unittest
from unittest.mock import patch
class TestWhatever(unittest.TestCase):
def test_dictionary_mocking(self):
with patch.dict("os.environ", {"RUNNING_AS_DOCKER": "true"}):
self.assertTrue(detect_docker())
with patch.dict("os.environ", clear=True):
self.assertFalse(detect_docker())
This can be done inline using a try/finally block.
# Set to any object you know the dictionary won't use as a value.
sentinel = object()
value_orig = mydict.get(key, sentinel)
mydict[key] = value_new
try:
run_callback()
finally:
if value_orig is sentinel:
# Use pop in case the callback added this key.
mydict.pop(key, None)
else:
mydict[key] = value_orig
Wrapped into a context manager that takes a dictionary as an argument (instead of a single key: value pair, for added flexibility):
class DictOverride:
__slots__ = ("dict_base", "items_override", "values_orig")
_sentinel = object()
def __init__(self, dict_base, dict_override):
sentinel = self._sentinel
self.items_override = tuple(dict_override.items())
self.values_orig = [
dict_base.get(key, sentinel)
for key, _ in self.items_override
]
self.dict_base = dict_base
def __enter__(self):
dict_base = self.dict_base
for key, value in self.items_override:
dict_base[key] = value
def __exit__(self, _type, _value, _traceback):
sentinel = self._sentinel
dict_base = self.dict_base
for (key, value), value_orig in zip(
self.items_override,
self.values_orig,
):
if value_orig is sentinel:
dict_base.pop(key)
else:
dict_base[key] = value_orig
# COntext manager test case
dct_test = {"eggs": "soft", "coconut": "hard"}
print("Original:", dct_test)
with DictOverride(dct_test, {"eggs": "hard"}):
print("Override:", dct_test)
print("Original:", dct_test, "(again)")
Which outputs:
Original: {'eggs': 'soft', 'coconut': 'hard'}
Override: {'eggs': 'hard', 'coconut': 'hard'}
Original: {'eggs': 'soft', 'coconut': 'hard'} (again)
I am developing a python utility. Part of this is to generate an index of files
Is it possible within python to access a multidimensional dictionary dynamically without knowing the depth.
ie if i had the example data:
example = {'main': {'2': {'2': '2-2', '1': '2-1'}, '1': {'2': '1-2', '1': '1-1'}}}
Is there a way i can access elements with something similar to
example["main","2","1"] and it return 2-1?
I am aware i can write my own diving algorithms but my experience shows you cannot then write to the value.
If you absolutely positively need to do it that way, you'll need to roll your own dict class. Luckily you can inherit everything but __getitem__ from dict.
class MyDict(dict):
def __getitem__(self, keys):
if isinstance(keys, str):
# this special-case saves you if you try to do normal indexing
# on a string.
return super().__getitem__(keys)
cur = self
for key in keys:
cur = cur.get(key, {})
# the default option here returns an empty dict instead
# of raising a KeyError. That might not be what you want
return cur
Note that this removes your ability to key by tuple, so key/vals like {("some", "tuple", "values"): "any value"} will be inaccessible unless specifically coded for. That might look something like...
...
for i, key in enumerate(keys):
if keys[i:] in cur:
return cur[keys[i:]]
cur = cur.get(key, {})
You can then cast your mapping to this new dict and search that way.
example = {'main': {'2': {'2': '2-2', '1': '2-1'}, '1': {'2': '1-2', '1': '1-1'}}}
result = MyDict2(example)['2', '2', '1']
You mention having to set values by this as well, in which case also inherit __setitem__.
class MyDict(dict):
def __getitem__(self, keys):
# as above
def __setitem__(self, keys, value):
if isinstance(keys, str):
super().__setitem__(keys, value)
cur = self
for key in keys[:-1]:
cur = cur.setdefault(key, {})
cur[keys[-1]] = value
You can also wrap the idea proposed by #Arya in a derived dict class, e.g.:
class ListAccess(dict):
def __getitem__(self, item):
if type(item) in [tuple,list]:
item = list(item)
ret = self
while True:
try:
ret = ret[item.pop(0)]
except IndexError:
break
return ret
else:
return super(ListAccess, self).__getitem__(item)
store = ListAccess({'main': {'2': {'2': '2-2', '1': '2-1'}, '1': {'2': '1-2', '1': '1-1'}}})
print store['main','2','1']
You can make a function like this:
def get_item(d, keys):
current = d
for k in keys:
current = current[k] # You can add some error handling here
return current
Example of usage: https://repl.it/E49o/1
If you want to modify the value at the last index, you can do something like this.
def set_item(d, keys, new_value):
current = d
for k in keys[:-1]: # All the keys except the last one
current = current[k]
current[keys[-1]] = new_value
I have a text file abc.txt:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
I need to parse these statements and output should be in nested dictionary as below:
{'abc':{'pqr':{'lmn':{'xyz':{'pass':1},{'bcd':{'pass':1}}}}}}
where 1 is 'pass' count.
I'm able to do as much as this:
import re
d={}
p=re.compile('[a-zA-z]+')
for line in open('abc.txt'):
for key in p.findall(line):
d['key']={}
Check out the setdefault method on dictionaries.
d = {}
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('xyz', {})['pass'] = 1
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('bcd', {})['pass'] = 1
d
gives
{'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 1}}}}
Here's an updated version of my answer in which leaves of the tree data-structure are now different from those in rest of it. Instead of the tree being strictly a dict-of-nested-dicts, the "leaves" on each branch are now instances of a different subclass of dict named collections.Counter which are useful for counting the number of times each of their keys occur. I did this because of your response to my question about what should happen if the last part of each line was something other than ":pass" (which was "we have to put new count for that key").
Nested dictionaries are often called Tree data-structures and can be defined recursively — the root is a dictionary as are the branches. The following uses a dict subclass instead of a plain dict because it makes constructing them easier since you don't need to special case the creation of the first branch of next level down (except I still do when adding the "leaves" because they are a different subclass, collections.Counter).
from collections import Counter
from functools import reduce
import re
# (Optional) trick to make Counter subclass print like a regular dict.
class Counter(Counter):
def __repr__(self):
return dict(self).__repr__()
# Borrowed from answer # https://stackoverflow.com/a/19829714/355230
class Tree(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
# Utility functions based on answer # https://stackoverflow.com/a/14692747/355230
def nested_dict_get(nested_dict, keys):
return reduce(lambda d, k: d[k], keys, nested_dict)
def nested_dict_set(nested_dict, keys, value):
nested_dict_get(nested_dict, keys[:-1])[keys[-1]] = value
def nested_dict_update_count(nested_dict, keys):
counter = nested_dict_get(nested_dict, keys[:-1])
if counter: # Update existing Counter.
counter.update([keys[-1]])
else: # Create a new Counter.
nested_dict_set(nested_dict, keys[:-1], Counter([keys[-1]]))
d = Tree()
pat = re.compile(r'[a-zA-z]+')
with open('abc.txt') as file:
for line in file:
nested_dict_update_count(d, [w for w in pat.findall(line.rstrip())])
print(d) # Prints like a regular dict.
To test the leaf-counting capabilities of the revised code, I used the following test file which includes the same line twice, once ending again with :pass and another ending in :fail.
Expanded abc.txt test file:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
abc/pqr/lmn/xyz:fail
abc/pqr/lmn/xyz:pass
Output:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'fail': 1, 'pass': 2}}}}}
If i understand your question:
sources = ["abc/pqr/lmn/xyz:pass", "abc/pqr/lmn/bcd:pass", "abc/pqr/lmn/xyz:pass"]
def prepare_source(source):
path, value = source.split(':')
elements = path.split('/')
return elements, value
def add_key(elements, value):
result = dict()
if len(elements) > 1:
result[elements[0]] = add_key(elements[1:], value)
else:
result[elements[0]] = {value: 1}
return result
# base merge function get from here:
# http://stackoverflow.com/questions/7204805/dictionaries-of-dictionaries-merge
def merge(a, b, path=None):
"merges b into a"
if path is None: path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
merge(a[key], b[key], path + [str(key)])
elif isinstance(a[key], int) and isinstance(b[key], int):
a[key] += b[key]
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a
result = dict()
for source in sources:
result = merge(result, add_key(*prepare_source(source)))
print result
Output will be:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 2}}}}}
def function(varone=None, vartwo=None, varthree=None):
values = {}
if var1 is not None:
values['var1'] = varone
if var2 is not None:
values['var2'] = vartwo
if var3 is not None:
values['var3'] = varthree
if not values:
raise Exception("No values provided")
Can someone suggest a more elegant, pythonic way to accomplish taking placing non-null named variables and placing them in a dictionary? I do not want the values to be passed in as a dictionary. The key names of "values" are important and must be as they are. The value of "varone" must go into var1, "vartwo" must go into var2 and so on; Thanks.
You could use kwargs:
def function(*args, **kwargs):
values = {}
for k in kwargs:
if kwargs[k] is not None:
values[k] = kwargs[k]
if not values:
raise Exception("No values provided")
return values
>>> function(varone=None, vartwo="fish", varthree=None)
{'vartwo': 'fish'}
With this syntax, Python removes the need to explicitly specify any argument list, and allows functions to handle any old keyword arguments they want.
If you're specifically looking for keys var1 etc instead of varone you just modify the function call:
>>> function(var1=None, var2="fish", var3=None)
{'var2': 'fish'}
If you want to be REALLY slick, you can use list comprehensions:
def function(**kwargs):
values = dict([i for i in kwargs.iteritems() if i[1] != None])
if not values:
raise Exception("foo")
return values
Again, you'll have to alter your parameter names to be consistent with your output keys.
Use **kwargs. Example:
def function(**kwargs):
if not kwargs:
raise Exception("No values provided")
for k, v in kwargs.items():
print("%s: %r") % (k, v)
If you really are going to call function with None arguments, you can strip them out:
def function(**kwargs):
for k, v in kwargs.items():
if v is None:
del kwargs[k]
if not kwargs:
raise Exception("No values provided")
for k, v in kwargs.items():
print("%s: %r") % (k, v)
Obviously you could call the dict values instead, but kwargs is the conventional name, and will make your code more intelligible to other people.
Well, you can pass all those values inside a keyword argument: -
def function(*nkwargs, **kwargs):
values = {}
for k in kwargs:
if kwargs[k] is not None:
values[k] = kwargs[k]
if not values:
raise Exception("No values")
print values
try:
function()
except Exception, e:
print e
function(varOne=123, varTwo=None)
function(varOne=123, varTwo=234)
OUTPUT: -
No values
{'varOne': 123}
{'varOne': 123, 'varTwo': 234}
Call your function as usual, but accept as **kwargs. Then filter them:
def fn(**kwargs):
items = {'var%s' % i: v for i, (k, v) in enumerate(items)}
fn(a=1, b=2, c=3)
if you need a specific set of names, then make a dict of names:
names = dict(zip('varOne varTwo varThree'.split(), range(1, 4)))
walk over this dict and check if the var is in kwargs:
items = {'var%s' % k: kwargs[v] for k, v in names.items() if v in kwargs}
I am looking to create a simple nested "lookup" mechanism in python, and wanted to make sure there wasn't already something somewhere hidden in the vast libraries in python that doesn't already do this before creating it.
I am looking to take a dict that is formatted something like this
my_dict = {
"root": {
"secondary": {
"user1": {
"name": "jim",
"age": 24
},
"user2": {
"name": "fred",
"age": 25
}
}
}
}
and I am trying to have a way to access the data by using a decimal notation that would be something similar to
root.secondary.user2
and return that resulting dict back as a response. I am thinking that there must be something that does this and I could write one without much difficulty but I want to make sure I am not recreating something I might be missing from the documentation. Thanks
There's nothing in the standard library for this purpose, but it is rather easy to code this yourself:
>>> key = "root.secondary.user2"
>>> reduce(dict.get, key.split("."), my_dict)
{'age': 25, 'name': 'fred'}
This exploits the fact that the look-up for the key k in the dictionary d can be written as dict.get(d, k). Applying this iteratively using reduce() leads to the desired result.
Edit: For completeness three functions to get, set or delete dictionary keys using this method:
def get_key(my_dict, key):
return reduce(dict.get, key.split("."), my_dict)
def set_key(my_dict, key, value):
key = key.split(".")
my_dict = reduce(dict.get, key[:-1], my_dict)
my_dict[key[-1]] = value
def del_key(my_dict, key):
key = key.split(".")
my_dict = reduce(dict.get, key[:-1], my_dict)
del my_dict[key[-1]]
You can have that. You can subclass dict, add the key lookup (and even retain the name dict) by using code similar to the one below. The {...} form however will still use the builtin dict class (now called orig_dict), so you have to enclose it, like so: Dict({...}). This implementation recursively converts dictionaries to the new form, so you don't have to use the method above for any dictionary entries that are plain dictionaries themselves.
orig_dict = dict
class Dict(orig_dict):
def __init__(self, *args, **kwargs):
super(Dict, self).__init__(*args, **kwargs)
for k, v in self.iteritems():
if type(v) == orig_dict and not isinstance(v, Dict):
super(Dict, self).__setitem__(k, Dict(v))
def __getattribute__(self, k):
try: return super(Dict, self).__getattribute__(k)
except: return self.__getitem__(k)
def __setattr__(self, k, v):
if self.has_key(k): self.__setitem__(k, v)
else: return super(Dict, self).__setattr__(k, v)
def __delattr__(self, k):
try: self.__delitem__(k)
except: super(Dict, self).__delattr__(k)
def __setitem__(self, k, v):
toconvert = type(v) == orig_dict and not isinstance(v, Dict)
super(Dict, self).__setitem__(k, Dict(v) if toconvert else v)
# dict = Dict <-- you can even do this but I advise against it
# testing:
b = Dict(a=1, b=Dict(c=2, d=3))
c = Dict({'a': 1, 'b': {'c': 2, 'd': 3}})
d = Dict(a=1, b={'c': 2, 'd': {'e': 3, 'f': {'g': 4}}})
b.a = b.b
b.b = 1
d.b.d.f.g = 40
del d.b.d.e
d.b.c += d.b.d.f.g
c.b.c += c.a
del c.a
print b
print c
print d
Recursion still works.
def walk_into( dict, key ):
head, _, tail = key.partition('.')
if tail:
return walk_into( dict[head], tail )
return dict, key
d, k = walk_into( my_dict, "root.secondary.user2" )
d[k] can be used for getting or putting a new value.
I have a pretty complete implementation for this and some other stuff here. Repository here, trict.util combined with the __get__ method in trict.trict might have the stuff you need if you don't feel like installing it. Also it actually is in conda-forge even though the README might say otherwise if I haven't gotten around to updating it before you're reading this.