How to temporarily override a member of a dictionary? - python

There are times when it's necessary to override a dictionary member that might already exist, execute arbitrary code (such as a callback, which could fail) then set the value back to it's previous state (which includes not being present).
Keeping a copy of the dictionary isn't an option since this dictionary might have other members modified by the callback (which I want to keep).
How should a dictionary item be overridden temporarily?

Here is a very straight-forward implementation:
import contextlib
#contextlib.contextmanager
def temp_item(dictionary, key, value):
empty = object()
original = dictionary.get(key, empty)
dictionary[key] = value
try:
yield dictionary
finally:
if original is empty:
dictionary.pop(key, None)
else:
dictionary[key] = original
This can be used as follows:
d = {'a':1, 'b':2}
with temp_item(d, 'c', '3') as d:
d['d'] = 4
d['a'] = 10
print(d['c'])
print(d)
Which outputs:
3
{'a': 10, 'b': 2, 'd': 4}

unittest.mock provides patch.dict
It can patch not only dictionaries, but objects that behave like them and also clear out the mock with or without mocking its contents
However, it's probably best to only rely on unittest in a test context
import unittest
from unittest.mock import patch
class TestWhatever(unittest.TestCase):
def test_dictionary_mocking(self):
with patch.dict("os.environ", {"RUNNING_AS_DOCKER": "true"}):
self.assertTrue(detect_docker())
with patch.dict("os.environ", clear=True):
self.assertFalse(detect_docker())

This can be done inline using a try/finally block.
# Set to any object you know the dictionary won't use as a value.
sentinel = object()
value_orig = mydict.get(key, sentinel)
mydict[key] = value_new
try:
run_callback()
finally:
if value_orig is sentinel:
# Use pop in case the callback added this key.
mydict.pop(key, None)
else:
mydict[key] = value_orig
Wrapped into a context manager that takes a dictionary as an argument (instead of a single key: value pair, for added flexibility):
class DictOverride:
__slots__ = ("dict_base", "items_override", "values_orig")
_sentinel = object()
def __init__(self, dict_base, dict_override):
sentinel = self._sentinel
self.items_override = tuple(dict_override.items())
self.values_orig = [
dict_base.get(key, sentinel)
for key, _ in self.items_override
]
self.dict_base = dict_base
def __enter__(self):
dict_base = self.dict_base
for key, value in self.items_override:
dict_base[key] = value
def __exit__(self, _type, _value, _traceback):
sentinel = self._sentinel
dict_base = self.dict_base
for (key, value), value_orig in zip(
self.items_override,
self.values_orig,
):
if value_orig is sentinel:
dict_base.pop(key)
else:
dict_base[key] = value_orig
# COntext manager test case
dct_test = {"eggs": "soft", "coconut": "hard"}
print("Original:", dct_test)
with DictOverride(dct_test, {"eggs": "hard"}):
print("Override:", dct_test)
print("Original:", dct_test, "(again)")
Which outputs:
Original: {'eggs': 'soft', 'coconut': 'hard'}
Override: {'eggs': 'hard', 'coconut': 'hard'}
Original: {'eggs': 'soft', 'coconut': 'hard'} (again)

Related

How to loop in parallel to alter objects inside a dictionary in Python?

Here is a minimum example of my problem:
import concurrent.futures
from functools import partial
# Object class
class obj:
def __init__(self,tup):
self.tup = tup
# Function includes attributes in objects of class above
def new(fi,fdic):
fdic[fi].new = 'work_'+ str(fdic[fi].tup)
# Dictionary full of instances of obj above
dic = {'a':obj(1),
'b':obj(2),
'c':obj(3),
'd':obj(4),
'e':obj(5),
'f':obj(6),
}
partial_new = partial(new, fdic=dic)
Now I want to multiprocess all the objects in the dictionary (because I have too many in reality).
The code below runs. But it does not "work", because I actually need ProcessPool (I think? Because I want to process things in parallel).
with concurrent.futures.ThreadPoolExecutor() as executor:
for _ in executor.map(partial_new, dic.keys()):
pass
print(dic['b'].new)
This one does not run:
with concurrent.futures.ProcessPoolExecutor() as executor:
for _ in executor.map(partial_new, dic.keys()):
pass
print(dic['b'].new)
My question is: How do I make this work?
I just need to use the function to modify all the objects inside the dictionary in parallel. Later I wills save the full dictionary, but the function that I apply does not return anything (if this makes things easier).
Is the issue that it takes a long time to calculate the new value?
def get_new_value(dictionary_item):
key, value = dictionary_item
return key, 'work_' + str(value.tup)
with concurrent.futures.ProcessPoolExecutor() as executor:
for key, new_value in executor.map(get_new_value, dic.items()):
dic[key].new = new_value
You can only have one thread modifying dic. But you can pass key and value to a thread, have the thread return the key and the new value, and then have the original thread do the work of updating the dictionary.
You'll probably want to specify a chunksize to map
=== edited ===
As promised, my complete file.
import concurrent.futures
# Object class
class obj:
def __init__(self, tup):
self.tup = tup
# Dictionary full of instances of obj above
dic = {'a': obj(1),
'b': obj(2),
'c': obj(3),
'd': obj(4),
'e': obj(5),
'f': obj(6),
}
def get_new_value(dictionary_item):
key, value = dictionary_item
return key, 'work_' + str(value.tup)
def go():
with concurrent.futures.ProcessPoolExecutor() as executor:
for key, new_value in executor.map(get_new_value, dic.items()):
dic[key].new = new_value
# Make sure it really worked!
for key, value in dic.items():
print(key, value.new)
if __name__ == '__main__':
go()
You can use ThreadPool from the multiprocess module in the following way:
Create a list of the dict keys (ls = [a for a in dict.keys())
Define a function that given a pointer to a dict and a key does the alteration you desire
use ThreadPool's starmap() method to run that function on the list you created and the dict
join and close the thread pool

Nested dictionary that acts as defaultdict when setting items but not when getting items

I want to implement a dict-like data structure that has the following properties:
from collections import UserDict
class TestDict(UserDict):
pass
test_dict = TestDict()
# Create empty dictionaries at 'level_1' and 'level_2' and insert 'Hello' at the 'level_3' key.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
# However, this should not return an empty dictionary but raise a KeyError.
>>> test_dict['unknown_key']
KeyError: 'unknown_key'
The problem, to my knowledge, is that python does not know whether __getitem__ is being called in the context of setting an item, i.e. the first example, or in the context of getting and item, the second example.
I have already seen Python `defaultdict`: Use default when setting, but not when getting, but I do not think that this question is a duplicate, or that it answers my question.
Please let me know if you have any ideas.
Thanks in advance.
EDIT:
It is possible to achieve something similar using:
def set_nested_item(dict_in: Union[dict, TestDict], value, keys):
for i, key in enumerate(keys):
is_last = i == (len(keys) - 1)
if is_last:
dict_in[key] = value
else:
if key not in dict_in:
dict_in[key] = {}
else:
if not isinstance(dict_in[key], (dict, TestDict)):
dict_in[key] = {}
dict_in[key] = set_nested_item(dict_in[key], value, keys[(i + 1):])
return dict_in
class TestDict(UserDict):
def __init__(self):
super().__init__()
def __setitem__(self, key, value):
if isinstance(key, list):
self.update(set_nested_item(self, value, key))
else:
super().__setitem__(key, value)
test_dict[['level_1', 'level_2', 'level_3']] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
It's impossible.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
is semantically equivalent to:
temp1 = test_dict['level_1'] # Should this line fail?
temp1['level_2']['level_3'] = 'Hello'
But... if determined to implement it anyway, you could inspect the Python stack to grab/parse the calling line of code, and then vary the behaviour depending on whether the calling line of code contains an assignment! Unfortunately, sometimes the calling code isn't available in the stack trace (e.g. when called interactively), in which case you need to work with Python bytecode.
import dis
import inspect
from collections import UserDict
def get_opcodes(code_object, lineno):
"""Utility function to extract Python VM opcodes for line of code"""
line_ops = []
instructions = dis.get_instructions(code_object).__iter__()
for instruction in instructions:
if instruction.starts_line == lineno:
# found start of our line
line_ops.append(instruction.opcode)
break
for instruction in instructions:
if not instruction.starts_line:
line_ops.append(instruction.opcode)
else:
# start of next line
break
return line_ops
class TestDict(UserDict):
def __getitem__(self, key):
try:
return super().__getitem__(key)
except KeyError:
# inspect the stack to get calling line of code
frame = inspect.stack()[1].frame
opcodes = get_opcodes(frame.f_code, frame.f_lineno)
# STORE_SUBSCR is Python opcode for TOS1[TOS] = TOS2
if dis.opmap['STORE_SUBSCR'] in opcodes:
# calling line of code contains a dict/array assignment
default = TestDict()
super().__setitem__(key, default)
return default
else:
raise
test_dict = TestDict()
test_dict['level_1']['level_2']['level_3'] = 'Hello'
print(test_dict)
# {'level_1': {'level_2': {'level_3': 'Hello'}}}
test_dict['unknown_key']
# KeyError: 'unknown_key'
The above is just a partial solution. It can still be fooled if there are other dictionary/array assignments on the same line, e.g. other['key'] = test_dict['unknown_key']. A more complete solution would need to actually parse the line of code to figure out where the variable occurs in the assignment.

How do I use dictionary key,value pair to set class instance attributes "pythonic"ly?

I have created some Python classes to use as multivariate data structures, which are then used for various tasks. In some instances, I like to populate the classes with various value sets. The default parameter filename "ho2.defaults" would look something like this:
name = 'ho2'
mass_option = 'h1o16'
permutation = 'odd'
parity = 'odd'
j_total = 10
lr = 40
br = 60
jmax = 60
mass_lr = 14578.471659
mass_br = 1781.041591
length_lr = ( 1.0, 11.0, 2.65 )
length_br = ( 0.0, 11.0, 2.46 )
use_spline = True
energy_units = 'au'
pes_zpe = -7.407998138300982E-2
pes_cutoff = 0.293994
Currently, I create a dictionary from reading the desired key,value pairs from file, and now I'd like a "pythonic" way of making those dictionary keys be class instance variable names, i.e.
# Instantiate Molecule Class
molecule = Molecule()
# Create Dictionary of default values
default_dict = read_dict_from_file(filename)
# Set populate class instance variables with dictionary values
for key,value in default_dict:
molecule.key = value
So the Class's instance variable "molecule.name" could be set with the dictionary key,value pair. I could do this by hand, but I'ms sure there is a better way to loop through it. In actuality, the dictionary could be large, and I'd rather allow the user to choose which values they want to populate, so the dictionary could change. What am I missing here?
You would use setattr: setattr(molecule, key, value)
The simple way is:
vars(molecule).update(default_dict)
This will clobber any pre-existing attributes though. For a more delicate approach try:
for name, value in default_dict.items():
if not hasattr(molecule, name):
setattr(molecule, name value)
I'd invert the logic so that the object dynamically answers questions:
class Settings(object):
ATTRS = {'foo', 'bar'}
def __init__(self, defaults):
self.__dict__['data'] = defaults.copy()
def __getattr__(self, key):
if key not in self.ATTRS or key not in self.data:
raise AttributeError("'{}' object has no attribute '{}'".format(
self.__class__.__name__, key))
return self.data[key]
def __setattr__(self, key, value):
self.data[key] = value
s = Settings({'a': 'b', 'foo': 'foo!', 'spam': 'eggs'})
print s.foo
try:
print s.spam
except AttributeError:
pass
else:
raise AssertionError("That should have failed because 'spam' isn't in Settings.ATTRS")
try:
print s.bar
except AttributeError:
pass
else:
raise AssertionError("That should have failed because 'bar' wasn't passed in")
class Molecule(settings):
ATTRS = {'name', 'mass_option', ...}
molecule = Molecule(default_dict)

Is there a recursive version of the dict.get() built-in?

I have a nested dictionary object and I want to be able to retrieve values of keys with an arbitrary depth. I'm able to do this by subclassing dict:
>>> class MyDict(dict):
... def recursive_get(self, *args, **kwargs):
... default = kwargs.get('default')
... cursor = self
... for a in args:
... if cursor is default: break
... cursor = cursor.get(a, default)
... return cursor
...
>>> d = MyDict(foo={'bar': 'baz'})
>>> d
{'foo': {'bar': 'baz'}}
>>> d.get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo', 'bar')
'baz'
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
However, I don't want to have to subclass dict to get this behavior. Is there some built-in method that has equivalent or similar behavior? If not, are there any standard or external modules that provide this behavior?
I'm using Python 2.7 at the moment, though I would be curious to hear about 3.x solutions as well.
A very common pattern to do this is to use an empty dict as your default:
d.get('foo', {}).get('bar')
If you have more than a couple of keys, you could use reduce (note that in Python 3 reduce must be imported: from functools import reduce) to apply the operation multiple times
reduce(lambda c, k: c.get(k, {}), ['foo', 'bar'], d)
Of course, you should consider wrapping this into a function (or a method):
def recursive_get(d, *keys):
return reduce(lambda c, k: c.get(k, {}), keys, d)
#ThomasOrozco's solution is correct, but resorts to a lambda function, which is only necessary to avoid TypeError if an intermediary key does not exist. If this isn't a concern, you can use dict.get directly:
from functools import reduce
def get_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
return reduce(dict.get, mapList, dataDict)
Here's a demo:
a = {'Alice': {'Car': {'Color': 'Blue'}}}
path = ['Alice', 'Car', 'Color']
get_from_dict(a, path) # 'Blue'
If you wish to be more explicit than using lambda while still avoiding TypeError, you can wrap in a try / except clause:
def get_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
try:
return reduce(dict.get, mapList, dataDict)
except TypeError:
return None # or some other default value
Finally, if you wish to raise KeyError when a key does not exist at any level, use operator.getitem or dict.__getitem__:
from functools import reduce
from operator import getitem
def getitem_from_dict(dataDict, mapList):
"""Iterate nested dictionary"""
return reduce(getitem, mapList, dataDict)
# or reduce(dict.__getitem__, mapList, dataDict)
Note that [] is syntactic sugar for the __getitem__ method. So this relates precisely how you would ordinarily access a dictionary value. The operator module just provides a more readable means of accessing this method.
You can actually achieve this really neatly in Python 3, given its handling of default keyword arguments and tuple decomposition:
In [1]: def recursive_get(d, *args, default=None):
...: if not args:
...: return d
...: key, *args = args
...: return recursive_get(d.get(key, default), *args, default=default)
...:
Similar code will also work in python 2, but you'd need to revert to using **kwargs, as you did in your example. You'd also need to use indexing to decompose *args.
In any case, there's no need for a loop if you're going to make the function recursive anyway.
You can see that the above code demonstrates the same functionality as your existing method:
In [2]: d = {'foo': {'bar': 'baz'}}
In [3]: recursive_get(d, 'foo')
Out[3]: {'bar': 'baz'}
In [4]: recursive_get(d, 'foo', 'bar')
Out[4]: 'baz'
In [5]: recursive_get(d, 'bogus key', default='nonexistent key')
Out[5]: 'nonexistent key'
You can use a defaultdict to give you an empty dict on missing keys:
from collections import defaultdict
mydict = defaultdict(dict)
This only goes one level deep - mydict[missingkey] is an empty dict, mydict[missingkey][missing key] is a KeyError. You can add as many levels as needed by wrapping it in more defaultdicts, eg defaultdict(defaultdict(dict)). You could also have the innermost one as another defaultdict with a sensible factory function for your use case, eg
mydict = defaultdict(defaultdict(lambda: 'big summer blowout'))
If you need it to go to arbitrary depth, you can do that like so:
def insanity():
return defaultdict(insanity)
print(insanity()[0][0][0][0])
There is none that I am aware of. However, you don't need to subclass dict at all, you can just write a function that takes a dictionary, args and kwargs and does the same thing:
def recursive_get(d, *args, **kwargs):
default = kwargs.get('default')
cursor = d
for a in args:
if cursor is default: break
cursor = recursive_get(cursor, a, default)
return cursor
use it like this
recursive_get(d, 'foo', 'bar')
The OP requested the following behavior
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
(As of June 15, 22022) none of the up-voted answers accomplish this, so I have modified #ThomasOrozco's solution to resolve this
from functools import reduce
def rget(d, *keys, default=None):
"""Use a sentinel to handle both missing keys AND alternate default values"""
sentinel = {}
v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
if v is sentinel:
return default
return v
Below is a complete, unit-test-like demonstration of where the other answers have issues. I've named each approach according to its author. Note that this answer is the only one which passes all 4 test cases, namely
Basic retrieval when key-tree exists
Non-existent key-tree returns None
Option to specify a default aside from None
Values which are an empty dict should return as themselves rather than the default
from functools import reduce
def thomas_orozco(d, *keys):
return reduce(lambda c, k: c.get(k, {}), keys, d)
def jpp(dataDict, *mapList):
"""Same logic as thomas_orozco but exits at the first missing key instead of last"""
try:
return reduce(dict.get, *mapList, dataDict)
except TypeError:
return None
def sapi(d, *args, default=None):
if not args:
return d
key, *args = args
return sapi(d.get(key, default), *args, default=default)
def rget(d, *keys, default=None):
sentinel = {}
v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
if v is sentinel:
return default
return v
def assert_rget_behavior(func):
"""Unit tests for desired behavior of recursive dict.get()"""
fail_count = 0
# Basic retrieval when key-tree exists
d = {'foo': {'bar': 'baz', 'empty': {}}}
try:
v = func(d, 'foo', 'bar')
assert v == 'baz', f'Unexpected value {v} retrieved'
except Exception as e:
print(f'Case 1: Failed basic retrieval with {repr(e)}')
fail_count += 1
# Non-existent key-tree returns None
try:
v = func(d, 'bogus', 'key')
assert v is None, f'Missing key retrieved as {v} instead of None'
except Exception as e:
print(f'Case 2: Failed missing retrieval with {repr(e)}')
fail_count += 1
# Option to specify a default aside from None
default = 'alternate'
try:
v = func(d, 'bogus', 'key', default=default)
assert v == default, f'Missing key retrieved as {v} instead of {default}'
except Exception as e:
print(f'Case 3: Failed default retrieval with {repr(e)}')
fail_count += 1
# Values which are an empty dict should return as themselves rather than the default
try:
v = func(d, 'foo', 'empty')
assert v == {}, f'Empty dict value retrieved as {v} instead of {{}}'
except Exception as e:
print(f'Case 4: Failed retrieval of empty dict value with {repr(e)}')
fail_count += 1
# Success only if all pass
if fail_count == 0:
print('Passed all tests!')
if __name__ == '__main__':
assert_rget_behavior(thomas_orozco) # Fails cases 2 and 3
assert_rget_behavior(jpp) # Fails cases 1, 3, and 4
assert_rget_behavior(sapi) # Fails cases 2 and 3
assert_rget_behavior(rget) # Only one to pass all 3
collections.default_dict will handle the providing of default values for nonexistent keys at least.
The Iterative Solution
def deep_get(d:dict, keys, default=None, create=True):
if not keys:
return default
for key in keys[:-1]:
if key in d:
d = d[key]
elif create:
d[key] = {}
d = d[key]
else:
return default
key = keys[-1]
if key in d:
return d[key]
elif create:
d[key] = default
return default
def deep_set(d:dict, keys, value, create=True):
assert(keys)
for key in keys[:-1]:
if key in d:
d = d[key]
elif create:
d[key] = {}
d = d[key]
d[keys[-1]] = value
return value
I am about to test it inside of a Django project with a line such as:
keys = ('options', 'style', 'body', 'name')
val = deep_set(d, keys, deep_get(s, keys, 'dotted'))

Reverse of `__getitem___`

d[x] where d is a dict, invokes d.__getitem__(x). Is there a way to create a class F, so that y=F(X); d[y] would invoke some method in F instead: y.someMethod(d)?
Background: I'm trying to make a dict with "aliased" keys, so that if I have d[a]=42, then d[alias_of_a] would return 42 as well. This is pretty straightforward with the custom __getitem__, for example:
class oneOf(object):
def __init__(self, *keys):
self.keys = keys
class myDict(dict):
def __getitem__(self, item):
if isinstance(item, oneOf):
for k in item.keys:
if k in self:
return self[k]
return dict.__getitem__(self, item)
a = myDict({
'Alpha': 1,
'B': 2,
})
print a[oneOf('A', 'Alpha')]
print a[oneOf('B', 'Bravo')]
However, I'm wondering if it could be possible without overriding dict:
a = {
'Alpha': 1,
'B': 2,
}
print a[???('A', 'Alpha')]
print a[???('B', 'Bravo')]
If this is not possible, how to make it work the other way round:
a = {
???('A', 'Alpha'): 1,
???('B', 'Bravo'): 2,
}
print a['A']
print a['Bravo']
What it important to me is that I'd like to avoid extending dict.
This use-case is impossible:
a = {
'Alpha': 1,
'B': 2,
}
a[???('A', 'Alpha')]
a[???('B', 'Bravo')]
This is because the dict will first hash the object. In order to force a collision, which will allow overriding equality to take hold, the hashes need to match. But ???('A', 'Alpha') can only hash to one of 'A' or 'Alpha', and if it makes the wrong choice it has failed.
The other use-case has a similar deduction applied to it:
a = {
???('A', 'Alpha'): 1,
???('B', 'Bravo'): 2,
}
a['A']
a['Bravo']
a['A'] will look up with a different hash to a['Alpha'], so again ???('A', 'Alpha') needs to have both hashes, which is impossible.
You need cooperation from both the keys and the values in order for this to work.
You could in theory use inspect.getouterframes in the __hash__ method to check the values of the dictionary, but this would only work if dictionaries had Python frames. If your intent is to monkey patch a function that sort-of does what you want but not quite, this might (just about) work(ish, sort of).
import inspect
class VeryHackyAnyOfHack:
def __init__(self, variable_name_hack, *args):
self.variable_name_hack = variable_name_hack
self.equal_to = args
def __eq__(self, other):
return other in self.equal_to
def __hash__(self):
outer_frame = inspect.getouterframes(inspect.currentframe())[1]
assumed_target_dict = outer_frame[0].f_locals[self.variable_name_hack]
for item in self.equal_to:
if item in assumed_target_dict:
return hash(item)
# Failure
return hash(item[0])
This is used like so:
import random
def check_thing_agains_dict(item):
if random.choice([True, False]):
internal_dict = {"red": "password123"}
else:
internal_dict = {"blue": "password123"}
return internal_dict[item]
myhack = VeryHackyAnyOfHack('internal_dict', "red", "blue")
check_thing_agains_dict(myhack)
#>>> 'password123'
Again, the very fact that you have to do this means that in practice it's not possible. It's also a language extension, so this isn't portable.
The built-in dict provides very simple lookup semantics: given a hashable object x, return the object y that x was mapped to previously. If you want multiple keys that map to the same object, you'll need to set that up explicitly:
# First, initialize the dictionary with one key per equivalence class
a = { 'a': 1, 'b': 2 }
# Then, set up any aliases.
a['Alpha'] = a['a']
a['Bravo'] = a['b']
The TransformDict class being considered for inclusion in Python 3.5 would simplify this somewhat by allowing you to replace step 2 with a "secondary" lookup function that would map the given key to its canonical representation prior to the primary lookup. Something like
def key_transform(key):
if key in {'Alpha', 'Aleph'}:
return 'a'
elif key in {'Bravo', 'Beta', 'Beth'}:
return 'b'
a = TransformDict(key_transform, a=1, b=2)
assert a['Alpha'] is a['a']

Categories