How to convert a Python dataclass to a dictionary of string literals?

Given a dataclass like below:

class MessageHeader(BaseModel):
    message_id: uuid.UUID

    def dict(self, **kwargs):
        return json.loads(self.json())

I would like to get a dictionary of string literals when I call dict() on MessageHeader. The desired outcome is like below:

{'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}

I want to avoid using a 3rd-party library like pydantic, and I do not want to use json.loads(self.json()), as there are extra round trips.

Is there any better way to convert a dataclass to a dictionary with string literals like above?

You can use dataclasses.asdict:

from dataclasses import dataclass, asdict

class MessageHeader(BaseModel):
    message_id: uuid.UUID

    def dict(self):
        return {k: str(v) for k, v in asdict(self).items()}

If you're sure that your class only has string values, you can skip the dictionary comprehension entirely:

class MessageHeader(BaseModel):
    message_id: uuid.UUID

    dict = asdict
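
For example, here is the comprehension-based version end to end, in a minimal sketch that substitutes a plain @dataclass for the question's BaseModel base:

from dataclasses import dataclass, asdict
import uuid

@dataclass
class MessageHeader:
    message_id: uuid.UUID

    def dict(self):
        # stringify every value so the result contains only string literals
        return {k: str(v) for k, v in asdict(self).items()}

header = MessageHeader(message_id=uuid.uuid4())
print(header.dict())  # e.g. {'message_id': '383b0bfc-743e-4738-8361-27e6a0753b5a'}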

For absolute pure, unadulterated speed and boundless efficiency, the kinds of which could even cause the likes of Chuck Norris to take pause and helplessly look on in awe, I humbly recommend this remarkably well planned-out approach with __dict__:
def dict(self):
    _dict = self.__dict__.copy()
    _dict['message_id'] = str(_dict['message_id'])
    return _dict
For a class that defines a __slots__ attribute, such as with @dataclass(slots=True), the above approach most likely won't work, as the __dict__ attribute won't be available on class instances. In that case, a highly efficient "shoot for the moon" approach such as below could instead be viable:
def dict(self):
    body_lines = ','.join(f"'{f}':" + (f'str(self.{f})' if f == 'message_id'
                                       else f'self.{f}') for f in self.__slots__)
    # Compute the text of the entire function.
    txt = f'def dict(self):\n return {{{body_lines}}}'
    ns = {}
    exec(txt, locals(), ns)
    _dict_fn = self.__class__.dict = ns['dict']
    return _dict_fn(self)
In case anyone's teetering at the edge of their seats right now (I know, this is really incredible, breakthrough-level stuff) - I've added my personal timings via the timeit module below, that should hopefully shed a little more light in the performance aspect of things.
FYI, the approaches with pure __dict__ are consistently much faster than dataclasses.asdict().
Note: Even though __dict__ works better in this particular case, dataclasses.asdict() will likely be better for composite dictionaries, such as ones with nested dataclasses, or values with mutable types such as dict or list.
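
To make that note concrete, here is a minimal sketch of where the two approaches diverge for nested dataclasses:

from dataclasses import dataclass, asdict, field

@dataclass
class Inner:
    value: int = 1

@dataclass
class Outer:
    inner: Inner = field(default_factory=Inner)

o = Outer()
print(asdict(o))   # {'inner': {'value': 1}} -- recurses into nested dataclasses
print(o.__dict__)  # {'inner': Inner(value=1)} -- keeps the nested instance as-is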
from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4

class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute"""

    def dict(self):
        body_lines = ','.join(f"'{f}':" + (f'str(self.{f})' if f == 'message_id'
                                           else f'self.{f}') for f in self.__slots__)
        # Compute the text of the entire function.
        txt = f'def dict(self):\n return {{{body_lines}}}'
        ns = {}
        exec(txt, locals(), ns)
        _dict_fn = self.__class__.dict = ns['dict']
        return _dict_fn(self)

@dataclass
class MessageHeader:
    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        _dict = self.__dict__.copy()
        _dict['message_id'] = str(_dict['message_id'])
        return _dict

    def dict2(self):
        return {k: str(v) if k == 'message_id' else v
                for k, v in self.__dict__.items()}

    def dict3(self):
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}

@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        return {k: str(v) if k == 'message_id' else v
                for k, v in asdict(self).items()}

if __name__ == '__main__':
    from timeit import timeit

    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()
    n = 10000

    print('dict1(): ', timeit('header.dict1()', number=n, globals=globals()))
    print('dict2(): ', timeit('header.dict2()', number=n, globals=globals()))
    print('dict3(): ', timeit('header.dict3()', number=n, globals=globals()))
    print('slots -> dict(): ', timeit('header_with_slots.dict()', number=n, globals=globals()))
    print('slots -> dict2(): ', timeit('header_with_slots.dict2()', number=n, globals=globals()))
    print()

    dict__ = header.dict1()
    print(dict__)
    asdict__ = header.dict3()
    print(asdict__)

    assert isinstance(dict__['message_id'], str)
    assert isinstance(dict__['integer'], int)
    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()
Results on my Mac M1 laptop:
dict1(): 0.005992999998852611
dict2(): 0.00800508284009993
dict3(): 0.07069579092785716
slots -> dict(): 0.00583599996753037
slots -> dict2(): 0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
Note: For a more "complete" implementation of DictMixin (named SerializableMixin), check out a related answer I also added.

Related

What's the appropriate data structure to hold a group of variables

I'm given a third-party function mk_config that gives me objects according to a (key, configs) pair. The typical usage is, when you need some of these objects, you say

args = mk_config('args', **configs)
validator = mk_config('validator', **configs)
postproc = mk_config('postproc', **configs)
and then work with said objects.
Since I don't like seeing big blocks of boilerplate, I think, okay, the following might be better:
def mk_configs_tuple(keys, **configs):
    return [mk_config(k, **configs) for k in keys]

args, validator, postproc = mk_configs_tuple(
    ['args', 'validator', 'postproc'],
    **configs)
But there's still a repetition of the key names, and if dealing with 20 of them, this could get out of hand. I could inject these into locals() to be D.R.Y., but most would say that's too dry, so I think I could do this:
def mk_configs_dict(keys, **configs):
    return {k: mk_config(k, **configs) for k in keys}

m = mk_configs_dict(['args', 'validator', 'postproc'], **configs)
assert list(m) == ['args', 'validator', 'postproc']
Now, dictionaries are fast, but they don't always autosuggest well in IDEs, and it's more cumbersome to write m['key'] than m.key (as JavaScript allows for JSON-like objects). So I think:
def mk_config_obj(keys, **configs):
    class ConfigObj: ...
    config_obj = ConfigObj()
    for key in keys:
        setattr(config_obj, key, mk_config(key, **configs))
    return config_obj

config_obj = mk_config_obj(['args', 'validator', 'postproc'], **configs)
assert {'args', 'validator', 'postproc'}.issubset(dir(config_obj))
But then I think: if I can't do this with what Python comes with naturally, I'm missing something.
Maybe a named tuple?
from collections import namedtuple

def mk_config_obj(keys, **configs):
    return namedtuple('ConfigObj', keys)(*[mk_config(key, **configs) for key in keys])
I like to use a list of dictionaries sometimes
list_of_dictionaries = []
a = {}
s = {}
d = {}
f = {}
list_of_dictionaries.append(a)
list_of_dictionaries.append(s)
list_of_dictionaries.append(d)
list_of_dictionaries.append(f)
The best answer I have so far is:
from collections import namedtuple

def mk_config_nt(keys, **configs):
    ConfigNT = namedtuple('ConfigNT', field_names=keys)
    return ConfigNT(**{k: mk_config(k, **configs) for k in keys})
Why do I like it?
Because I get my keys as attributes:
config_nt = mk_config_nt(['args', 'validator', 'postproc'], **configs)
assert {'args', 'validator', 'postproc'}.issubset(dir(config_nt))
which I can use autosuggest/complete on (though there are two extra annoying method names: count and index).
Because I still have the choice to do this:
args, validator, postproc = mk_config_nt(['args', 'validator', 'postproc'], **configs)
And... it's actually faster than a dict, somehow. Key/attribute access timing (on a three item group):
custom object: 36.9 ns
dict: 32.4 ns
namedtuple: 30.7 ns
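
The numbers above are the poster's; a sketch of how such an access micro-benchmark might be run (the names here are illustrative, not from the original post):

from collections import namedtuple
from timeit import timeit

ConfigNT = namedtuple('ConfigNT', ['args', 'validator', 'postproc'])
nt = ConfigNT(1, 2, 3)
dct = {'args': 1, 'validator': 2, 'postproc': 3}

n = 1_000_000
# per-access time in seconds; results will vary by machine
print('namedtuple:', timeit('nt.validator', number=n, globals=globals()) / n)
print('dict:      ', timeit("dct['validator']", number=n, globals=globals()) / n)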
Say hello to Enums.

from enum import Enum

class MKConfigKey(Enum):
    args = 'args'
    validator = 'validator'
    postproc = 'postproc'

config_map = {key: mk_config(key.value, **config) for key in MKConfigKey}
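
Usage might then look like this (mk_config and config are the question's own helpers):

args = config_map[MKConfigKey.args]
validator = config_map[MKConfigKey.validator]
postproc = config_map[MKConfigKey.postproc]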

How do I change class <type> to Python type <list>

I'm new to OOP so this may be an anti-pattern, but I have a class:

class SomeObject:
    """ WIP OBJECT """
    parsed_dates = []

    def __init__(self, description, weekdays, months):
        self.description = str(description)
        self.weekdays = frozenset(SomeObject.flatn(weekdays))  # flat set of ints
        self.months = frozenset(SomeObject.flatn(months))  # flat set of ints

    def __repr__(self):
        return self.description

    ( ... )

    def accepts(self, dt):
        return any([
            dt.weekday() in self.weekdays,
            dt.month in self.months,
        ])
And I build a defaultdict with:
def main(dt_start, dt_end, s: str):
    d = defaultdict(list)
    description = pull_desc(s=s)
    active_days = convert_tuples_to_numpy_array(description, np.str)
    SomeObject.active_days(active_days_for_meter)
    for test_set in SomeObject.parsed_dates:
        for dt in [dt for dt in SomeObject.date_range(dt_start, dt_end) if test_set.accepts(dt)]:
            # add the datetime objects to a list of values with the description as the key
            d[test_set].append(str(dt.isoformat()))
    return d
Which returns:
defaultdict(<class 'list'>, {['Saturdays']: ['2020-01-04', '2020-01-11', '2020-01-18']})
Now when I go to compare the key (which resembles a list) of this defaultdict() to, say, a plain string, Python doesn't recognize it, as the key is of type <class '__main__.SomeObject'> rather than type list.
i.e:
dic = {"Saturday": 1}
for k1, v2 in dic.items()
for k2, v2 in defaultdict.items():
if k1 == k2:
print("Chicken Wing")
The chicken wing will never print, because type(k1) is <class 'str'> and type(k2) is <class '__main__.SomeObject'>.
How do I get around this?
What exactly am I misunderstanding about OOP?
What concepts should I read up on to help me understand?
All info is appreciated, thanks
So, it turns out that test_set is an instance of SomeObject, which is why type(k2) is <class '__main__.SomeObject'>.
It seems that the bit you are missing is:
d[str(test_set)].append(str(dt.isoformat()))
and then your __repr__() method will kick in.
Update:
The reason that main() returns defaultdict(<class 'list'>, {'Saturdays': ['2020-01-04', '2020-01-11', '2020-01-18']}) is that you have a __repr__() method, and the defaultdict display calls repr() on each key of the dict.
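
A minimal sketch of the distinction, i.e. why the key *prints* like a string but never compares equal to one:

class SomeObject:
    def __init__(self, description):
        self.description = description
    def __repr__(self):
        return self.description

key = SomeObject('Saturdays')
d = {key: ['2020-01-04']}
print(d)                        # {Saturdays: ['2020-01-04']} -- repr() only *looks* like a str
print(key == 'Saturdays')       # False: no __eq__ defined, so identity comparison is used
print(str(key) == 'Saturdays')  # True: convert first, then compare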

Python: Accessing YAML values using "dot notation"

I'm using a YAML configuration file. So this is the code to load my config in Python:
import os
import yaml

with open('./config.yml') as file:
    config = yaml.safe_load(file)
This code actually creates a dictionary. Now the problem is that in order to access the values I need to use tons of brackets.
YAML:
mysql:
    user:
        pass: secret
Python:
import os
import yaml

with open('./config.yml') as file:
    config = yaml.safe_load(file)

print(config['mysql']['user']['pass'])  # <--
I'd prefer something like that (dot notation):
config('mysql.user.pass')
So, my idea is to utilize the PyStache render() interface.
import os
import yaml

with open('./config.yml') as file:
    config = yaml.safe_load(file)

import pystache

def get_config_value(yml_path, config):
    return pystache.render('{{' + yml_path + '}}', config)

get_config_value('mysql.user.pass', config)
Would that be a "good" solution? If not, what would be a better alternative?
Additional question [Solved]
I've decided to use Ilja Everilä's solution. But now I've got an additional question: How would you create a wrapper Config class around DotConf?
The following code doesn't work but I hope you get the idea what I'm trying to do:
class Config(DotDict):
    def __init__(self):
        with open('./config.yml') as file:
            DotDict.__init__(yaml.safe_load(file))

config = Config()
print(config.django.admin.user)
Error:
AttributeError: 'super' object has no attribute '__getattr__'
Solution
You just need to pass self to the constructor of the super class.
DotDict.__init__(self, yaml.safe_load(file))
Even better solution (Ilja Everilä)
super().__init__(yaml.safe_load(file))
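
Putting the fix together, the wrapper might read as below (a sketch assuming the DotDict class from the answer further down, and a config.yml that defines django.admin.user):

import yaml

class Config(DotDict):
    def __init__(self):
        with open('./config.yml') as file:
            # pass the parsed YAML to the DotDict constructor
            super().__init__(yaml.safe_load(file))

config = Config()
print(config.django.admin.user)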
The Simple
You could use reduce to extract the value from the config:
In [41]: config = {'asdf': {'asdf': {'qwer': 1}}}

In [42]: from functools import reduce
    ...:
    ...: def get_config_value(key, cfg):
    ...:     return reduce(lambda c, k: c[k], key.split('.'), cfg)
    ...:

In [43]: get_config_value('asdf.asdf.qwer', config)
Out[43]: 1
This solution is easy to maintain and has very few new edge cases, if your YAML uses a very limited subset of the language.
The Correct
Use a proper YAML parser and tools, such as in this answer.
The Convoluted
On a lighter note (not to be taken too seriously), you could create a wrapper that allows using attribute access:
In [47]: class DotConfig:
    ...:
    ...:     def __init__(self, cfg):
    ...:         self._cfg = cfg
    ...:
    ...:     def __getattr__(self, k):
    ...:         v = self._cfg[k]
    ...:         if isinstance(v, dict):
    ...:             return DotConfig(v)
    ...:         return v
    ...:

In [48]: DotConfig(config).asdf.asdf.qwer
Out[48]: 1
Do note that this fails for keywords, such as "as", "pass", "if" and the like.
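
For instance (a hypothetical snippet, not from the original answer), a keyword key is still reachable via getattr():

cfg = DotConfig({'pass': 'secret'})
# cfg.pass                   # SyntaxError: 'pass' is a reserved keyword
print(getattr(cfg, 'pass'))  # 'secret'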
Finally, you could get really crazy (read: probably not a good idea) and customize dict to handle dotted string and tuple keys as a special case, with attribute access to items thrown in the mix (with its limitations):
In [58]: class DotDict(dict):
    ...:
    ...:     # update, __setitem__ etc. omitted, but required if
    ...:     # one tries to set items using dot notation. Essentially
    ...:     # this is a read-only view.
    ...:
    ...:     def __getattr__(self, k):
    ...:         try:
    ...:             v = self[k]
    ...:         except KeyError:
    ...:             return super().__getattr__(k)
    ...:         if isinstance(v, dict):
    ...:             return DotDict(v)
    ...:         return v
    ...:
    ...:     def __getitem__(self, k):
    ...:         if isinstance(k, str) and '.' in k:
    ...:             k = k.split('.')
    ...:         if isinstance(k, (list, tuple)):
    ...:             return reduce(lambda d, kk: d[kk], k, self)
    ...:         return super().__getitem__(k)
    ...:
    ...:     def get(self, k, default=None):
    ...:         if isinstance(k, str) and '.' in k:
    ...:             try:
    ...:                 return self[k]
    ...:             except KeyError:
    ...:                 return default
    ...:         return super().get(k, default=default)
    ...:

In [59]: dotconf = DotDict(config)

In [60]: dotconf['asdf.asdf.qwer']
Out[60]: 1

In [61]: dotconf['asdf', 'asdf', 'qwer']
Out[61]: 1

In [62]: dotconf.asdf.asdf.qwer
Out[62]: 1

In [63]: dotconf.get('asdf.asdf.qwer')
Out[63]: 1

In [64]: dotconf.get('asdf.asdf.asdf')

In [65]: dotconf.get('asdf.asdf.asdf', 'Nope')
Out[65]: 'Nope'
On the one hand, your example takes the right approach by using get_config_value('mysql.user.pass', config) instead of solving the dotted access with attributes. I am not sure whether you deliberately avoided the more intuitive print(config.mysql.user.pass), which you can't get to work anyway, even when overloading __getattr__, as pass is a Python keyword.
However your example describes only a very restricted subset of YAML files as it doesn't involve any sequence collections, nor any complex keys.
If you want to cover more than the tiny subset you can e.g. extend the powerful round-trip capable objects of ruamel.yaml:¹
import ruamel.yaml

def mapping_string_access(self, s, delimiter=None, key_delim=None):
    def p(v):
        try:
            v = int(v)
        except:
            pass
        return v

    # possibly extend for primitives like float, datetime, booleans, etc.
    if delimiter is None:
        delimiter = '.'
    if key_delim is None:
        key_delim = ','
    try:
        key, rest = s.split(delimiter, 1)
    except ValueError:
        key, rest = s, None
    if key_delim in key:
        key = tuple((p(key) for key in key.split(key_delim)))
    else:
        key = p(key)
    if rest is None:
        return self[key]
    return self[key].string_access(rest, delimiter, key_delim)

ruamel.yaml.comments.CommentedMap.string_access = mapping_string_access

def sequence_string_access(self, s, delimiter=None, key_delim=None):
    if delimiter is None:
        delimiter = '.'
    try:
        key, rest = s.split(delimiter, 1)
    except ValueError:
        key, rest = s, None
    key = int(key)
    if rest is None:
        return self[key]
    return self[key].string_access(rest, delimiter, key_delim)

ruamel.yaml.comments.CommentedSeq.string_access = sequence_string_access
Once that is set up, you can run the following:
yaml_str = """\
mysql:
user:
pass: secret
list: [a: 1, b: 2, c: 3]
[2016, 9, 14]: some date
42: some answer
"""
yaml = ruamel.yaml.YAML()
config = yaml.load(yaml_str)
def get_config_value(path, data, **kw):
return data.string_access(path, **kw)
print(get_config_value('mysql.user.pass', config))
print(get_config_value('mysql:user:pass', config, delimiter=":"))
print(get_config_value('mysql.list.1.b', config))
print(get_config_value('mysql.2016,9,14', config))
print(config.string_access('mysql.42'))
giving:
secret
secret
2
some date
some answer
showing that with a bit more forethought and very little extra work you can have flexible dotted access to a vast range of YAML files, not just those consisting of recursive mappings with string scalars as keys.

As shown, you can directly call config.string_access('mysql.user.pass') instead of defining and using get_config_value().

This works with strings and integers as mapping keys, but can be easily extended to support other key types (boolean, date, date-time).
¹ This was done using ruamel.yaml, a YAML 1.2 parser, of which I am the author.
I ended up using python-box.
This package provides multiple ways to read config files (yaml, csv, json, ...).
And not only that, it allows you to pass dict or strings directly:
from box import Box
import yaml # Only required for different loaders
# Pass dict directly
movie_box = Box({ "Robin Hood: Men in Tights": { "imdb stars": 6.7, "length": 104 } })
# Load from yaml file
# Here it is also possible to use PyYAML arguments,
# for example to specify different loaders e.g. SafeLoader or FullLoader
conf = Box.from_yaml(filename="./config.yaml", Loader=yaml.FullLoader)
conf.mysql.user.pass
A lot more examples are available in the Wiki.
It's quite an old question, but I came here hunting for the answer and looking for a simpler solution. I finally came up with my own solution using the easydict library, installed with pip install easydict:
def yaml_load(fileName):
    import yaml
    from easydict import EasyDict as edict

    fc = None
    with open(fileName, 'r') as f:
        fc = edict(yaml.load(f))
        ## or use safe_load
        ## fc = edict(yaml.safe_load(f))
    return fc
Now, simply call yaml_load with the valid yaml filename:
config = yaml_load('./config.yml')
## assuming: config["mysql"]["user"]["pass"] is a valid key in config.yml
print("{}".format(config.mysql.user.pass))
I had the same problem a while ago and built this getter:
def get(self, key):
    """Tries to find the configuration value for a given key.

    :param str key: Key in dot-notation (e.g. 'foo.lol').
    :return: The configuration value. None if no value was found.
    """
    try:
        return self.__lookup(self.config, key)
    except KeyError:
        return None

def __lookup(self, dct, key):
    """Checks dct recursively to find the value for key.
    Is used by get() internally.

    :param dict dct: The configuration dict.
    :param str key: The key we are looking for.
    :return: The configuration value.
    :raise KeyError: If the given key is not in the configuration dict.
    """
    if '.' in key:
        key, node = key.split('.', 1)
        return self.__lookup(dct[key], node)
    else:
        return dct[key]
The getter looks up a config value from self.config recursively (by using __lookup).
If you have trouble adjusting this for your case, feel free to ask for further help.
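
A minimal class shell around the getter might look like this (a sketch; the path and YAML layout are assumed from the question):

import yaml

class Config:
    def __init__(self, path='./config.yml'):
        with open(path) as f:
            self.config = yaml.safe_load(f)

    # get() and __lookup() exactly as above
    def get(self, key):
        try:
            return self.__lookup(self.config, key)
        except KeyError:
            return None

    def __lookup(self, dct, key):
        if '.' in key:
            key, node = key.split('.', 1)
            return self.__lookup(dct[key], node)
        return dct[key]

conf = Config()
print(conf.get('mysql.user.pass'))  # 'secret', given the question's YAML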
I generally follow the best practice of converting config (any kind, not just YAML) into an in-memory object.

This way, the text-based config is unwrapped by one function and the text is thrown away, giving a clean object to work with, instead of having every function deal with the internals of the config. All other functions then only know of that one internal object interface. If any parameter is added, renamed, or deleted from the config file, the only function to change is the loader, which loads the config into the in-memory object.

Below is an example I did for loading a FloydHub config YAML file into an in-memory object. I feel it is a very good design pattern.
First define a config representative class like below:
class FloydYamlConfig(object):
    class Input:
        def __init__(self, destination, source):
            self.destination = destination
            self.source = source

    def __init__(self, floyd_yaml_dict):
        self.machine = floyd_yaml_dict['machine']
        self.env = floyd_yaml_dict['env']
        self.description = floyd_yaml_dict['description']
        self.max_runtime = floyd_yaml_dict['max_runtime']
        self.command = floyd_yaml_dict['command']
        self.input = []
        for input_conf in floyd_yaml_dict['input']:
            input_obj = self.Input(destination=input_conf['destination'], source=input_conf['source'])
            self.input.append(input_obj)

    def __str__(self):
        input_str = ''
        for input_obj in self.input:
            input_str += '\ndestination: {}\n source: {}'.format(input_obj.destination, input_obj.source)
        print_str = ('machine: {}\n'
                     'env: {}\n'
                     'input: {}\n'
                     'description: {}\n'
                     'max_runtime: {}\n'
                     'command: {}\n').format(
                         self.machine, self.env, input_str, self.description, self.max_runtime, self.command)
        return print_str

Then load the yaml into the object for further use:

floyd_conf = read_floyd_yaml_config(args.floyd_yaml_path)

def read_floyd_yaml_config(floyd_yaml_path) -> FloydYamlConfig:
    with open(floyd_yaml_path) as f:
        yaml_conf_dict = yaml.safe_load(f)
    floyd_conf = FloydYamlConfig(yaml_conf_dict)
    # print(floyd_conf)
    return floyd_conf
Sample yaml:

# see: https://docs.floydhub.com/floyd_config
machine: gpu2
env: tensorflow-1.0
input:
  - destination: data
    source: abc/datasets/my-data/6
  - destination: config
    source: abc/datasets/my-config/1
description: this is a test
max_runtime: 3600
command: >-
    echo 'hello world'

Is there a recursive version of the dict.get() built-in?

I have a nested dictionary object and I want to be able to retrieve values of keys with an arbitrary depth. I'm able to do this by subclassing dict:
>>> class MyDict(dict):
...     def recursive_get(self, *args, **kwargs):
...         default = kwargs.get('default')
...         cursor = self
...         for a in args:
...             if cursor is default: break
...             cursor = cursor.get(a, default)
...         return cursor
...
>>> d = MyDict(foo={'bar': 'baz'})
>>> d
{'foo': {'bar': 'baz'}}
>>> d.get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo')
{'bar': 'baz'}
>>> d.recursive_get('foo', 'bar')
'baz'
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
However, I don't want to have to subclass dict to get this behavior. Is there some built-in method that has equivalent or similar behavior? If not, are there any standard or external modules that provide this behavior?
I'm using Python 2.7 at the moment, though I would be curious to hear about 3.x solutions as well.
A very common pattern to do this is to use an empty dict as your default:
d.get('foo', {}).get('bar')
If you have more than a couple of keys, you could use reduce (note that in Python 3 reduce must be imported: from functools import reduce) to apply the operation multiple times
reduce(lambda c, k: c.get(k, {}), ['foo', 'bar'], d)
Of course, you should consider wrapping this into a function (or a method):
def recursive_get(d, *keys):
    return reduce(lambda c, k: c.get(k, {}), keys, d)
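
Usage, with the OP's example dict:

d = {'foo': {'bar': 'baz'}}
recursive_get(d, 'foo', 'bar')   # 'baz'
recursive_get(d, 'foo', 'nope')  # {} -- missing keys collapse to the empty-dict default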
@ThomasOrozco's solution is correct, but resorts to a lambda function, which is only necessary to avoid TypeError if an intermediary key does not exist. If this isn't a concern, you can use dict.get directly:
from functools import reduce

def get_from_dict(dataDict, mapList):
    """Iterate nested dictionary"""
    return reduce(dict.get, mapList, dataDict)
Here's a demo:
a = {'Alice': {'Car': {'Color': 'Blue'}}}
path = ['Alice', 'Car', 'Color']
get_from_dict(a, path) # 'Blue'
If you wish to be more explicit than using lambda while still avoiding TypeError, you can wrap in a try / except clause:
def get_from_dict(dataDict, mapList):
    """Iterate nested dictionary"""
    try:
        return reduce(dict.get, mapList, dataDict)
    except TypeError:
        return None  # or some other default value
Finally, if you wish to raise KeyError when a key does not exist at any level, use operator.getitem or dict.__getitem__:
from functools import reduce
from operator import getitem

def getitem_from_dict(dataDict, mapList):
    """Iterate nested dictionary"""
    return reduce(getitem, mapList, dataDict)
    # or reduce(dict.__getitem__, mapList, dataDict)
Note that [] is syntactic sugar for the __getitem__ method, so this mirrors precisely how you would ordinarily access a dictionary value. The operator module just provides a more readable means of accessing this method.
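
For instance, reusing the demo dict from above:

getitem_from_dict(a, ['Alice', 'Car', 'Color'])  # 'Blue'
getitem_from_dict(a, ['Alice', 'Bike'])          # raises KeyError: 'Bike'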
You can actually achieve this really neatly in Python 3, given its handling of default keyword arguments and tuple decomposition:
In [1]: def recursive_get(d, *args, default=None):
   ...:     if not args:
   ...:         return d
   ...:     key, *args = args
   ...:     return recursive_get(d.get(key, default), *args, default=default)
   ...:
Similar code will also work in python 2, but you'd need to revert to using **kwargs, as you did in your example. You'd also need to use indexing to decompose *args.
In any case, there's no need for a loop if you're going to make the function recursive anyway.
You can see that the above code demonstrates the same functionality as your existing method:
In [2]: d = {'foo': {'bar': 'baz'}}
In [3]: recursive_get(d, 'foo')
Out[3]: {'bar': 'baz'}
In [4]: recursive_get(d, 'foo', 'bar')
Out[4]: 'baz'
In [5]: recursive_get(d, 'bogus key', default='nonexistent key')
Out[5]: 'nonexistent key'
You can use a defaultdict to give you an empty dict on missing keys:
from collections import defaultdict
mydict = defaultdict(dict)
This only goes one level deep - mydict[missingkey] is an empty dict, while mydict[missingkey][missingkey] is a KeyError. You can add as many levels as needed by wrapping it in more defaultdicts, e.g. defaultdict(lambda: defaultdict(dict)) (the factory argument must be callable, so the inner defaultdict needs the lambda). You could also have the innermost one be another defaultdict with a sensible factory function for your use case, e.g.

mydict = defaultdict(lambda: defaultdict(lambda: 'big summer blowout'))
If you need it to go to arbitrary depth, you can do that like so:
def insanity():
    return defaultdict(insanity)

print(insanity()[0][0][0][0])
There is none that I am aware of. However, you don't need to subclass dict at all; you can just write a function that takes a dictionary, args, and kwargs and does the same thing:

def recursive_get(d, *args, **kwargs):
    default = kwargs.get('default')
    cursor = d
    for a in args:
        if cursor is default: break
        cursor = cursor.get(a, default)
    return cursor
use it like this
recursive_get(d, 'foo', 'bar')
The OP requested the following behavior
>>> d.recursive_get('bogus key', default='nonexistent key')
'nonexistent key'
(As of June 15, 2022) none of the up-voted answers accomplish this, so I have modified @ThomasOrozco's solution to resolve this:
from functools import reduce

def rget(d, *keys, default=None):
    """Use a sentinel to handle both missing keys AND alternate default values"""
    sentinel = {}
    v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
    if v is sentinel:
        return default
    return v
Below is a complete, unit-test-like demonstration of where the other answers have issues. I've named each approach according to its author. Note that this answer is the only one which passes all 4 test cases, namely
Basic retrieval when key-tree exists
Non-existent key-tree returns None
Option to specify a default aside from None
Values which are an empty dict should return as themselves rather than the default
from functools import reduce

def thomas_orozco(d, *keys):
    return reduce(lambda c, k: c.get(k, {}), keys, d)

def jpp(dataDict, *mapList):
    """Same logic as thomas_orozco but exits at the first missing key instead of last"""
    try:
        return reduce(dict.get, *mapList, dataDict)
    except TypeError:
        return None

def sapi(d, *args, default=None):
    if not args:
        return d
    key, *args = args
    return sapi(d.get(key, default), *args, default=default)

def rget(d, *keys, default=None):
    sentinel = {}
    v = reduce(lambda c, k: c.get(k, sentinel), keys, d)
    if v is sentinel:
        return default
    return v

def assert_rget_behavior(func):
    """Unit tests for desired behavior of recursive dict.get()"""
    fail_count = 0

    # Basic retrieval when key-tree exists
    d = {'foo': {'bar': 'baz', 'empty': {}}}
    try:
        v = func(d, 'foo', 'bar')
        assert v == 'baz', f'Unexpected value {v} retrieved'
    except Exception as e:
        print(f'Case 1: Failed basic retrieval with {repr(e)}')
        fail_count += 1

    # Non-existent key-tree returns None
    try:
        v = func(d, 'bogus', 'key')
        assert v is None, f'Missing key retrieved as {v} instead of None'
    except Exception as e:
        print(f'Case 2: Failed missing retrieval with {repr(e)}')
        fail_count += 1

    # Option to specify a default aside from None
    default = 'alternate'
    try:
        v = func(d, 'bogus', 'key', default=default)
        assert v == default, f'Missing key retrieved as {v} instead of {default}'
    except Exception as e:
        print(f'Case 3: Failed default retrieval with {repr(e)}')
        fail_count += 1

    # Values which are an empty dict should return as themselves rather than the default
    try:
        v = func(d, 'foo', 'empty')
        assert v == {}, f'Empty dict value retrieved as {v} instead of {{}}'
    except Exception as e:
        print(f'Case 4: Failed retrieval of empty dict value with {repr(e)}')
        fail_count += 1

    # Success only if all pass
    if fail_count == 0:
        print('Passed all tests!')

if __name__ == '__main__':
    assert_rget_behavior(thomas_orozco)  # Fails cases 2 and 3
    assert_rget_behavior(jpp)  # Fails cases 1, 3, and 4
    assert_rget_behavior(sapi)  # Fails cases 2 and 3
    assert_rget_behavior(rget)  # Only one to pass all 4
collections.defaultdict will handle the providing of default values for nonexistent keys at least.
The Iterative Solution
def deep_get(d: dict, keys, default=None, create=True):
    if not keys:
        return default

    for key in keys[:-1]:
        if key in d:
            d = d[key]
        elif create:
            d[key] = {}
            d = d[key]
        else:
            return default

    key = keys[-1]
    if key in d:
        return d[key]
    elif create:
        d[key] = default
    return default

def deep_set(d: dict, keys, value, create=True):
    assert keys
    for key in keys[:-1]:
        if key in d:
            d = d[key]
        elif create:
            d[key] = {}
            d = d[key]
    d[keys[-1]] = value
    return value
I am about to test it inside of a Django project with a line such as:
keys = ('options', 'style', 'body', 'name')
val = deep_set(d, keys, deep_get(s, keys, 'dotted'))

How to add __iter__ to dynamic type?

Source
def flags(*opts):
    keys = [t[0] for t in opts]
    words = [t[1] for t in opts]
    nums = [2**i for i in range(len(opts))]
    attrs = dict(zip(keys, nums))
    choices = iter(zip(nums, words))
    return type('Flags', (), dict(attrs))

Abilities = flags(
    ('FLY', 'Can fly'),
    ('FIREBALL', 'Can shoot fireballs'),
    ('INVISIBLE', 'Can turn invisible'),
)
Question
How can I add an __iter__ method to Abilities so that I can iterate over choices?
Why?
This way I can use things like
hero.abilities = Abilities.FLY | Abilities.FIREBALL

if hero.abilities & Abilities.FIREBALL:
    for k, v in Abilities:
        print k, v
in my code without having to use any magic numbers or strings, and I can also save the set of flags to the DB as a single int, or display the list in a readable format.
Other improvements are welcome.
There's no need to use a dynamic type here; I'd restructure this as a simple class, for example:
class flags(object):
    def __init__(self, *opts):
        keys = [t[0] for t in opts]
        words = [t[1] for t in opts]
        nums = [2**i for i in range(len(opts))]
        self.attrs = dict(zip(keys, nums))
        self.choices = zip(nums, words)

    def __getattr__(self, a):
        return self.attrs[a]

    def __iter__(self):
        return iter(self.choices)

Abilities = flags(
    ('FLY', 'Can fly'),
    ('FIREBALL', 'Can shoot fireballs'),
    ('INVISIBLE', 'Can turn invisible'),
)

print Abilities.FLY

for k, v in Abilities:
    print k, v
Why are you doing it the hard way? If you want a dict with __getattr__ overriding, why not start with one:
class Flags(dict):
    def __init__(self, *args):
        dict.__init__(self, args)

    def __getattr__(self, name):
        return self[name]

    ...
This also has the Advantage of Least Surprise, since dict.__iter__() generates keys and dict.iteritems() yields tuples.
You need two key changes -- the last lines of flags should be:
choices = iter(zip(nums,words))
attrs['__iter__'] = lambda self: choices
return type('Flags', (), dict(attrs))()
Note that I've added a line setting __iter__, and a trailing () in the return to instantiate the type (to loop on a type you'd have to use a custom metaclass -- way overkill, no need).
The last line of flags should actually be:
return type('Flags', (), attrs)()
as there's no reason to make a copy of attrs, which is already a dict (but that's an innocuous redundancy, not a killer mistake;-).
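
Assembled, the whole factory might read as below. This is a sketch, not the answer's verbatim code: choices is kept as a list here so the instance can be iterated more than once, whereas a bare iter() would be exhausted after the first loop.

def flags(*opts):
    keys = [t[0] for t in opts]
    words = [t[1] for t in opts]
    nums = [2**i for i in range(len(opts))]
    attrs = dict(zip(keys, nums))
    choices = list(zip(nums, words))                # a list survives repeated iteration
    attrs['__iter__'] = lambda self: iter(choices)  # fresh iterator on every loop
    return type('Flags', (), attrs)()               # note the (): return an instance

Abilities = flags(('FLY', 'Can fly'), ('FIREBALL', 'Can shoot fireballs'))
for k, v in Abilities:
    print(k, v)  # 1 Can fly / 2 Can shoot fireballs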
It would be a more Pythonic solution if you implemented your own __getattr__ method for accessing dynamic fields instead of dealing with metaclasses through type.

Edit: It's not clear to me what you mean by choices, but here is an example:
class Abilities(object):
    def __init__(self, abilities):
        self.abilities = abilities

    def __getattr__(self, name):
        a = [x for x in self.abilities if x[0] == name]
        if len(a) != 1:
            raise AttributeError('attribute {0} not found'.format(name))
        title, id, help = a[0]
        return id

    def __iter__(self):
        return ((id, help) for title, id, help in self.abilities)

SPEC = [
    ('FLY', 10, 'Can fly'),
    ('FIREBALL', 13, 'Can shoot fireballs'),
    ('INVISIBLE', 14, 'Can turn invisible'),
]

abilities = Abilities(SPEC)

hero.abilities = abilities.FLY | abilities.FIREBALL

for k, v in abilities:
    print k, v
Based on your guys' suggestions, I came up with this:
Source
class enumerable(object):
    def __init__(self, func, *opts):
        keys = func(len(opts))
        self.attrs = dict(zip([t[0] for t in opts], keys))
        self.opts = zip(keys, [t[1] for t in opts])

    def __getattr__(self, a):
        return self.attrs[a]

    def __len__(self):
        return len(self.opts)

    def __iter__(self):
        return iter(self.opts)

    def __deepcopy__(self, memo):
        return self

class enum(enumerable):
    def __init__(self, *opts):
        return super(enum, self).__init__(range, *opts)

class flags(enumerable):
    def __init__(self, *opts):
        return super(flags, self).__init__(lambda l: [1 << i for i in range(l)], *opts)

### tests:

Abilities = enum(
    ('FLY', 'Can fly'),
    ('FIREBALL', 'Can shoot fireballs'),
    ('INVISIBLE', 'Can turn invisible'),
    ('FROST_NOVA', 'Can call down an ice storm'),
    ('BLINK', 'Can teleport short distances'),
)

print 'Fireball = %d' % Abilities.FIREBALL
print 'Number of options = %d' % len(Abilities)
for k, v in Abilities:
    print '%d: %s' % (k, v)
Output
Fireball = 1
Number of options = 5
0: Can fly
1: Can shoot fireballs
2: Can turn invisible
3: Can call down an ice storm
4: Can teleport short distances
For whatever reason, my particular application needs __deepcopy__ to be implemented. Since these classes are for building "constants", none of their attributes should ever be changed after creation; thus I hope it's safe just to return self.
