nested dictionary to object-like dict with properties - python

say I have two (simple toy) nested data structure like this:
d = dict(zip(list('abc'), list(range(3))))
nested_dict = {k:d.copy() for k in d}
nested_listof_dict = {k:[d.copy() for _ in range(3)] for k in d}
Now I want to make this behave more like a 'regular' class-like object (meaning dot-indexable)
class dictobj(dict):
def __init__(self, data: dict, name):
data['_name'] = name
super().__init__(data)
for name, item in data.items():
if isinstance(item, (list, tuple)):
setattr(self, name, [dictobj(x, name) if isinstance(x, dict) else x for x in item])
else:
setattr(self, name, dictobj(item, name) if isinstance(item, dict) else item)
def __repr__(self):
return f"{self['_name']}"
data_dictobj = dictobj(data, 'test') # size 1185 bytes
which works nicely for both the nested dict and nested_listof_dict
assert nested_listof_dict.a[0].b == nested_listof_dict['a'][0]['b']
but, since both attributes and dictionaries are mutable, this might happen
nested_listof_dict['a'][0]['b'] = 2
assert nested_listof_dict.a[0].b != nested_listof_dict['a'][0]['b'] # unwanted behavior
So, therefore it would be a good idea to implement the attributes as properties. I figured it would probably be a good idea to avoid using lambda functions because of closure scoping. First looking at getting the getter implemented, I focused on the nested_dict, since it's a simpler structure.
class dictobj(dict):
def __init__(self, data: dict, name):
def make_property(self, name, item):
def getter(self):
return dictobj(item, name) if isinstance(item, dict) else item
setattr(self.__class__, name, property(getter))
# def setter(self, value):
# if not isinstance(value, type(item)):
# raise ValueError(f'cannot change the data structure, expected '+
# f'{type(item).__name__} got {type(value).__name__}')
# self[name] = value
# setattr(self.__class__, name, property(getter, setter))
data['_name'] = name
super().__init__(data)
for name, item in data.items():
if isinstance(item, (list, tuple)):
setattr(self, name, [dictobj(x, name) if isinstance(x, dict) else x for x in item])
else:
make_property(self, name, item)
def __repr__(self):
return f"{self['_name']}"
then test if the the attribute can no longer be set
d = dictobj(d, 'test')
# d.a = 1 # fails as should: "AttributeError: can't set attribute"
# d.a.a = 1 # fails as should: "AttributeError: can't set attribute"
But somehow I am still messing up, the following behavior is observed:
print(d.a) # returns object "a" - as desired
print(d.a) # returns 0 - second call returns the nested value
I don't know how to avoid this behavior from occurring.
Apart from that, I would also like to generate a setter that enforces the data structure to be maintained. Un-out-commenting the setter I wrote above, not surprisingly, also yields unintended behavior
d.a = {1} # ValueError: cannot change the data structure, expected dict got set - as desired
d.a.a = 2 # AttributeError: 'int' object has no attribute 'a'
d.a = 2
assert d.a == 0 and d['a'] == 2 # again unintended
I would like to understand what I'm doing wrong, and to make this work. It should also be noted that I have not even yet considered generating properties for the nested_listof_dict, which would also be needed.

munch does exactly what I need

Related

Why Python dict have attribute statuses but dot operator does not work? [duplicate]

How do I make Python dictionary members accessible via a dot "."?
For example, instead of writing mydict['val'], I'd like to write mydict.val.
Also I'd like to access nested dicts this way. For example
mydict.mydict2.val
would refer to
mydict = { 'mydict2': { 'val': ... } }
I've always kept this around in a util file. You can use it as a mixin on your own classes too.
class dotdict(dict):
"""dot.notation access to dictionary attributes"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
mydict = {'val':'it works'}
nested_dict = {'val':'nested works too'}
mydict = dotdict(mydict)
mydict.val
# 'it works'
mydict.nested = dotdict(nested_dict)
mydict.nested.val
# 'nested works too'
You can do it using this class I just made. With this class you can use the Map object like another dictionary(including json serialization) or with the dot notation. I hope to help you:
class Map(dict):
"""
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
"""
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self[k] = v
if kwargs:
for k, v in kwargs.iteritems():
self[k] = v
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Usage examples:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
# Add new key
m.new_key = 'Hello world!'
# Or
m['new_key'] = 'Hello world!'
print m.new_key
print m['new_key']
# Update values
m.new_key = 'Yay!'
# Or
m['new_key'] = 'Yay!'
# Delete key
del m.new_key
# Or
del m['new_key']
Install dotmap via pip
pip install dotmap
It does everything you want it to do and subclasses dict, so it operates like a normal dictionary:
from dotmap import DotMap
m = DotMap()
m.hello = 'world'
m.hello
m.hello += '!'
# m.hello and m['hello'] now both return 'world!'
m.val = 5
m.val2 = 'Sam'
On top of that, you can convert it to and from dict objects:
d = m.toDict()
m = DotMap(d) # automatic conversion in constructor
This means that if something you want to access is already in dict form, you can turn it into a DotMap for easy access:
import json
jsonDict = json.loads(text)
data = DotMap(jsonDict)
print data.location.city
Finally, it automatically creates new child DotMap instances so you can do things like this:
m = DotMap()
m.people.steve.age = 31
Comparison to Bunch
Full disclosure: I am the creator of the DotMap. I created it because Bunch was missing these features
remembering the order items are added and iterating in that order
automatic child DotMap creation, which saves time and makes for cleaner code when you have a lot of hierarchy
constructing from a dict and recursively converting all child dict instances to DotMap
Derive from dict and and implement __getattr__ and __setattr__.
Or you can use Bunch which is very similar.
I don't think it's possible to monkeypatch built-in dict class.
Use SimpleNamespace:
>>> from types import SimpleNamespace
>>> d = dict(x=[1, 2], y=['a', 'b'])
>>> ns = SimpleNamespace(**d)
>>> ns.x
[1, 2]
>>> ns
namespace(x=[1, 2], y=['a', 'b'])
Fabric has a really nice, minimal implementation. Extending that to allow for nested access, we can use a defaultdict, and the result looks something like this:
from collections import defaultdict
class AttributeDict(defaultdict):
def __init__(self):
super(AttributeDict, self).__init__(AttributeDict)
def __getattr__(self, key):
try:
return self[key]
except KeyError:
raise AttributeError(key)
def __setattr__(self, key, value):
self[key] = value
Make use of it as follows:
keys = AttributeDict()
keys.abc.xyz.x = 123
keys.abc.xyz.a.b.c = 234
That elaborates a bit on Kugel's answer of "Derive from dict and and implement __getattr__ and __setattr__". Now you know how!
I tried this:
class dotdict(dict):
def __getattr__(self, name):
return self[name]
you can try __getattribute__ too.
make every dict a type of dotdict would be good enough, if you want to init this from a multi-layer dict, try implement __init__ too.
I recently came across the 'Box' library which does the same thing.
Installation command : pip install python-box
Example:
from box import Box
mydict = {"key1":{"v1":0.375,
"v2":0.625},
"key2":0.125,
}
mydict = Box(mydict)
print(mydict.key1.v1)
I found it to be more effective than other existing libraries like dotmap, which generate python recursion error when you have large nested dicts.
link to library and details: https://pypi.org/project/python-box/
If you want to pickle your modified dictionary, you need to add few state methods to above answers:
class DotDict(dict):
"""dot.notation access to dictionary attributes"""
def __getattr__(self, attr):
return self.get(attr)
__setattr__= dict.__setitem__
__delattr__= dict.__delitem__
def __getstate__(self):
return self
def __setstate__(self, state):
self.update(state)
self.__dict__ = self
You can achieve this using SimpleNamespace
from types import SimpleNamespace
# Assign values
args = SimpleNamespace()
args.username = 'admin'
# Retrive values
print(args.username) # output: admin
Don't. Attribute access and indexing are separate things in Python, and you shouldn't want them to perform the same. Make a class (possibly one made by namedtuple) if you have something that should have accessible attributes and use [] notation to get an item from a dict.
To build upon epool's answer, this version allows you to access any dict inside via the dot operator:
foo = {
"bar" : {
"baz" : [ {"boo" : "hoo"} , {"baba" : "loo"} ]
}
}
For instance, foo.bar.baz[1].baba returns "loo".
class Map(dict):
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.items():
if isinstance(v, dict):
v = Map(v)
if isinstance(v, list):
self.__convert(v)
self[k] = v
if kwargs:
for k, v in kwargs.items():
if isinstance(v, dict):
v = Map(v)
elif isinstance(v, list):
self.__convert(v)
self[k] = v
def __convert(self, v):
for elem in range(0, len(v)):
if isinstance(v[elem], dict):
v[elem] = Map(v[elem])
elif isinstance(v[elem], list):
self.__convert(v[elem])
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Building on Kugel's answer and taking Mike Graham's words of caution into consideration, what if we make a wrapper?
class DictWrap(object):
""" Wrap an existing dict, or create a new one, and access with either dot
notation or key lookup.
The attribute _data is reserved and stores the underlying dictionary.
When using the += operator with create=True, the empty nested dict is
replaced with the operand, effectively creating a default dictionary
of mixed types.
args:
d({}): Existing dict to wrap, an empty dict is created by default
create(True): Create an empty, nested dict instead of raising a KeyError
example:
>>>dw = DictWrap({'pp':3})
>>>dw.a.b += 2
>>>dw.a.b += 2
>>>dw.a['c'] += 'Hello'
>>>dw.a['c'] += ' World'
>>>dw.a.d
>>>print dw._data
{'a': {'c': 'Hello World', 'b': 4, 'd': {}}, 'pp': 3}
"""
def __init__(self, d=None, create=True):
if d is None:
d = {}
supr = super(DictWrap, self)
supr.__setattr__('_data', d)
supr.__setattr__('__create', create)
def __getattr__(self, name):
try:
value = self._data[name]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[name] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setattr__(self, name, value):
self._data[name] = value
def __getitem__(self, key):
try:
value = self._data[key]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[key] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setitem__(self, key, value):
self._data[key] = value
def __iadd__(self, other):
if self._data:
raise TypeError("A Nested dict will only be replaced if it's empty")
else:
return other
Use __getattr__, very simple, works in
Python 3.4.3
class myDict(dict):
def __getattr__(self,val):
return self[val]
blockBody=myDict()
blockBody['item1']=10000
blockBody['item2']="StackOverflow"
print(blockBody.item1)
print(blockBody.item2)
Output:
10000
StackOverflow
I like the Munch and it gives lot of handy options on top of dot access.
import munch
temp_1 = {'person': { 'fname': 'senthil', 'lname': 'ramalingam'}}
dict_munch = munch.munchify(temp_1)
dict_munch.person.fname
The language itself doesn't support this, but sometimes this is still a useful requirement. Besides the Bunch recipe, you can also write a little method which can access a dictionary using a dotted string:
def get_var(input_dict, accessor_string):
"""Gets data from a dictionary using a dotted accessor-string"""
current_data = input_dict
for chunk in accessor_string.split('.'):
current_data = current_data.get(chunk, {})
return current_data
which would support something like this:
>> test_dict = {'thing': {'spam': 12, 'foo': {'cheeze': 'bar'}}}
>> output = get_var(test_dict, 'thing.spam.foo.cheeze')
>> print output
'bar'
>>
I ended up trying BOTH the AttrDict and the Bunch libraries and found them to be way to slow for my uses. After a friend and I looked into it, we found that the main method for writing these libraries results in the library aggressively recursing through a nested object and making copies of the dictionary object throughout. With this in mind, we made two key changes. 1) We made attributes lazy-loaded 2) instead of creating copies of a dictionary object, we create copies of a light-weight proxy object. This is the final implementation. The performance increase of using this code is incredible. When using AttrDict or Bunch, these two libraries alone consumed 1/2 and 1/3 respectively of my request time(what!?). This code reduced that time to almost nothing(somewhere in the range of 0.5ms). This of course depends on your needs, but if you are using this functionality quite a bit in your code, definitely go with something simple like this.
class DictProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
def __getattr__(self, key):
try:
return wrap(getattr(self.obj, key))
except AttributeError:
try:
return self[key]
except KeyError:
raise AttributeError(key)
# you probably also want to proxy important list properties along like
# items(), iteritems() and __len__
class ListProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
# you probably also want to proxy important list properties along like
# __iter__ and __len__
def wrap(value):
if isinstance(value, dict):
return DictProxy(value)
if isinstance(value, (tuple, list)):
return ListProxy(value)
return value
See the original implementation here by https://stackoverflow.com/users/704327/michael-merickel.
The other thing to note, is that this implementation is pretty simple and doesn't implement all of the methods you might need. You'll need to write those as required on the DictProxy or ListProxy objects.
def dict_to_object(dick):
# http://stackoverflow.com/a/1305663/968442
class Struct:
def __init__(self, **entries):
self.__dict__.update(entries)
return Struct(**dick)
If one decides to permanently convert that dict to object this should do. You can create a throwaway object just before accessing.
d = dict_to_object(d)
This solution is a refinement upon the one offered by epool to address the requirement of the OP to access nested dicts in a consistent manner. The solution by epool did not allow for accessing nested dicts.
class YAMLobj(dict):
def __init__(self, args):
super(YAMLobj, self).__init__(args)
if isinstance(args, dict):
for k, v in args.iteritems():
if not isinstance(v, dict):
self[k] = v
else:
self.__setattr__(k, YAMLobj(v))
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(YAMLobj, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(YAMLobj, self).__delitem__(key)
del self.__dict__[key]
With this class, one can now do something like: A.B.C.D.
For infinite levels of nesting of dicts, lists, lists of dicts, and dicts of lists.
It also supports pickling
This is an extension of this answer.
class DotDict(dict):
# https://stackoverflow.com/a/70665030/913098
"""
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
Iterable are assumed to have a constructor taking list as input.
"""
def __init__(self, *args, **kwargs):
super(DotDict, self).__init__(*args, **kwargs)
args_with_kwargs = []
for arg in args:
args_with_kwargs.append(arg)
args_with_kwargs.append(kwargs)
args = args_with_kwargs
for arg in args:
if isinstance(arg, dict):
for k, v in arg.items():
self[k] = v
if isinstance(v, dict):
self[k] = DotDict(v)
elif isinstance(v, str) or isinstance(v, bytes):
self[k] = v
elif isinstance(v, Iterable):
klass = type(v)
map_value: List[Any] = []
for e in v:
map_e = DotDict(e) if isinstance(e, dict) else e
map_value.append(map_e)
self[k] = klass(map_value)
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(DotDict, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(DotDict, self).__delitem__(key)
del self.__dict__[key]
def __getstate__(self):
return self.__dict__
def __setstate__(self, d):
self.__dict__.update(d)
if __name__ == "__main__":
import pickle
def test_map():
d = {
"a": 1,
"b": {
"c": "d",
"e": 2,
"f": None
},
"g": [],
"h": [1, "i"],
"j": [1, "k", {}],
"l":
[
1,
"m",
{
"n": [3],
"o": "p",
"q": {
"r": "s",
"t": ["u", 5, {"v": "w"}, ],
"x": ("z", 1)
}
}
],
}
map_d = DotDict(d)
w = map_d.l[2].q.t[2].v
assert w == "w"
pickled = pickle.dumps(map_d)
unpickled = pickle.loads(pickled)
assert unpickled == map_d
kwargs_check = DotDict(a=1, b=[dict(c=2, d="3"), 5])
assert kwargs_check.b[0].d == "3"
kwargs_and_args_check = DotDict(d, a=1, b=[dict(c=2, d="3"), 5])
assert kwargs_and_args_check.l[2].q.t[2].v == "w"
assert kwargs_and_args_check.b[0].d == "3"
test_map()
I dislike adding another log to a (more than) 10-year old fire, but I'd also check out the dotwiz library, which I've recently released - just this year actually.
It's a relatively tiny library, which also performs really well for get (access) and set (create) times in benchmarks, at least as compared to other alternatives.
Install dotwiz via pip
pip install dotwiz
It does everything you want it to do and subclasses dict, so it operates like a normal dictionary:
from dotwiz import DotWiz
dw = DotWiz()
dw.hello = 'world'
dw.hello
dw.hello += '!'
# dw.hello and dw['hello'] now both return 'world!'
dw.val = 5
dw.val2 = 'Sam'
On top of that, you can convert it to and from dict objects:
d = dw.to_dict()
dw = DotWiz(d) # automatic conversion in constructor
This means that if something you want to access is already in dict form, you can turn it into a DotWiz for easy access:
import json
json_dict = json.loads(text)
data = DotWiz(json_dict)
print data.location.city
Finally, something exciting I am working on is an existing feature request so that it automatically creates new child DotWiz instances so you can do things like this:
dw = DotWiz()
dw['people.steve.age'] = 31
dw
# ✫(people=✫(steve=✫(age=31)))
Comparison with dotmap
I've added a quick and dirty performance comparison with dotmap below.
First, install both libraries with pip:
pip install dotwiz dotmap
I came up with the following code for benchmark purposes:
from timeit import timeit
from dotwiz import DotWiz
from dotmap import DotMap
d = {'hey': {'so': [{'this': {'is': {'pretty': {'cool': True}}}}]}}
dw = DotWiz(d)
# ✫(hey=✫(so=[✫(this=✫(is=✫(pretty={'cool'})))]))
dm = DotMap(d)
# DotMap(hey=DotMap(so=[DotMap(this=DotMap(is=DotMap(pretty={'cool'})))]))
assert dw.hey.so[0].this['is'].pretty.cool == dm.hey.so[0].this['is'].pretty.cool
n = 100_000
print('dotwiz (create): ', round(timeit('DotWiz(d)', number=n, globals=globals()), 3))
print('dotmap (create): ', round(timeit('DotMap(d)', number=n, globals=globals()), 3))
print('dotwiz (get): ', round(timeit("dw.hey.so[0].this['is'].pretty.cool", number=n, globals=globals()), 3))
print('dotmap (get): ', round(timeit("dm.hey.so[0].this['is'].pretty.cool", number=n, globals=globals()), 3))
Results, on my M1 Mac, running Python 3.10:
dotwiz (create): 0.189
dotmap (create): 1.085
dotwiz (get): 0.014
dotmap (get): 0.335
This also works with nested dicts and makes sure that dicts which are appended later behave the same:
class DotDict(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Recursively turn nested dicts into DotDicts
for key, value in self.items():
if type(value) is dict:
self[key] = DotDict(value)
def __setitem__(self, key, item):
if type(item) is dict:
item = DotDict(item)
super().__setitem__(key, item)
__setattr__ = __setitem__
__getattr__ = dict.__getitem__
Using namedtuple allows dot access.
It is like a lightweight object which also has the properties of a tuple.
It allows to define properties and access them using the dot operator.
from collections import namedtuple
Data = namedtuple('Data', ['key1', 'key2'])
dataObj = Data(val1, key2=val2) # can instantiate using keyword arguments and positional arguments
Access using dot operator
dataObj.key1 # Gives val1
datObj.key2 # Gives val2
Access using tuple indices
dataObj[0] # Gives val1
dataObj[1] # Gives val2
But remember this is a tuple; not a dict. So the below code will give error
dataObj['key1'] # Gives TypeError: tuple indices must be integers or slices, not str
Refer: namedtuple
It is an old question but I recently found that sklearn has an implemented version dict accessible by key, namely Bunch
https://scikit-learn.org/stable/modules/generated/sklearn.utils.Bunch.html#sklearn.utils.Bunch
Simplest solution.
Define a class with only pass statement in it. Create object for this class and use dot notation.
class my_dict:
pass
person = my_dict()
person.id = 1 # create using dot notation
person.phone = 9999
del person.phone # Remove a property using dot notation
name_data = my_dict()
name_data.first_name = 'Arnold'
name_data.last_name = 'Schwarzenegger'
person.name = name_data
person.name.first_name # dot notation access for nested properties - gives Arnold
One simple way to get dot access (but not array access), is to use a plain object in Python. Like this:
class YourObject:
def __init__(self, *args, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
...and use it like this:
>>> obj = YourObject(key="value")
>>> print(obj.key)
"value"
... to convert it to a dict:
>>> print(obj.__dict__)
{"key": "value"}
The answer of #derek73 is very neat, but it cannot be pickled nor (deep)copied, and it returns None for missing keys. The code below fixes this.
Edit: I did not see the answer above that addresses the exact same point (upvoted). I'm leaving the answer here for reference.
class dotdict(dict):
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __getattr__(self, name):
try:
return self[name]
except KeyError:
raise AttributeError(name)
I just needed to access a dictionary using a dotted path string, so I came up with:
def get_value_from_path(dictionary, parts):
""" extracts a value from a dictionary using a dotted path string """
if type(parts) is str:
parts = parts.split('.')
if len(parts) > 1:
return get_value_from_path(dictionary[parts[0]], parts[1:])
return dictionary[parts[0]]
a = {'a':{'b':'c'}}
print(get_value_from_path(a, 'a.b')) # c
The implemention used by kaggle_environments is a function called structify.
class Struct(dict):
def __init__(self, **entries):
entries = {k: v for k, v in entries.items() if k != "items"}
dict.__init__(self, entries)
self.__dict__.update(entries)
def __setattr__(self, attr, value):
self.__dict__[attr] = value
self[attr] = value
# Added benefit of cloning lists and dicts.
def structify(o):
if isinstance(o, list):
return [structify(o[i]) for i in range(len(o))]
elif isinstance(o, dict):
return Struct(**{k: structify(v) for k, v in o.items()})
return o
https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/utils.py
This may be useful for testing AI simulation agents in games like ConnectX
from kaggle_environments import structify
obs = structify({ 'remainingOverageTime': 60, 'step': 0, 'mark': 1, 'board': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]})
conf = structify({ 'timeout': 2, 'actTimeout': 2, 'agentTimeout': 60, 'episodeSteps': 1000, 'runTimeout': 1200, 'columns': 7, 'rows': 6, 'inarow': 4, '__raw_path__': '/kaggle_simulations/agent/main.py' })
def agent(obs, conf):
action = obs.step % conf.columns
return action
Not a direct answer to the OP's question, but inspired by and perhaps useful for some.. I've created an object-based solution using the internal __dict__ (In no way optimized code)
payload = {
"name": "John",
"location": {
"lat": 53.12312312,
"long": 43.21345112
},
"numbers": [
{
"role": "home",
"number": "070-12345678"
},
{
"role": "office",
"number": "070-12345679"
}
]
}
class Map(object):
"""
Dot style access to object members, access raw values
with an underscore e.g.
class Foo(Map):
def foo(self):
return self.get('foo') + 'bar'
obj = Foo(**{'foo': 'foo'})
obj.foo => 'foobar'
obj._foo => 'foo'
"""
def __init__(self, *args, **kwargs):
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
if kwargs:
for k, v in kwargs.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
def __getattribute__(self, attr):
if hasattr(self, 'get_' + attr):
return object.__getattribute__(self, 'get_' + attr)()
else:
return object.__getattribute__(self, attr)
def get(self, key):
try:
return self.__dict__.get('get_' + key)()
except (AttributeError, TypeError):
return self.__dict__.get(key)
def __repr__(self):
return u"<{name} object>".format(
name=self.__class__.__name__
)
class Number(Map):
def get_role(self):
return self.get('role')
def get_number(self):
return self.get('number')
class Location(Map):
def get_latitude(self):
return self.get('lat') + 1
def get_longitude(self):
return self.get('long') + 1
class Item(Map):
def get_name(self):
return self.get('name') + " Doe"
def get_location(self):
return Location(**self.get('location'))
def get_numbers(self):
return [Number(**n) for n in self.get('numbers')]
# Tests
obj = Item({'foo': 'bar'}, **payload)
assert type(obj) == Item
assert obj._name == "John"
assert obj.name == "John Doe"
assert type(obj.location) == Location
assert obj.location._lat == 53.12312312
assert obj.location._long == 43.21345112
assert obj.location.latitude == 54.12312312
assert obj.location.longitude == 44.21345112
for n in obj.numbers:
assert type(n) == Number
if n.role == 'home':
assert n.number == "070-12345678"
if n.role == 'office':
assert n.number == "070-12345679"

Python object.__getattribute__ not behaving correctly after override

I'm trying to replicate mongoengine functionality that lets you define field objects that can be used like normal python objects in the code.
My idea is to create a FieldHolder class that contains the value and (de)serialization logic, and a Document object with overridden __setattr__ and __getattribute__ methods.
In my code draft, if I set x.h to some value, this value gets correctly assigned to x.h._value. When I get x.h, I correctly get x.h._value.
However, I would also like get h as a FieldHolder object and not as its value. I have tried using object.__getattribute__ (inside serialize method), but I'm still getting h._value (object.__getattribute__(self, 'h') returns abc). What am I doing wrong? Thanks
class FieldHolder:
_value = None
# Some serialization and deserialization methods
class Document(object):
h = FieldHolder()
def __setattr__(self, key, value):
attr = getattr(self, key, None)
if attr is not None and isinstance(attr, FieldHolder):
attr._value = value
else:
super().__setattr__(key, value)
def __getattribute__(self, key):
val = super().__getattribute__(key)
if isinstance(val, FieldHolder):
return val._value
else:
return val
def serialize(self):
res = {}
for name, value in vars(self).items():
obj = object.__getattribute__(self, name) # not working as expected
if isinstance(obj, FieldHolder):
res[name] = value
return res
x = Document()
x.h = "abc" # h._value is now "abc"
print(x.h) # prints "abc"
s = x.serialize() # should return {'h': 'abc'} but returns {}
print(s)

Dynamically get dict elements via getattr?

I want to dynamically query which objects from a class I would like to retrieve. getattr seems like what I want, and it performs fine for top-level objects in the class. However, I'd like to also specify sub-elements.
class MyObj(object):
def __init__(self):
self.d = {'a':1, 'b':2}
self.c = 3
myobj = MyObj()
val = getattr(myobj, "c")
print val # Correctly prints 3
val = getattr(myobj, "d['a']") # Seemingly incorrectly formatted query
print val # Throws an AttributeError
How can I get the object's dictionary elements via a string?
The reason you're getting an error is that getattr(myobj, "d['a']") looks for an attribute named d['a'] on the object, and there isn't one. Your attribute is named d and it's a dictionary. Once you have a reference to the dictionary, then you can access items in it.
mydict = getattr(myobj, "d")
val = mydict["a"]
Or as others have shown, you can combine this in one step (I showed it as two to better illustrate what is actually happening):
val = getattr(myobj, "d")["a"]
Your question implies that you think that items of a dictionary in an object are "sub-elements" of the object. An item in a dictionary, however, is a different thing from an attribute of an object. (getattr() wouldn't work with something like o.a either, though; it just gets one attribute of one object. If that's an object too and you want to get one of its attributes, that's another getattr().)
You can pretty easily write a function that walks an attribute path (given in a string) and attempts to resolve each name either as a dictionary key or an attribute:
def resolve(obj, attrspec):
for attr in attrspec.split("."):
try:
obj = obj[attr]
except (TypeError, KeyError):
obj = getattr(obj, attr)
return obj
The basic idea here is that you take a path and for each component of the path, try to find either an item in a dictionary-like container or an attribute on an object. When you get to the end of the path, return what you've got. Your example would be resolve(myobj, "d.a")
You simply use square brackets to get the dictionary's element:
val = getattr(myobj, "d")["a"]
That'll set val to 1.
If you need the dictionary item to be dynamic as well, you'll need to call get on the result of getattr:
value = getattr(myobj, 'd').get('a')
Thanks to Kindall's answer, I found the following works well for dict keys that are stings.
class Obj2(object):
def __init__(self):
self.d = {'a':'A', 'b':'B', 'c': {'three': 3, 'twothree': (2,3)}}
self.c = 4
class MyObj(object):
def __init__(self):
self.d = {'a':1, 'b':2, 'c': {'two': 2, 'onetwo': (1,2)}}
self.c = 3
self.obj2 = Obj2()
def resolve(self, obj, attrspec):
attrssplit = attrspec.split(".")
attr = attrssplit[0]
try:
obj = obj[attr]
except (TypeError, KeyError):
obj = getattr(obj, attr)
if len(attrssplit) > 1:
attrspec = attrspec.partition(".")[2] # right part of the string.
return self.resolve(obj, attrspec) # Recurse
return obj
def __getattr__(self, name):
return self.resolve(self, name)
# Test
myobj = MyObj()
print getattr(myobj, "c")
print getattr(myobj, "d.a")
print getattr(myobj, "d.c.two")
print getattr(myobj, "obj2.d.a")
print getattr(myobj, "obj2.d.c.twothree")

python: method depends on whether param is int or string

Is there a more pythonic way to write __getitem__ than the following? The issue is checking type and doing different things depending on the type of the parameter in the call.
class This():
def __init__(self, name, value):
self.name, self.value = name, value
class That():
def __init__(self):
self.this_list = []
def add_this(self, this):
self.this_list.append(this)
def __getitem__(self, x):
if isinstance(x, int):
return self.this_list[x] # could wrap in try/except for error checking
elif isinstance(x, str):
for this in self.this_list:
if this.name == x:
return this
return None
a = This('a', 1)
b = This('b', 2)
c = That()
c.add_this(a)
c.add_this(b)
print c[1].name
print c['a'].name
There are quite a few options, but I think there is not one best choice. It depends on your use case and preferences. Just to give you a few hints:
Do you really have to store the data in a list? In your example you could use a dictionary and insert the object twice: Once using the integer as key and once using the string as a key. That would make your __getitem__ quite simple. ;-)
Another option would be to make your interface more explicit and use byInt/byString methods. You should choose better names of course.
If you give more details about what you really want to do, I could propose more alternatives.
You are almost always better off testing the behavior of the kind of item you want rather than explicitly testing for type. In your case, I'd simply try to get the desired item by index first and catch TypeError to check by name.
def __getitem__(self, key):
try:
return self.this_list[key]
except TypeError:
try:
return next(item for item in self.this_list if item.name == key)
except StopIteration:
raise KeyError("key `%s` not found" % key)
Note that this will automatically work with slices too, since in this case the key will be a slice object and that will work fine with the [...] notation.
You should probably be using a dict rather than a list inside your class, though, rather than searching a list for an object attribute. Exceptions would be if you really need slicing or if the names can be changed by code outside your class.
Another (perhaps slightly unconventional) possibility is to implement the special method __eq__() on your This class, allowing it to be compared to a string, so that if the class's name attribute is (say) "Jerry", then This("Jerry", 0) == "Jerry". Then you don't actually need the container class and can just use a regular list:
class This(object):
def __init__(self, name, value):
self.name, self.value = name, value
def __eq__(self, other):
return self.name == other
thislist = [This("Jerry", 42), This("Amy", 36)]
"Jerry" in thislist # True
thislist.index("Amy") # 1
The syntax for accessing an item by name is still a little hairy:
thislist[thislist.index("Amy")]
But you can simply subclass list and combine this with my previous suggestion, which becomes simpler and more generic, since it works with any object that knows how to compare itself to whatever kind of key you're using:
class That(list):
def __getitem__(self, key):
try:
return list.__getitem__(self, key)
except TypeError:
return list.__getitem__(self, self.index(key))
thislist = That([This("Jerry", 42), This("Amy", 36)])
thislist["Amy"].value # 36
Is there a more pythonic way to write getitem in the following?
Only slightly. __getitem__ is used by both sequences, where int's and slice's are used, and by mappings, where pretty much anything can be used. It looks like you are implementing both sequence-type and mapping-type interfaces, so you're stuck with checking type.
Missing two things:
support for slices (but only put it in if you want your That to support it)
raising an exception for failure (returning None in this case is not pythonic)
Here's an updated __getitem__:
def __getitem__(self, x):
if isinstance(x, int):
return self.this_list[x]
elif isinstance(x, slice):
return self.this_list[slice]
elif isinstance(x, str):
for this in self.this_list:
if this.name == x:
return this
return None
raise KeyError("invalid key: %r" % x)
At this point you have two possible exceptions being raised
IndexError (if x is outside the range of this_list)
KeyError (if the name is not found, or something besides str or int was passed in)
This may be fine for you, or you might want to create a custom Exception that gets returned in all cases:
class LookupError(Exception):
"x is neither int nor str, or no matching This instance found"
Here's the updated code (Python 2.x):
class LookupError(IndexError, KeyError):
"x is neither int nor str, or no matching This instance found"
class This():
def __init__(self, name, value):
self.name, self.value = name, value
class That(object):
def __init__(self):
self.this_list = []
def add_this(self, this):
self.this_list.append(this)
def __getitem__(self, x):
try:
if isinstance(x, int):
return self.this_list[x]
elif isinstance(x, slice):
return self.this_list[slice]
elif isinstance(x, str):
for this in self.this_list:
if this.name == x:
return this
raise KeyError("invalid key: %r" % x)
except (IndexError, KeyError), err:
raise LookupError(err.message)
a = This('a', 1)
b = This('b', 2)
c = That()
c.add_this(a)
c.add_this(b)
print c[1].name
print c['a'].name
try:
print c[2.0]
except LookupError, e:
print e
try:
print c['c']
except LookupError, e:
print e
You can define two private methods __getitem_int() and __getitem_str(). Then you can use getattr() to get handle to proper method depending of type(x).__name__ and call type-specific method.
See how KantGenerator.parse() is implemented in dive into python parsing xml example.

Recursively convert python object graph to dictionary

I'm trying to convert the data from a simple object graph into a dictionary. I don't need type information or methods and I don't need to be able to convert it back to an object again.
I found this question about creating a dictionary from an object's fields, but it doesn't do it recursively.
Being relatively new to python, I'm concerned that my solution may be ugly, or unpythonic, or broken in some obscure way, or just plain old NIH.
My first attempt appeared to work until I tried it with lists and dictionaries, and it seemed easier just to check if the object passed had an internal dictionary, and if not, to just treat it as a value (rather than doing all that isinstance checking). My previous attempts also didn't recurse into lists of objects:
def todict(obj):
if hasattr(obj, "__iter__"):
return [todict(v) for v in obj]
elif hasattr(obj, "__dict__"):
return dict([(key, todict(value))
for key, value in obj.__dict__.iteritems()
if not callable(value) and not key.startswith('_')])
else:
return obj
This seems to work better and doesn't require exceptions, but again I'm still not sure if there are cases here I'm not aware of where it falls down.
Any suggestions would be much appreciated.
An amalgamation of my own attempt and clues derived from Anurag Uniyal and Lennart Regebro's answers works best for me:
def todict(obj, classkey=None):
if isinstance(obj, dict):
data = {}
for (k, v) in obj.items():
data[k] = todict(v, classkey)
return data
elif hasattr(obj, "_ast"):
return todict(obj._ast())
elif hasattr(obj, "__iter__") and not isinstance(obj, str):
return [todict(v, classkey) for v in obj]
elif hasattr(obj, "__dict__"):
data = dict([(key, todict(value, classkey))
for key, value in obj.__dict__.items()
if not callable(value) and not key.startswith('_')])
if classkey is not None and hasattr(obj, "__class__"):
data[classkey] = obj.__class__.__name__
return data
else:
return obj
One line of code to convert an object to JSON recursively.
import json
def get_json(obj):
return json.loads(
json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
)
obj = SomeClass()
print("Json = ", get_json(obj))
I don't know what is the purpose of checking for basestring or object is? also dict will not contain any callables unless you have attributes pointing to such callables, but in that case isn't that part of object?
so instead of checking for various types and values, let todict convert the object and if it raises the exception, user the orginal value.
todict will only raise exception if obj doesn't have dict
e.g.
class A(object):
def __init__(self):
self.a1 = 1
class B(object):
def __init__(self):
self.b1 = 1
self.b2 = 2
self.o1 = A()
def func1(self):
pass
def todict(obj):
data = {}
for key, value in obj.__dict__.iteritems():
try:
data[key] = todict(value)
except AttributeError:
data[key] = value
return data
b = B()
print todict(b)
it prints {'b1': 1, 'b2': 2, 'o1': {'a1': 1}}
there may be some other cases to consider, but it may be a good start
special cases
if a object uses slots then you will not be able to get dict e.g.
class A(object):
__slots__ = ["a1"]
def __init__(self):
self.a1 = 1
fix for the slots cases can be to use dir() instead of directly using the dict
I realize that this answer is a few years too late, but I thought it might be worth sharing since it's a Python 3.3+ compatible modification to the original solution by #Shabbyrobe that has generally worked well for me:
import collections
try:
# Python 2.7+
basestring
except NameError:
# Python 3.3+
basestring = str
def todict(obj):
"""
Recursively convert a Python object graph to sequences (lists)
and mappings (dicts) of primitives (bool, int, float, string, ...)
"""
if isinstance(obj, basestring):
return obj
elif isinstance(obj, dict):
return dict((key, todict(val)) for key, val in obj.items())
elif isinstance(obj, collections.Iterable):
return [todict(val) for val in obj]
elif hasattr(obj, '__dict__'):
return todict(vars(obj))
elif hasattr(obj, '__slots__'):
return todict(dict((name, getattr(obj, name)) for name in getattr(obj, '__slots__')))
return obj
If you're not interested in callable attributes, for example, they can be stripped in the dictionary comprehension:
elif isinstance(obj, dict):
return dict((key, todict(val)) for key, val in obj.items() if not callable(val))
A slow but easy way to do this is to use jsonpickle to convert the object to a JSON string and then json.loads to convert it back to a python dictionary:
dict = json.loads(jsonpickle.encode( obj, unpicklable=False ))
In Python there are many ways of making objects behave slightly differently, like metaclasses and whatnot, and it can override getattr and thereby have "magical" attributes you can't see through dict, etc. In short, it's unlikely that you are going to get a 100% complete picture in the generic case with whatever method you use.
Therefore, the answer is: If it works for you in the use case you have now, then the code is correct. ;-)
To make somewhat more generic code you could do something like this:
import types
def todict(obj):
# Functions, methods and None have no further info of interest.
if obj is None or isinstance(subobj, (types.FunctionType, types.MethodType))
return obj
try: # If it's an iterable, return all the contents
return [todict(x) for x in iter(obj)]
except TypeError:
pass
try: # If it's a dictionary, recurse over it:
result = {}
for key in obj:
result[key] = todict(obj)
return result
except TypeError:
pass
# It's neither a list nor a dict, so it's a normal object.
# Get everything from dir and __dict__. That should be most things we can get hold of.
attrs = set(dir(obj))
try:
attrs.update(obj.__dict__.keys())
except AttributeError:
pass
result = {}
for attr in attrs:
result[attr] = todict(getattr(obj, attr, None))
return result
Something like that. That code is untested, though. This still doesn't cover the case when you override getattr, and I'm sure there are many more cases that it doens't cover and may not be coverable. :)
No custom implementation is required. jsons library can be used.
import jsons
object_dict = jsons.dump(object_instance)
Thanks #AnuragUniyal!
You made my day!
This is my variant of code that's working for me:
# noinspection PyProtectedMember
def object_to_dict(obj):
data = {}
if getattr(obj, '__dict__', None):
for key, value in obj.__dict__.items():
try:
data[key] = object_to_dict(value)
except AttributeError:
data[key] = value
return data
else:
return obj
A little update to Shabbyrobe's answer to make it work for namedtuples:
def obj2dict(obj, classkey=None):
if isinstance(obj, dict):
data = {}
for (k, v) in obj.items():
data[k] = obj2dict(v, classkey)
return data
elif hasattr(obj, "_asdict"):
return obj2dict(obj._asdict())
elif hasattr(obj, "_ast"):
return obj2dict(obj._ast())
elif hasattr(obj, "__iter__"):
return [obj2dict(v, classkey) for v in obj]
elif hasattr(obj, "__dict__"):
data = dict([(key, obj2dict(value, classkey))
for key, value in obj.__dict__.iteritems()
if not callable(value) and not key.startswith('_')])
if classkey is not None and hasattr(obj, "__class__"):
data[classkey] = obj.__class__.__name__
return data
else:
return obj
def list_object_to_dict(lst):
return_list = []
for l in lst:
return_list.append(object_to_dict(l))
return return_list
def object_to_dict(object):
dict = vars(object)
for k,v in dict.items():
if type(v).__name__ not in ['list', 'dict', 'str', 'int', 'float']:
dict[k] = object_to_dict(v)
if type(v) is list:
dict[k] = list_object_to_dict(v)
return dict
Looked at all solutions, and #hbristow's answer was closest to what I was looking for.
Added enum.Enum handling since this was causing a RecursionError: maximum recursion depth exceeded error and reordered objects with __slots__ to have precedence of objects defining __dict__.
def todict(obj):
"""
Recursively convert a Python object graph to sequences (lists)
and mappings (dicts) of primitives (bool, int, float, string, ...)
"""
if isinstance(obj, str):
return obj
elif isinstance(obj, enum.Enum):
return str(obj)
elif isinstance(obj, dict):
return dict((key, todict(val)) for key, val in obj.items())
elif isinstance(obj, collections.Iterable):
return [todict(val) for val in obj]
elif hasattr(obj, '__slots__'):
return todict(dict((name, getattr(obj, name)) for name in getattr(obj, '__slots__')))
elif hasattr(obj, '__dict__'):
return todict(vars(obj))
return obj
I'd comment on the accepted answer but my rep is not high enough...
The accepted answer is great but add another elif just after the if to support NamedTuples serialization to dict properly too:
elif hasattr(obj, "_asdict"):
return todict(obj._asdict())
Well. Added functionality of limiting the depth to #Shabbyrobe answer. Thought it might be worth for the objects which loop back.
def todict(obj, limit=sys.getrecursionlimit(), classkey=None):
if isinstance(obj, dict):
if limit>=1:
data = {}
for (k, v) in obj.items():
data[k] = todict(v, limit-1,classkey)
return data
else:
return 'class:'+obj.__class__.__name__
elif hasattr(obj, "_ast"):
return todict(obj._ast(), limit-1) if limit>=1 else {'class:'+obj.__class__.__name__}
elif hasattr(obj, "__iter__") and not isinstance(obj, str):
return [todict(v, limit-1, classkey) for v in obj] if limit>=1 else {'class:'+obj.__class__.__name__}
elif hasattr(obj, "__dict__"):
if limit>=1:
data = dict([(key, todict(value, limit-1, classkey))
for key, value in obj.__dict__.items()
if not callable(value) and not key.startswith('_')])
if classkey is not None and hasattr(obj, "__class__"):
data[classkey] = obj.__class__.__name__
return data
else:
return 'class:'+obj.__class__.__name__
else:
return obj
previous answers not work when class field is class instance. use this:
from dataclasses import dataclass, field
#dataclass
class BaseNumber:
number:str = ''
probability:float = 0.
#dataclass
class ContainerInfo:
type:str = ''
height:int = ''
width:str = ''
length:str = ''
#dataclass
class AdditionalNumber:
number:str = ''
prob:float = 0.
info:ContainerInfo = ContainerInfo()
#dataclass
class ContainerData:
container_number = BaseNumber()
container_type = AdditionalNumber()
errors:list = field(default_factory=list)
def todict(self, obj='sadasdas'):
if obj == 'sadasdas':
obj = self
if isinstance(obj, dict):
data = {}
for (k, v) in obj.items():
data[k] = self.todict(v)
return data
elif hasattr(obj, "_ast"):
return self.todict(obj._ast())
elif hasattr(obj, "__iter__") and not isinstance(obj, str):
return [self.todict(v) for v in obj]
elif hasattr(obj, "__dict__"):
aaa = dir(obj)
data = dict([(key, self.todict(value))
for key, value in {field: getattr(obj, field) for field in dir(obj)}.items()
if not callable(value) and not key.startswith('_')
])
return data
else:
return obj

Categories