Creating a custom Counter object with special set function - python

From Adding a single character to add keys in Counter , #AshwiniChaudhary gave an excellent answer to create a new Counter object with a different set() function:
from collections import Counter
class CustomCounter(Counter):
def __setitem__(self, key, value):
if len(key) > 1 and not key.endswith(u"\uE000"):
key += u"\uE000"
super(CustomCounter, self).__setitem__(key, value)
To allow user-defined char/str to append to the key, I've tried:
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
def __init__(self, str_to_append):
self._appended_str = str_to_append
super(AppendedStrCounter, self).__init__()
def __setitem__(self, key, value):
if len(key) > 1 and not key.endswith(self._appended_str):
key += self._appended_str
super(AppendedStrCounter, self).__setitem__(tuple(key), value)
But it's returning an empty Counter:
>>> class AppendedStrCounter(Counter):
... def __init__(self, str_to_append):
... self._appended_str = str_to_append
... super(AppendedStrCounter, self).__init__()
... def __setitem__(self, key, value):
... if len(key) > 1 and not key.endswith(self._appended_str):
... key += self._appended_str
... super(AppendedStrCounter, self).__setitem__(tuple(key), value)
...
>>> AppendedStrCounter('foo bar bar blah'.split())
AppendedStrCounter()
That's because I'm missing the iter in the __init__():
from collections import Counter, defaultdict
class AppendedStrCounter(Counter):
def __init__(self, iter, str_to_append):
self._appended_str = str_to_append
super(AppendedStrCounter, self).__init__(iter)
def __setitem__(self, key, value):
if len(key) > 1 and not key.endswith(self._appended_str):
key += self._appended_str
super(AppendedStrCounter, self).__setitem__(tuple(key), value)
[out]:
>>> AppendedStrCounter('foo bar bar blah'.split(), u'\ue000')
AppendedStrCounter({('f', 'o', 'o', '\ue000'): 1, ('b', 'a', 'r', '\ue000'): 1, ('b', 'l', 'a', 'h', '\ue000'): 1})
But the value for 'bar' is wrong, it should be 2 instead of 1.
Is using iter to the __init__() the right way to initialize the Counter?

As pointed out in
Felix's comment,
collections.Counter
does not document how its __init__ method adds keys or sets values, only that it does.
Since it is not explicitly designed for subclassing, the wisest thing to do is not subclass it.
The
collections.abc
module exists to provide easily-subclassed abstract classes of Python's builtin types, including dict
(MutableMapping, in ABC terms).
So, if all you need is "a Counter-like class"
(as opposed to "a subclass of Counter that will satisfy builtins like isinstance and issubclass),
you can create your own MutableMapping that has-a Counter, and then "middleman" the initializer and the three methods that Counter adds to the typical dict:
import collections
import collections.abc
def _identity(s):
'''
Default mutator function.
'''
return s
class CustomCounter(collections.abc.MutableMapping):
'''
Overrides the 5 methods of a MutableMapping:
__getitem__, __setitem__, __delitem__, __iter__, __len__
...and the 3 non-Mapping methods of Counter:
elements, most_common, subtract
'''
def __init__(self, values=None, *, mutator=_identity):
self._mutator = mutator
if values is None:
self._counter = collections.Counter()
else:
values = (self._mutator(v) for v in values)
self._counter = collections.Counter(values)
return
def __getitem__(self, item):
return self._counter[self._mutator(item)]
def __setitem__(self, item, value):
self._counter[self._mutator(item)] = value
return
def __delitem__(self, item):
del self._counter[self._mutator(item)]
return
def __iter__(self):
return iter(self._counter)
def __len__(self):
return len(self._counter)
def __repr__(self):
return ''.join([
self.__class__.__name__,
'(',
repr(dict(self._counter)),
')'
])
def elements(self):
return self._counter.elements()
def most_common(self, n):
return self._counter.most_common(n)
def subtract(self, values):
if isinstance(values, collections.abc.Mapping):
values = {self._mutator(k): v for k, v in values.items()}
return self._counter.subtract(values)
else:
values = (self._mutator(v) for v in values)
return self._counter.subtract(values)
def main():
def mutator(s):
# Asterisks are easier to print than '\ue000'.
return '*' + s + '*'
words = 'the lazy fox jumps over the brown dog'.split()
# Test None (allowed by collections.Counter).
ctr_none = CustomCounter(None)
assert 0 == len(ctr_none)
# Test typical dict and collections.Counter methods.
ctr = CustomCounter(words, mutator=mutator)
print(ctr)
assert 1 == ctr['dog']
assert 2 == ctr['the']
assert 7 == len(ctr)
del(ctr['lazy'])
assert 6 == len(ctr)
ctr.subtract(['jumps', 'dog'])
assert 0 == ctr['dog']
assert 6 == len(ctr)
ctr.subtract({'the': 5, 'bogus': 100})
assert -3 == ctr['the']
assert -100 == ctr['bogus']
assert 7 == len(ctr)
return
if "__main__" == __name__:
main()
Output (line-wrapped, for ease of reading):
CustomCounter({
'*brown*': 1,
'*lazy*': 1,
'*the*': 2,
'*over*': 1,
'*jumps*': 1,
'*fox*': 1,
'*dog*': 1
})
I added a keyword-only argument to the initializer, mutator, to store the function that converts real-world whatevers to the "mutant" counted versions.
Note that this likely means that CustomCounter no longer stores "hashable objects", but "hashable objects that don't make the mutator barf".
Also, if the standard library's Counter ever gets new methods, you'll have to update CustomCounter to "override" them.
(You could maybe work around that by using
__getattr__
to pass any unknown attributes to self._counter, but any keys in the arguments will be handed to the Counter in their raw, "un-mutated" form.
Finally, as I noted before, it's not actually a subclass of collections.Counter, if other code is specifically looking for one.

Related

Why Python dict have attribute statuses but dot operator does not work? [duplicate]

How do I make Python dictionary members accessible via a dot "."?
For example, instead of writing mydict['val'], I'd like to write mydict.val.
Also I'd like to access nested dicts this way. For example
mydict.mydict2.val
would refer to
mydict = { 'mydict2': { 'val': ... } }
I've always kept this around in a util file. You can use it as a mixin on your own classes too.
class dotdict(dict):
"""dot.notation access to dictionary attributes"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
mydict = {'val':'it works'}
nested_dict = {'val':'nested works too'}
mydict = dotdict(mydict)
mydict.val
# 'it works'
mydict.nested = dotdict(nested_dict)
mydict.nested.val
# 'nested works too'
You can do it using this class I just made. With this class you can use the Map object like another dictionary(including json serialization) or with the dot notation. I hope to help you:
class Map(dict):
"""
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
"""
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self[k] = v
if kwargs:
for k, v in kwargs.iteritems():
self[k] = v
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Usage examples:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
# Add new key
m.new_key = 'Hello world!'
# Or
m['new_key'] = 'Hello world!'
print m.new_key
print m['new_key']
# Update values
m.new_key = 'Yay!'
# Or
m['new_key'] = 'Yay!'
# Delete key
del m.new_key
# Or
del m['new_key']
Install dotmap via pip
pip install dotmap
It does everything you want it to do and subclasses dict, so it operates like a normal dictionary:
from dotmap import DotMap
m = DotMap()
m.hello = 'world'
m.hello
m.hello += '!'
# m.hello and m['hello'] now both return 'world!'
m.val = 5
m.val2 = 'Sam'
On top of that, you can convert it to and from dict objects:
d = m.toDict()
m = DotMap(d) # automatic conversion in constructor
This means that if something you want to access is already in dict form, you can turn it into a DotMap for easy access:
import json
jsonDict = json.loads(text)
data = DotMap(jsonDict)
print data.location.city
Finally, it automatically creates new child DotMap instances so you can do things like this:
m = DotMap()
m.people.steve.age = 31
Comparison to Bunch
Full disclosure: I am the creator of the DotMap. I created it because Bunch was missing these features
remembering the order items are added and iterating in that order
automatic child DotMap creation, which saves time and makes for cleaner code when you have a lot of hierarchy
constructing from a dict and recursively converting all child dict instances to DotMap
Derive from dict and and implement __getattr__ and __setattr__.
Or you can use Bunch which is very similar.
I don't think it's possible to monkeypatch built-in dict class.
Use SimpleNamespace:
>>> from types import SimpleNamespace
>>> d = dict(x=[1, 2], y=['a', 'b'])
>>> ns = SimpleNamespace(**d)
>>> ns.x
[1, 2]
>>> ns
namespace(x=[1, 2], y=['a', 'b'])
Fabric has a really nice, minimal implementation. Extending that to allow for nested access, we can use a defaultdict, and the result looks something like this:
from collections import defaultdict
class AttributeDict(defaultdict):
def __init__(self):
super(AttributeDict, self).__init__(AttributeDict)
def __getattr__(self, key):
try:
return self[key]
except KeyError:
raise AttributeError(key)
def __setattr__(self, key, value):
self[key] = value
Make use of it as follows:
keys = AttributeDict()
keys.abc.xyz.x = 123
keys.abc.xyz.a.b.c = 234
That elaborates a bit on Kugel's answer of "Derive from dict and and implement __getattr__ and __setattr__". Now you know how!
I tried this:
class dotdict(dict):
def __getattr__(self, name):
return self[name]
you can try __getattribute__ too.
make every dict a type of dotdict would be good enough, if you want to init this from a multi-layer dict, try implement __init__ too.
I recently came across the 'Box' library which does the same thing.
Installation command : pip install python-box
Example:
from box import Box
mydict = {"key1":{"v1":0.375,
"v2":0.625},
"key2":0.125,
}
mydict = Box(mydict)
print(mydict.key1.v1)
I found it to be more effective than other existing libraries like dotmap, which generate python recursion error when you have large nested dicts.
link to library and details: https://pypi.org/project/python-box/
If you want to pickle your modified dictionary, you need to add few state methods to above answers:
class DotDict(dict):
"""dot.notation access to dictionary attributes"""
def __getattr__(self, attr):
return self.get(attr)
__setattr__= dict.__setitem__
__delattr__= dict.__delitem__
def __getstate__(self):
return self
def __setstate__(self, state):
self.update(state)
self.__dict__ = self
You can achieve this using SimpleNamespace
from types import SimpleNamespace
# Assign values
args = SimpleNamespace()
args.username = 'admin'
# Retrive values
print(args.username) # output: admin
Don't. Attribute access and indexing are separate things in Python, and you shouldn't want them to perform the same. Make a class (possibly one made by namedtuple) if you have something that should have accessible attributes and use [] notation to get an item from a dict.
To build upon epool's answer, this version allows you to access any dict inside via the dot operator:
foo = {
"bar" : {
"baz" : [ {"boo" : "hoo"} , {"baba" : "loo"} ]
}
}
For instance, foo.bar.baz[1].baba returns "loo".
class Map(dict):
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.items():
if isinstance(v, dict):
v = Map(v)
if isinstance(v, list):
self.__convert(v)
self[k] = v
if kwargs:
for k, v in kwargs.items():
if isinstance(v, dict):
v = Map(v)
elif isinstance(v, list):
self.__convert(v)
self[k] = v
def __convert(self, v):
for elem in range(0, len(v)):
if isinstance(v[elem], dict):
v[elem] = Map(v[elem])
elif isinstance(v[elem], list):
self.__convert(v[elem])
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Building on Kugel's answer and taking Mike Graham's words of caution into consideration, what if we make a wrapper?
class DictWrap(object):
""" Wrap an existing dict, or create a new one, and access with either dot
notation or key lookup.
The attribute _data is reserved and stores the underlying dictionary.
When using the += operator with create=True, the empty nested dict is
replaced with the operand, effectively creating a default dictionary
of mixed types.
args:
d({}): Existing dict to wrap, an empty dict is created by default
create(True): Create an empty, nested dict instead of raising a KeyError
example:
>>>dw = DictWrap({'pp':3})
>>>dw.a.b += 2
>>>dw.a.b += 2
>>>dw.a['c'] += 'Hello'
>>>dw.a['c'] += ' World'
>>>dw.a.d
>>>print dw._data
{'a': {'c': 'Hello World', 'b': 4, 'd': {}}, 'pp': 3}
"""
def __init__(self, d=None, create=True):
if d is None:
d = {}
supr = super(DictWrap, self)
supr.__setattr__('_data', d)
supr.__setattr__('__create', create)
def __getattr__(self, name):
try:
value = self._data[name]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[name] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setattr__(self, name, value):
self._data[name] = value
def __getitem__(self, key):
try:
value = self._data[key]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[key] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setitem__(self, key, value):
self._data[key] = value
def __iadd__(self, other):
if self._data:
raise TypeError("A Nested dict will only be replaced if it's empty")
else:
return other
Use __getattr__, very simple, works in
Python 3.4.3
class myDict(dict):
def __getattr__(self,val):
return self[val]
blockBody=myDict()
blockBody['item1']=10000
blockBody['item2']="StackOverflow"
print(blockBody.item1)
print(blockBody.item2)
Output:
10000
StackOverflow
I like the Munch and it gives lot of handy options on top of dot access.
import munch
temp_1 = {'person': { 'fname': 'senthil', 'lname': 'ramalingam'}}
dict_munch = munch.munchify(temp_1)
dict_munch.person.fname
The language itself doesn't support this, but sometimes this is still a useful requirement. Besides the Bunch recipe, you can also write a little method which can access a dictionary using a dotted string:
def get_var(input_dict, accessor_string):
"""Gets data from a dictionary using a dotted accessor-string"""
current_data = input_dict
for chunk in accessor_string.split('.'):
current_data = current_data.get(chunk, {})
return current_data
which would support something like this:
>> test_dict = {'thing': {'spam': 12, 'foo': {'cheeze': 'bar'}}}
>> output = get_var(test_dict, 'thing.spam.foo.cheeze')
>> print output
'bar'
>>
I ended up trying BOTH the AttrDict and the Bunch libraries and found them to be way to slow for my uses. After a friend and I looked into it, we found that the main method for writing these libraries results in the library aggressively recursing through a nested object and making copies of the dictionary object throughout. With this in mind, we made two key changes. 1) We made attributes lazy-loaded 2) instead of creating copies of a dictionary object, we create copies of a light-weight proxy object. This is the final implementation. The performance increase of using this code is incredible. When using AttrDict or Bunch, these two libraries alone consumed 1/2 and 1/3 respectively of my request time(what!?). This code reduced that time to almost nothing(somewhere in the range of 0.5ms). This of course depends on your needs, but if you are using this functionality quite a bit in your code, definitely go with something simple like this.
class DictProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
def __getattr__(self, key):
try:
return wrap(getattr(self.obj, key))
except AttributeError:
try:
return self[key]
except KeyError:
raise AttributeError(key)
# you probably also want to proxy important list properties along like
# items(), iteritems() and __len__
class ListProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
# you probably also want to proxy important list properties along like
# __iter__ and __len__
def wrap(value):
if isinstance(value, dict):
return DictProxy(value)
if isinstance(value, (tuple, list)):
return ListProxy(value)
return value
See the original implementation here by https://stackoverflow.com/users/704327/michael-merickel.
The other thing to note, is that this implementation is pretty simple and doesn't implement all of the methods you might need. You'll need to write those as required on the DictProxy or ListProxy objects.
def dict_to_object(dick):
# http://stackoverflow.com/a/1305663/968442
class Struct:
def __init__(self, **entries):
self.__dict__.update(entries)
return Struct(**dick)
If one decides to permanently convert that dict to object this should do. You can create a throwaway object just before accessing.
d = dict_to_object(d)
This solution is a refinement upon the one offered by epool to address the requirement of the OP to access nested dicts in a consistent manner. The solution by epool did not allow for accessing nested dicts.
class YAMLobj(dict):
def __init__(self, args):
super(YAMLobj, self).__init__(args)
if isinstance(args, dict):
for k, v in args.iteritems():
if not isinstance(v, dict):
self[k] = v
else:
self.__setattr__(k, YAMLobj(v))
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(YAMLobj, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(YAMLobj, self).__delitem__(key)
del self.__dict__[key]
With this class, one can now do something like: A.B.C.D.
For infinite levels of nesting of dicts, lists, lists of dicts, and dicts of lists.
It also supports pickling
This is an extension of this answer.
class DotDict(dict):
# https://stackoverflow.com/a/70665030/913098
"""
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
Iterable are assumed to have a constructor taking list as input.
"""
def __init__(self, *args, **kwargs):
super(DotDict, self).__init__(*args, **kwargs)
args_with_kwargs = []
for arg in args:
args_with_kwargs.append(arg)
args_with_kwargs.append(kwargs)
args = args_with_kwargs
for arg in args:
if isinstance(arg, dict):
for k, v in arg.items():
self[k] = v
if isinstance(v, dict):
self[k] = DotDict(v)
elif isinstance(v, str) or isinstance(v, bytes):
self[k] = v
elif isinstance(v, Iterable):
klass = type(v)
map_value: List[Any] = []
for e in v:
map_e = DotDict(e) if isinstance(e, dict) else e
map_value.append(map_e)
self[k] = klass(map_value)
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(DotDict, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(DotDict, self).__delitem__(key)
del self.__dict__[key]
def __getstate__(self):
return self.__dict__
def __setstate__(self, d):
self.__dict__.update(d)
if __name__ == "__main__":
import pickle
def test_map():
d = {
"a": 1,
"b": {
"c": "d",
"e": 2,
"f": None
},
"g": [],
"h": [1, "i"],
"j": [1, "k", {}],
"l":
[
1,
"m",
{
"n": [3],
"o": "p",
"q": {
"r": "s",
"t": ["u", 5, {"v": "w"}, ],
"x": ("z", 1)
}
}
],
}
map_d = DotDict(d)
w = map_d.l[2].q.t[2].v
assert w == "w"
pickled = pickle.dumps(map_d)
unpickled = pickle.loads(pickled)
assert unpickled == map_d
kwargs_check = DotDict(a=1, b=[dict(c=2, d="3"), 5])
assert kwargs_check.b[0].d == "3"
kwargs_and_args_check = DotDict(d, a=1, b=[dict(c=2, d="3"), 5])
assert kwargs_and_args_check.l[2].q.t[2].v == "w"
assert kwargs_and_args_check.b[0].d == "3"
test_map()
I dislike adding another log to a (more than) 10-year old fire, but I'd also check out the dotwiz library, which I've recently released - just this year actually.
It's a relatively tiny library, which also performs really well for get (access) and set (create) times in benchmarks, at least as compared to other alternatives.
Install dotwiz via pip
pip install dotwiz
It does everything you want it to do and subclasses dict, so it operates like a normal dictionary:
from dotwiz import DotWiz
dw = DotWiz()
dw.hello = 'world'
dw.hello
dw.hello += '!'
# dw.hello and dw['hello'] now both return 'world!'
dw.val = 5
dw.val2 = 'Sam'
On top of that, you can convert it to and from dict objects:
d = dw.to_dict()
dw = DotWiz(d) # automatic conversion in constructor
This means that if something you want to access is already in dict form, you can turn it into a DotWiz for easy access:
import json
json_dict = json.loads(text)
data = DotWiz(json_dict)
print data.location.city
Finally, something exciting I am working on is an existing feature request so that it automatically creates new child DotWiz instances so you can do things like this:
dw = DotWiz()
dw['people.steve.age'] = 31
dw
# ✫(people=✫(steve=✫(age=31)))
Comparison with dotmap
I've added a quick and dirty performance comparison with dotmap below.
First, install both libraries with pip:
pip install dotwiz dotmap
I came up with the following code for benchmark purposes:
from timeit import timeit
from dotwiz import DotWiz
from dotmap import DotMap
d = {'hey': {'so': [{'this': {'is': {'pretty': {'cool': True}}}}]}}
dw = DotWiz(d)
# ✫(hey=✫(so=[✫(this=✫(is=✫(pretty={'cool'})))]))
dm = DotMap(d)
# DotMap(hey=DotMap(so=[DotMap(this=DotMap(is=DotMap(pretty={'cool'})))]))
assert dw.hey.so[0].this['is'].pretty.cool == dm.hey.so[0].this['is'].pretty.cool
n = 100_000
print('dotwiz (create): ', round(timeit('DotWiz(d)', number=n, globals=globals()), 3))
print('dotmap (create): ', round(timeit('DotMap(d)', number=n, globals=globals()), 3))
print('dotwiz (get): ', round(timeit("dw.hey.so[0].this['is'].pretty.cool", number=n, globals=globals()), 3))
print('dotmap (get): ', round(timeit("dm.hey.so[0].this['is'].pretty.cool", number=n, globals=globals()), 3))
Results, on my M1 Mac, running Python 3.10:
dotwiz (create): 0.189
dotmap (create): 1.085
dotwiz (get): 0.014
dotmap (get): 0.335
This also works with nested dicts and makes sure that dicts which are appended later behave the same:
class DotDict(dict):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Recursively turn nested dicts into DotDicts
for key, value in self.items():
if type(value) is dict:
self[key] = DotDict(value)
def __setitem__(self, key, item):
if type(item) is dict:
item = DotDict(item)
super().__setitem__(key, item)
__setattr__ = __setitem__
__getattr__ = dict.__getitem__
Using namedtuple allows dot access.
It is like a lightweight object which also has the properties of a tuple.
It allows to define properties and access them using the dot operator.
from collections import namedtuple
Data = namedtuple('Data', ['key1', 'key2'])
dataObj = Data(val1, key2=val2) # can instantiate using keyword arguments and positional arguments
Access using dot operator
dataObj.key1 # Gives val1
datObj.key2 # Gives val2
Access using tuple indices
dataObj[0] # Gives val1
dataObj[1] # Gives val2
But remember this is a tuple; not a dict. So the below code will give error
dataObj['key1'] # Gives TypeError: tuple indices must be integers or slices, not str
Refer: namedtuple
It is an old question but I recently found that sklearn has an implemented version dict accessible by key, namely Bunch
https://scikit-learn.org/stable/modules/generated/sklearn.utils.Bunch.html#sklearn.utils.Bunch
Simplest solution.
Define a class with only pass statement in it. Create object for this class and use dot notation.
class my_dict:
pass
person = my_dict()
person.id = 1 # create using dot notation
person.phone = 9999
del person.phone # Remove a property using dot notation
name_data = my_dict()
name_data.first_name = 'Arnold'
name_data.last_name = 'Schwarzenegger'
person.name = name_data
person.name.first_name # dot notation access for nested properties - gives Arnold
One simple way to get dot access (but not array access), is to use a plain object in Python. Like this:
class YourObject:
def __init__(self, *args, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
...and use it like this:
>>> obj = YourObject(key="value")
>>> print(obj.key)
"value"
... to convert it to a dict:
>>> print(obj.__dict__)
{"key": "value"}
The answer of #derek73 is very neat, but it cannot be pickled nor (deep)copied, and it returns None for missing keys. The code below fixes this.
Edit: I did not see the answer above that addresses the exact same point (upvoted). I'm leaving the answer here for reference.
class dotdict(dict):
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __getattr__(self, name):
try:
return self[name]
except KeyError:
raise AttributeError(name)
I just needed to access a dictionary using a dotted path string, so I came up with:
def get_value_from_path(dictionary, parts):
""" extracts a value from a dictionary using a dotted path string """
if type(parts) is str:
parts = parts.split('.')
if len(parts) > 1:
return get_value_from_path(dictionary[parts[0]], parts[1:])
return dictionary[parts[0]]
a = {'a':{'b':'c'}}
print(get_value_from_path(a, 'a.b')) # c
The implemention used by kaggle_environments is a function called structify.
class Struct(dict):
def __init__(self, **entries):
entries = {k: v for k, v in entries.items() if k != "items"}
dict.__init__(self, entries)
self.__dict__.update(entries)
def __setattr__(self, attr, value):
self.__dict__[attr] = value
self[attr] = value
# Added benefit of cloning lists and dicts.
def structify(o):
if isinstance(o, list):
return [structify(o[i]) for i in range(len(o))]
elif isinstance(o, dict):
return Struct(**{k: structify(v) for k, v in o.items()})
return o
https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/utils.py
This may be useful for testing AI simulation agents in games like ConnectX
from kaggle_environments import structify
obs = structify({ 'remainingOverageTime': 60, 'step': 0, 'mark': 1, 'board': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]})
conf = structify({ 'timeout': 2, 'actTimeout': 2, 'agentTimeout': 60, 'episodeSteps': 1000, 'runTimeout': 1200, 'columns': 7, 'rows': 6, 'inarow': 4, '__raw_path__': '/kaggle_simulations/agent/main.py' })
def agent(obs, conf):
action = obs.step % conf.columns
return action
Not a direct answer to the OP's question, but inspired by and perhaps useful for some.. I've created an object-based solution using the internal __dict__ (In no way optimized code)
payload = {
"name": "John",
"location": {
"lat": 53.12312312,
"long": 43.21345112
},
"numbers": [
{
"role": "home",
"number": "070-12345678"
},
{
"role": "office",
"number": "070-12345679"
}
]
}
class Map(object):
"""
Dot style access to object members, access raw values
with an underscore e.g.
class Foo(Map):
def foo(self):
return self.get('foo') + 'bar'
obj = Foo(**{'foo': 'foo'})
obj.foo => 'foobar'
obj._foo => 'foo'
"""
def __init__(self, *args, **kwargs):
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
if kwargs:
for k, v in kwargs.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
def __getattribute__(self, attr):
if hasattr(self, 'get_' + attr):
return object.__getattribute__(self, 'get_' + attr)()
else:
return object.__getattribute__(self, attr)
def get(self, key):
try:
return self.__dict__.get('get_' + key)()
except (AttributeError, TypeError):
return self.__dict__.get(key)
def __repr__(self):
return u"<{name} object>".format(
name=self.__class__.__name__
)
class Number(Map):
def get_role(self):
return self.get('role')
def get_number(self):
return self.get('number')
class Location(Map):
def get_latitude(self):
return self.get('lat') + 1
def get_longitude(self):
return self.get('long') + 1
class Item(Map):
def get_name(self):
return self.get('name') + " Doe"
def get_location(self):
return Location(**self.get('location'))
def get_numbers(self):
return [Number(**n) for n in self.get('numbers')]
# Tests
obj = Item({'foo': 'bar'}, **payload)
assert type(obj) == Item
assert obj._name == "John"
assert obj.name == "John Doe"
assert type(obj.location) == Location
assert obj.location._lat == 53.12312312
assert obj.location._long == 43.21345112
assert obj.location.latitude == 54.12312312
assert obj.location.longitude == 44.21345112
for n in obj.numbers:
assert type(n) == Number
if n.role == 'home':
assert n.number == "070-12345678"
if n.role == 'office':
assert n.number == "070-12345679"

Is there a way of subclassing from dict and collections.abc.MutableMapping together?

Let's for the sake of example assume I want to subclass dict and have all keys capitalized:
class capdict(dict):
def __init__(self,*args,**kwds):
super().__init__(*args,**kwds)
mod = [(k.capitalize(),v) for k,v in super().items()]
super().clear()
super().update(mod)
def __getitem__(self,key):
return super().__getitem__(key.capitalize())
def __setitem__(self,key,value):
super().__setitem__(key.capitalize(),value)
def __delitem__(self,key):
super().__detitem__(key.capitalize())
This works to an extent,
>>> ex = capdict(map(reversed,enumerate("abc")))
>>> ex
{'A': 0, 'B': 1, 'C': 2}
>>> ex['a']
0
but, of course, only for methods I remembered to implement, for example
>>> 'a' in ex
False
is not the desired behavior.
Now, the lazy way of filling in all the methods that can be derived from the "core" ones
would be mixing in collections.abc.MutableMapping. Only, it doesn't work here. I presume because the methods in question (__contains__ in the example) are already provided by dict.
Is there a way of having my cake and eating it? Some magic to let MutableMapping only see the methods I've overridden so that it reimplements the others based on those?
What you could do:
This likely won't work out well (i.e. not the cleanest design), but you could inherit from MutableMapping first and then from dict second.
Then MutableMapping would use whatever methods you've implemented (because they are the first in the lookup chain):
>>> class D(MutableMapping, dict):
def __getitem__(self, key):
print(f'Intercepted a lookup for {key!r}')
return dict.__getitem__(self, key)
>>> d = D(x=10, y=20)
>>> d.get('x', 0)
Intercepted a lookup for 'x'
10
>>> d.get('z', 0)
Intercepted a lookup for 'z'
0
Better way:
The cleanest approach (easy to understand and test) is to just inherit from MutableMapping and then implement the required methods using a regular dict as the base data store (with composition rather than inheritance):
>>> class CapitalizingDict(MutableMapping):
def __init__(self, *args, **kwds):
self.store = {}
self.update(*args, **kwds)
def __getitem__(self, key):
key = key.capitalize()
return self.store[key]
def __setitem__(self, key, value):
key = key.capitalize()
self.store[key] = value
def __delitem__(self, key):
del self.store[key]
def __len__(self):
return len(self.store)
def __iter__(self):
return iter(self.store)
def __repr__(self):
return repr(self.store)
>>> d = CapitalizingDict(x=10, y=20)
>>> d
{'X': 10, 'Y': 20}
>>> d['x']
10
>>> d.get('x', 0)
10
>>> d.get('z', 0)
0
>>> d['w'] = 30
>>> d['W']
30

Python OrderedDict with lambda [duplicate]

I would like to combine OrderedDict() and defaultdict() from collections in one object, which shall be an ordered, default dict.
Is this possible?
The following (using a modified version of this recipe) works for me:
from collections import OrderedDict, Callable
class DefaultOrderedDict(OrderedDict):
# Source: http://stackoverflow.com/a/6190500/562769
def __init__(self, default_factory=None, *a, **kw):
if (default_factory is not None and
not isinstance(default_factory, Callable)):
raise TypeError('first argument must be callable')
OrderedDict.__init__(self, *a, **kw)
self.default_factory = default_factory
def __getitem__(self, key):
try:
return OrderedDict.__getitem__(self, key)
except KeyError:
return self.__missing__(key)
def __missing__(self, key):
if self.default_factory is None:
raise KeyError(key)
self[key] = value = self.default_factory()
return value
def __reduce__(self):
if self.default_factory is None:
args = tuple()
else:
args = self.default_factory,
return type(self), args, None, None, self.items()
def copy(self):
return self.__copy__()
def __copy__(self):
return type(self)(self.default_factory, self)
def __deepcopy__(self, memo):
import copy
return type(self)(self.default_factory,
copy.deepcopy(self.items()))
def __repr__(self):
return 'OrderedDefaultDict(%s, %s)' % (self.default_factory,
OrderedDict.__repr__(self))
Here is another possibility, inspired by Raymond Hettinger's super() Considered Super, tested on Python 2.7.X and 3.4.X:
from collections import OrderedDict, defaultdict
class OrderedDefaultDict(OrderedDict, defaultdict):
def __init__(self, default_factory=None, *args, **kwargs):
#in python3 you can omit the args to super
super(OrderedDefaultDict, self).__init__(*args, **kwargs)
self.default_factory = default_factory
If you check out the class's MRO (aka, help(OrderedDefaultDict)), you'll see this:
class OrderedDefaultDict(collections.OrderedDict, collections.defaultdict)
| Method resolution order:
| OrderedDefaultDict
| collections.OrderedDict
| collections.defaultdict
| __builtin__.dict
| __builtin__.object
meaning that when an instance of OrderedDefaultDict is initialized, it defers to the OrderedDict's init, but this one in turn will call the defaultdict's methods before calling __builtin__.dict, which is precisely what we want.
If you want a simple solution that doesn't require a class, you can just use OrderedDict.setdefault(key, default=None) or OrderedDict.get(key, default=None). If you only get / set from a few places, say in a loop, you can easily just setdefault.
totals = collections.OrderedDict()
for i, x in some_generator():
totals[i] = totals.get(i, 0) + x
It is even easier for lists with setdefault:
agglomerate = collections.OrderedDict()
for i, x in some_generator():
agglomerate.setdefault(i, []).append(x)
But if you use it more than a few times, it is probably better to set up a class, like in the other answers.
Here's another solution to think about if your use case is simple like mine and you don't necessarily want to add the complexity of a DefaultOrderedDict class implementation to your code.
from collections import OrderedDict
keys = ['a', 'b', 'c']
items = [(key, None) for key in keys]
od = OrderedDict(items)
(None is my desired default value.)
Note that this solution won't work if one of your requirements is to dynamically insert new keys with the default value. A tradeoff of simplicity.
Update 3/13/17 - I learned of a convenience function for this use case. Same as above but you can omit the line items = ... and just:
od = OrderedDict.fromkeys(keys)
Output:
OrderedDict([('a', None), ('b', None), ('c', None)])
And if your keys are single characters, you can just pass one string:
OrderedDict.fromkeys('abc')
This has the same output as the two examples above.
You can also pass a default value as the second arg to OrderedDict.fromkeys(...).
Another simple approach would be to use dictionary get method
>>> from collections import OrderedDict
>>> d = OrderedDict()
>>> d['key'] = d.get('key', 0) + 1
>>> d['key'] = d.get('key', 0) + 1
>>> d
OrderedDict([('key', 2)])
>>>
A simpler version of #zeekay 's answer is:
from collections import OrderedDict
class OrderedDefaultListDict(OrderedDict): #name according to default
def __missing__(self, key):
self[key] = value = [] #change to whatever default you want
return value
A simple and elegant solution building on #NickBread.
Has a slightly different API to set the factory, but good defaults are always nice to have.
class OrderedDefaultDict(OrderedDict):
factory = list
def __missing__(self, key):
self[key] = value = self.factory()
return value
I created slightly fixed and more simplified version of the accepted answer, actual for python 3.7.
from collections import OrderedDict
from copy import copy, deepcopy
import pickle
from typing import Any, Callable
class DefaultOrderedDict(OrderedDict):
def __init__(
self,
default_factory: Callable[[], Any],
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.default_factory = default_factory
def __getitem__(self, key):
try:
return super().__getitem__(key)
except KeyError:
return self.__missing__(key)
def __missing__(self, key):
self[key] = value = self.default_factory()
return value
def __reduce__(self):
return type(self), (self.default_factory, ), None, None, iter(self.items())
def copy(self):
return self.__copy__()
def __copy__(self):
return type(self)(self.default_factory, self)
def __deepcopy__(self, memo):
return type(self)(self.default_factory, deepcopy(tuple(self.items()), memo))
def __repr__(self):
return f'{self.__class__.__name__}({self.default_factory}, {OrderedDict(self).__repr__()})'
And, that may be even more important, provided some tests.
a = DefaultOrderedDict(list)
# testing default
assert a['key'] == []
a['key'].append(1)
assert a['key'] == [1, ]
# testing repr
assert repr(a) == "DefaultOrderedDict(<class 'list'>, OrderedDict([('key', [1])]))"
# testing copy
b = a.copy()
assert b['key'] is a['key']
c = copy(a)
assert c['key'] is a['key']
d = deepcopy(a)
assert d['key'] is not a['key']
assert d['key'] == a['key']
# testing pickle
saved = pickle.dumps(a)
restored = pickle.loads(saved)
assert restored is not a
assert restored == a
# testing order
a['second_key'] = [2, ]
a['key'] = [3, ]
assert list(a.items()) == [('key', [3, ]), ('second_key', [2, ])]
Inspired by other answers on this thread, you can use something like,
from collections import OrderedDict
class OrderedDefaultDict(OrderedDict):
def __missing__(self, key):
value = OrderedDefaultDict()
self[key] = value
return value
I would like to know if there're any downsides of initializing another object of the same class in the missing method.
i tested the default dict and discovered it's also sorted!
maybe it was just a coincidence but anyway you can use the sorted function:
sorted(s.items())
i think it's simpler

Indexable weak ordered set in Python

I was wondering if there is an easy way to build an indexable weak ordered set in Python. I tried to build one myself. Here's what I came up with:
"""
An indexable, ordered set of objects, which are held by weak reference.
"""
from nose.tools import *
import blist
import weakref
class WeakOrderedSet(blist.weaksortedset):
"""
A blist.weaksortedset whose key is the insertion order.
"""
def __init__(self, iterable=()):
self.insertion_order = weakref.WeakKeyDictionary() # value_type to int
self.last_key = 0
super().__init__(key=self.insertion_order.__getitem__)
for item in iterable:
self.add(item)
def __delitem__(self, index):
values = super().__getitem__(index)
super().__delitem__(index)
if not isinstance(index, slice):
# values is just one element
values = [values]
for value in values:
if value not in self:
del self.insertion_order[value]
def add(self, value):
# Choose a key so that value is on the end.
if value not in self.insertion_order:
key = self.last_key
self.last_key += 1
self.insertion_order[value] = key
super().add(value)
def discard(self, value):
super().discard(value)
if value not in self:
del self.insertion_order[value]
def remove(self, value):
super().remove(value)
if value not in self:
del self.insertion_order[value]
def pop(self, *args, **kwargs):
value = super().pop(*args, **kwargs)
if value not in self:
del self.insertion_order[value]
def clear(self):
super().clear()
self.insertion_order.clear()
def update(self, *args):
for arg in args:
for item in arg:
self.add(item)
if __name__ == '__main__':
class Dummy:
def __init__(self, value):
self.value = value
x = [Dummy(i) for i in range(10)]
w = WeakOrderedSet(reversed(x))
del w[2:8]
assert_equals([9,8,1,0], [i.value for i in w])
del w[0]
assert_equals([8,1,0], [i.value for i in w])
del x
assert_equals([], [i.value for i in w])
Is there an easier way to do this?
The easiest way to is to take advantage of existing components in the standard library.
OrderedDict and the MutableSet ABC make it easy to write an OrderedSet.
Likewise, you can reuse the existing weakref.WeakSet and replace its underlying set() with an OrderedSet.
Indexing is more difficult to achieve -- these easiest way it to convert it to a list when needed. That is necessary because sets and dicts are intrinsically sparse.
import collections.abc
import weakref
class OrderedSet(collections.abc.MutableSet):
def __init__(self, values=()):
self._od = collections.OrderedDict().fromkeys(values)
def __len__(self):
return len(self._od)
def __iter__(self):
return iter(self._od)
def __contains__(self, value):
return value in self._od
def add(self, value):
self._od[value] = None
def discard(self, value):
self._od.pop(value, None)
class OrderedWeakrefSet(weakref.WeakSet):
def __init__(self, values=()):
super(OrderedWeakrefSet, self).__init__()
self.data = OrderedSet()
for elem in values:
self.add(elem)
Use it like this:
>>> names = OrderedSet(['Alice', 'Bob', 'Carol', 'Bob', 'Dave', 'Edna'])
>>> len(names)
5
>>> 'Bob' in names
True
>>> s = list(names)
>>> s[2]
'Carol'
>>> s[4]
'Edna'
Note as of Python 3.7, regular dicts are guaranteed to be ordered, so you can substitute dict for OrderedDict in this recipe and it will all work fine :-)
Raymond has a great and succinct answer, as usual, but I actually came here a while back interested in the indexable part, more than the weakref part. I eventually built my own answer, which became the IndexedSet type in the boltons utility library. Basically, it's all the best parts of the list and set APIs, combined.
>>> x = IndexedSet(list(range(4)) + list(range(8)))
>>> x
IndexedSet([0, 1, 2, 3, 4, 5, 6, 7])
>>> x - set(range(2))
IndexedSet([2, 3, 4, 5, 6, 7])
>>> x[-1]
7
>>> fcr = IndexedSet('freecreditreport.com')
>>> ''.join(fcr[:fcr.index('.')])
'frecditpo'
If the weakref part is critical you can likely add it via inheritance or direct modification of a copy of the code (the module is standalone, pure-Python, and 2/3 compatible).

How to implement __iter__(self) for a container object (Python)

I have written a custom container object.
According to this page, I need to implement this method on my object:
__iter__(self)
However, upon following up the link to Iterator Types in the Python reference manual, there are no examples given of how to implement your own.
Can someone post a snippet (or link to a resource), that shows how to do this?
The container I am writing, is a map (i.e. stores values by unique keys).
dicts can be iterated like this:
for k, v in mydict.items()
In this case I need to be able to return two elements (a tuple?) in the iterator.
It is still not clear how to implement such an iterator (despite the several answers that have been kindly provided). Could someone please shed some more light on how to implement an iterator for a map-like container object? (i.e. a custom class that acts like a dict)?
I normally would use a generator function. Each time you use a yield statement, it will add an item to the sequence.
The following will create an iterator that yields five, and then every item in some_list.
def __iter__(self):
yield 5
yield from some_list
Pre-3.3, yield from didn't exist, so you would have to do:
def __iter__(self):
yield 5
for x in some_list:
yield x
Another option is to inherit from the appropriate abstract base class from the `collections module as documented here.
In case the container is its own iterator, you can inherit from
collections.Iterator. You only need to implement the next method then.
An example is:
>>> from collections import Iterator
>>> class MyContainer(Iterator):
... def __init__(self, *data):
... self.data = list(data)
... def next(self):
... if not self.data:
... raise StopIteration
... return self.data.pop()
...
...
...
>>> c = MyContainer(1, "two", 3, 4.0)
>>> for i in c:
... print i
...
...
4.0
3
two
1
While you are looking at the collections module, consider inheriting from Sequence, Mapping or another abstract base class if that is more appropriate. Here is an example for a Sequence subclass:
>>> from collections import Sequence
>>> class MyContainer(Sequence):
... def __init__(self, *data):
... self.data = list(data)
... def __getitem__(self, index):
... return self.data[index]
... def __len__(self):
... return len(self.data)
...
...
...
>>> c = MyContainer(1, "two", 3, 4.0)
>>> for i in c:
... print i
...
...
1
two
3
4.0
NB: Thanks to Glenn Maynard for drawing my attention to the need to clarify the difference between iterators on the one hand and containers that are iterables rather than iterators on the other.
usually __iter__() just return self if you have already define the next() method (generator object):
here is a Dummy example of a generator :
class Test(object):
def __init__(self, data):
self.data = data
def next(self):
if not self.data:
raise StopIteration
return self.data.pop()
def __iter__(self):
return self
but __iter__() can also be used like this:
http://mail.python.org/pipermail/tutor/2006-January/044455.html
The "iterable interface" in python consists of two methods __next__() and __iter__(). The __next__ function is the most important, as it defines the iterator behavior - that is, the function determines what value should be returned next. The __iter__() method is used to reset the starting point of the iteration. Often, you will find that __iter__() can just return self when __init__() is used to set the starting point.
See the following code for defining a Class Reverse which implements the "iterable interface" and defines an iterator over any instance from any sequence class. The __next__() method starts at the end of the sequence and returns values in reverse order of the sequence. Note that instances from a class implementing the "sequence interface" must define a __len__() and a __getitem__() method.
class Reverse:
"""Iterator for looping over a sequence backwards."""
def __init__(self, seq):
self.data = seq
self.index = len(seq)
def __iter__(self):
return self
def __next__(self):
if self.index == 0:
raise StopIteration
self.index = self.index - 1
return self.data[self.index]
>>> rev = Reverse('spam')
>>> next(rev) # note no need to call iter()
'm'
>>> nums = Reverse(range(1,10))
>>> next(nums)
9
If your object contains a set of data you want to bind your object's iter to, you can cheat and do this:
>>> class foo:
def __init__(self, *params):
self.data = params
def __iter__(self):
if hasattr(self.data[0], "__iter__"):
return self.data[0].__iter__()
return self.data.__iter__()
>>> d=foo(6,7,3,8, "ads", 6)
>>> for i in d:
print i
6
7
3
8
ads
6
To answer the question about mappings: your provided __iter__ should iterate over the keys of the mapping. The following is a simple example that creates a mapping x -> x * x and works on Python3 extending the ABC mapping.
import collections.abc
class MyMap(collections.abc.Mapping):
def __init__(self, n):
self.n = n
def __getitem__(self, key): # given a key, return it's value
if 0 <= key < self.n:
return key * key
else:
raise KeyError('Invalid key')
def __iter__(self): # iterate over all keys
for x in range(self.n):
yield x
def __len__(self):
return self.n
m = MyMap(5)
for k, v in m.items():
print(k, '->', v)
# 0 -> 0
# 1 -> 1
# 2 -> 4
# 3 -> 9
# 4 -> 16
In case you don't want to inherit from dict as others have suggested, here is direct answer to the question on how to implement __iter__ for a crude example of a custom dict:
class Attribute:
def __init__(self, key, value):
self.key = key
self.value = value
class Node(collections.Mapping):
def __init__(self):
self.type = ""
self.attrs = [] # List of Attributes
def __iter__(self):
for attr in self.attrs:
yield attr.key
That uses a generator, which is well described here.
Since we're inheriting from Mapping, you need to also implement __getitem__ and __len__:
def __getitem__(self, key):
for attr in self.attrs:
if key == attr.key:
return attr.value
raise KeyError
def __len__(self):
return len(self.attrs)
One option that might work for some cases is to make your custom class inherit from dict. This seems like a logical choice if it acts like a dict; maybe it should be a dict. This way, you get dict-like iteration for free.
class MyDict(dict):
def __init__(self, custom_attribute):
self.bar = custom_attribute
mydict = MyDict('Some name')
mydict['a'] = 1
mydict['b'] = 2
print mydict.bar
for k, v in mydict.items():
print k, '=>', v
Output:
Some name
a => 1
b => 2
example for inhert from dict, modify its iter, for example, skip key 2 when in for loop
# method 1
class Dict(dict):
def __iter__(self):
keys = self.keys()
for i in keys:
if i == 2:
continue
yield i
# method 2
class Dict(dict):
def __iter__(self):
for i in super(Dict, self).__iter__():
if i == 2:
continue
yield i

Categories