Indexable weak ordered set in Python - python

I was wondering if there is an easy way to build an indexable weak ordered set in Python. I tried to build one myself. Here's what I came up with:
"""
An indexable, ordered set of objects, which are held by weak reference.
"""
from nose.tools import *
import blist
import weakref
class WeakOrderedSet(blist.weaksortedset):
"""
A blist.weaksortedset whose key is the insertion order.
"""
def __init__(self, iterable=()):
self.insertion_order = weakref.WeakKeyDictionary() # value_type to int
self.last_key = 0
super().__init__(key=self.insertion_order.__getitem__)
for item in iterable:
self.add(item)
def __delitem__(self, index):
values = super().__getitem__(index)
super().__delitem__(index)
if not isinstance(index, slice):
# values is just one element
values = [values]
for value in values:
if value not in self:
del self.insertion_order[value]
def add(self, value):
# Choose a key so that value is on the end.
if value not in self.insertion_order:
key = self.last_key
self.last_key += 1
self.insertion_order[value] = key
super().add(value)
def discard(self, value):
super().discard(value)
if value not in self:
del self.insertion_order[value]
def remove(self, value):
super().remove(value)
if value not in self:
del self.insertion_order[value]
def pop(self, *args, **kwargs):
value = super().pop(*args, **kwargs)
if value not in self:
del self.insertion_order[value]
def clear(self):
super().clear()
self.insertion_order.clear()
def update(self, *args):
for arg in args:
for item in arg:
self.add(item)
if __name__ == '__main__':
class Dummy:
def __init__(self, value):
self.value = value
x = [Dummy(i) for i in range(10)]
w = WeakOrderedSet(reversed(x))
del w[2:8]
assert_equals([9,8,1,0], [i.value for i in w])
del w[0]
assert_equals([8,1,0], [i.value for i in w])
del x
assert_equals([], [i.value for i in w])
Is there an easier way to do this?

The easiest way to is to take advantage of existing components in the standard library.
OrderedDict and the MutableSet ABC make it easy to write an OrderedSet.
Likewise, you can reuse the existing weakref.WeakSet and replace its underlying set() with an OrderedSet.
Indexing is more difficult to achieve -- these easiest way it to convert it to a list when needed. That is necessary because sets and dicts are intrinsically sparse.
import collections.abc
import weakref
class OrderedSet(collections.abc.MutableSet):
def __init__(self, values=()):
self._od = collections.OrderedDict().fromkeys(values)
def __len__(self):
return len(self._od)
def __iter__(self):
return iter(self._od)
def __contains__(self, value):
return value in self._od
def add(self, value):
self._od[value] = None
def discard(self, value):
self._od.pop(value, None)
class OrderedWeakrefSet(weakref.WeakSet):
def __init__(self, values=()):
super(OrderedWeakrefSet, self).__init__()
self.data = OrderedSet()
for elem in values:
self.add(elem)
Use it like this:
>>> names = OrderedSet(['Alice', 'Bob', 'Carol', 'Bob', 'Dave', 'Edna'])
>>> len(names)
5
>>> 'Bob' in names
True
>>> s = list(names)
>>> s[2]
'Carol'
>>> s[4]
'Edna'
Note as of Python 3.7, regular dicts are guaranteed to be ordered, so you can substitute dict for OrderedDict in this recipe and it will all work fine :-)

Raymond has a great and succinct answer, as usual, but I actually came here a while back interested in the indexable part, more than the weakref part. I eventually built my own answer, which became the IndexedSet type in the boltons utility library. Basically, it's all the best parts of the list and set APIs, combined.
>>> x = IndexedSet(list(range(4)) + list(range(8)))
>>> x
IndexedSet([0, 1, 2, 3, 4, 5, 6, 7])
>>> x - set(range(2))
IndexedSet([2, 3, 4, 5, 6, 7])
>>> x[-1]
7
>>> fcr = IndexedSet('freecreditreport.com')
>>> ''.join(fcr[:fcr.index('.')])
'frecditpo'
If the weakref part is critical you can likely add it via inheritance or direct modification of a copy of the code (the module is standalone, pure-Python, and 2/3 compatible).

Related

How to filter both the keys and values of a dict in python?

I want to achieve a customized dict that return the items according to an attribute mode as follows:
class MyDict(dict):
def __init__(self):
super(MyDict, self).__init__()
self.mode = True
mydict = MyDict()
# fill data {0:0, 1:1, 2:2, 3:3, ..., 9:9}
for i in range(10):
mydict[i] = i
mydict.mode acts as a filter. For example, when mydict.mode is True, mydict works like it only has the first half items, and when mydict.mode is False, it only contains the remaining data.
mydict.mode=True
print(mydict) # {0:0, 1:1, 2:2, 3:3, 4:4}
print(mydict.keys()) # [0, 1, 2, 3, 4]
mydict.mode=False
print(mydict) # {5:5, 6:6, 7:7, 8:8, 9:9}
print(mydict.keys()) # [5, 6, 7, 8, 9]
The above code is only a simple example, the support for more complex filters is also expected.
At first, I want to filter items within the functions of dict. The problem is that the class dict in python is implemented by C language rather than python, I don't know which and where is the most original iter function. So I have to rewrite all functions for my customized dict, like keys(), items(), values(), __contains__, __getitem__ ...
Is there any better idea to achieve my customized dict?
There's not really a lot that you need to implement - just 3 higher level function (keys, values, items) and 3 dunder functions (setitem, getitem, contains). Add a few helper functions to keep the code concise then you end up with:
class MyDict:
def __init__(self, mode=True):
self._dict = dict()
self._mode = mode
#property
def mode(self):
return self._mode
#mode.setter
def mode(self, mode):
self._mode = mode
def __split(self, list_):
mid = len(self._dict) // 2
return list_[:mid] if self._mode else list_[mid:]
def __sublistK(self):
return self.__split(list(self._dict.keys()))
def __sublistV(self):
return self.__split(list(self._dict.values()))
def __sublistI(self):
return self.__split(list(self._dict.items()))
def keys(self):
for k in self.__sublistK():
yield k
def values(self):
for v in self.__sublistV():
yield v
def items(self):
for k, v in self.__sublistI():
yield k, v
def __setitem__(self, k, v):
self._dict[k] = v
def __getitem__(self, k):
if k in self:
return self._dict[k]
def __contains__(self, k):
return k in self.__sublistK()
Now you can use this as a regular dictionary except that the output will be influenced by the mode value
I hope this helps you,
mydict = MyDict()
for i in range(11):
mydict[i] = i
mid_index = int(len(mydict.keys()) / 2)
if mydict.mode == True:
new_keys = list(mydict.keys())[:mid_index]
new_values = list(mydict.values())[:mid_index]
new_dict = dict(zip(new_keys, new_values))
print(new_dict)
else:
new_keys = list(mydict.keys())[mid_index:]
new_values = list(mydict.values())[mid_index:]
new_dict = dict(zip(new_keys, new_values))
print(new_dict)
This code first identifies the dictionary's middle key's index and then returns the values according to that.
Finally I rewrite all buildin functions of dict, including
__getitem__
__setattr__
values
items
keys
get
__contains__.
Taking keys as an example:
class MyDict(dict):
def __init__(self, filter):
self.filter = filter
def keys(self):
return self.filter(super().keys())
where filter is a customized filter function.
Also, I notice that MyDict().get("key", default=None) doesn't call the function __getitem__, and I still need to rewrite it.

Is there a way of subclassing from dict and collections.abc.MutableMapping together?

Let's for the sake of example assume I want to subclass dict and have all keys capitalized:
class capdict(dict):
def __init__(self,*args,**kwds):
super().__init__(*args,**kwds)
mod = [(k.capitalize(),v) for k,v in super().items()]
super().clear()
super().update(mod)
def __getitem__(self,key):
return super().__getitem__(key.capitalize())
def __setitem__(self,key,value):
super().__setitem__(key.capitalize(),value)
def __delitem__(self,key):
super().__detitem__(key.capitalize())
This works to an extent,
>>> ex = capdict(map(reversed,enumerate("abc")))
>>> ex
{'A': 0, 'B': 1, 'C': 2}
>>> ex['a']
0
but, of course, only for methods I remembered to implement, for example
>>> 'a' in ex
False
is not the desired behavior.
Now, the lazy way of filling in all the methods that can be derived from the "core" ones
would be mixing in collections.abc.MutableMapping. Only, it doesn't work here. I presume because the methods in question (__contains__ in the example) are already provided by dict.
Is there a way of having my cake and eating it? Some magic to let MutableMapping only see the methods I've overridden so that it reimplements the others based on those?
What you could do:
This likely won't work out well (i.e. not the cleanest design), but you could inherit from MutableMapping first and then from dict second.
Then MutableMapping would use whatever methods you've implemented (because they are the first in the lookup chain):
>>> class D(MutableMapping, dict):
def __getitem__(self, key):
print(f'Intercepted a lookup for {key!r}')
return dict.__getitem__(self, key)
>>> d = D(x=10, y=20)
>>> d.get('x', 0)
Intercepted a lookup for 'x'
10
>>> d.get('z', 0)
Intercepted a lookup for 'z'
0
Better way:
The cleanest approach (easy to understand and test) is to just inherit from MutableMapping and then implement the required methods using a regular dict as the base data store (with composition rather than inheritance):
>>> class CapitalizingDict(MutableMapping):
def __init__(self, *args, **kwds):
self.store = {}
self.update(*args, **kwds)
def __getitem__(self, key):
key = key.capitalize()
return self.store[key]
def __setitem__(self, key, value):
key = key.capitalize()
self.store[key] = value
def __delitem__(self, key):
del self.store[key]
def __len__(self):
return len(self.store)
def __iter__(self):
return iter(self.store)
def __repr__(self):
return repr(self.store)
>>> d = CapitalizingDict(x=10, y=20)
>>> d
{'X': 10, 'Y': 20}
>>> d['x']
10
>>> d.get('x', 0)
10
>>> d.get('z', 0)
0
>>> d['w'] = 30
>>> d['W']
30

Python OrderedDict with lambda [duplicate]

I would like to combine OrderedDict() and defaultdict() from collections in one object, which shall be an ordered, default dict.
Is this possible?
The following (using a modified version of this recipe) works for me:
from collections import OrderedDict, Callable
class DefaultOrderedDict(OrderedDict):
# Source: http://stackoverflow.com/a/6190500/562769
def __init__(self, default_factory=None, *a, **kw):
if (default_factory is not None and
not isinstance(default_factory, Callable)):
raise TypeError('first argument must be callable')
OrderedDict.__init__(self, *a, **kw)
self.default_factory = default_factory
def __getitem__(self, key):
try:
return OrderedDict.__getitem__(self, key)
except KeyError:
return self.__missing__(key)
def __missing__(self, key):
if self.default_factory is None:
raise KeyError(key)
self[key] = value = self.default_factory()
return value
def __reduce__(self):
if self.default_factory is None:
args = tuple()
else:
args = self.default_factory,
return type(self), args, None, None, self.items()
def copy(self):
return self.__copy__()
def __copy__(self):
return type(self)(self.default_factory, self)
def __deepcopy__(self, memo):
import copy
return type(self)(self.default_factory,
copy.deepcopy(self.items()))
def __repr__(self):
return 'OrderedDefaultDict(%s, %s)' % (self.default_factory,
OrderedDict.__repr__(self))
Here is another possibility, inspired by Raymond Hettinger's super() Considered Super, tested on Python 2.7.X and 3.4.X:
from collections import OrderedDict, defaultdict
class OrderedDefaultDict(OrderedDict, defaultdict):
def __init__(self, default_factory=None, *args, **kwargs):
#in python3 you can omit the args to super
super(OrderedDefaultDict, self).__init__(*args, **kwargs)
self.default_factory = default_factory
If you check out the class's MRO (aka, help(OrderedDefaultDict)), you'll see this:
class OrderedDefaultDict(collections.OrderedDict, collections.defaultdict)
| Method resolution order:
| OrderedDefaultDict
| collections.OrderedDict
| collections.defaultdict
| __builtin__.dict
| __builtin__.object
meaning that when an instance of OrderedDefaultDict is initialized, it defers to the OrderedDict's init, but this one in turn will call the defaultdict's methods before calling __builtin__.dict, which is precisely what we want.
If you want a simple solution that doesn't require a class, you can just use OrderedDict.setdefault(key, default=None) or OrderedDict.get(key, default=None). If you only get / set from a few places, say in a loop, you can easily just setdefault.
totals = collections.OrderedDict()
for i, x in some_generator():
totals[i] = totals.get(i, 0) + x
It is even easier for lists with setdefault:
agglomerate = collections.OrderedDict()
for i, x in some_generator():
agglomerate.setdefault(i, []).append(x)
But if you use it more than a few times, it is probably better to set up a class, like in the other answers.
Here's another solution to think about if your use case is simple like mine and you don't necessarily want to add the complexity of a DefaultOrderedDict class implementation to your code.
from collections import OrderedDict
keys = ['a', 'b', 'c']
items = [(key, None) for key in keys]
od = OrderedDict(items)
(None is my desired default value.)
Note that this solution won't work if one of your requirements is to dynamically insert new keys with the default value. A tradeoff of simplicity.
Update 3/13/17 - I learned of a convenience function for this use case. Same as above but you can omit the line items = ... and just:
od = OrderedDict.fromkeys(keys)
Output:
OrderedDict([('a', None), ('b', None), ('c', None)])
And if your keys are single characters, you can just pass one string:
OrderedDict.fromkeys('abc')
This has the same output as the two examples above.
You can also pass a default value as the second arg to OrderedDict.fromkeys(...).
Another simple approach would be to use dictionary get method
>>> from collections import OrderedDict
>>> d = OrderedDict()
>>> d['key'] = d.get('key', 0) + 1
>>> d['key'] = d.get('key', 0) + 1
>>> d
OrderedDict([('key', 2)])
>>>
A simpler version of #zeekay 's answer is:
from collections import OrderedDict
class OrderedDefaultListDict(OrderedDict): #name according to default
def __missing__(self, key):
self[key] = value = [] #change to whatever default you want
return value
A simple and elegant solution building on #NickBread.
Has a slightly different API to set the factory, but good defaults are always nice to have.
class OrderedDefaultDict(OrderedDict):
factory = list
def __missing__(self, key):
self[key] = value = self.factory()
return value
I created slightly fixed and more simplified version of the accepted answer, actual for python 3.7.
from collections import OrderedDict
from copy import copy, deepcopy
import pickle
from typing import Any, Callable
class DefaultOrderedDict(OrderedDict):
def __init__(
self,
default_factory: Callable[[], Any],
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.default_factory = default_factory
def __getitem__(self, key):
try:
return super().__getitem__(key)
except KeyError:
return self.__missing__(key)
def __missing__(self, key):
self[key] = value = self.default_factory()
return value
def __reduce__(self):
return type(self), (self.default_factory, ), None, None, iter(self.items())
def copy(self):
return self.__copy__()
def __copy__(self):
return type(self)(self.default_factory, self)
def __deepcopy__(self, memo):
return type(self)(self.default_factory, deepcopy(tuple(self.items()), memo))
def __repr__(self):
return f'{self.__class__.__name__}({self.default_factory}, {OrderedDict(self).__repr__()})'
And, that may be even more important, provided some tests.
a = DefaultOrderedDict(list)
# testing default
assert a['key'] == []
a['key'].append(1)
assert a['key'] == [1, ]
# testing repr
assert repr(a) == "DefaultOrderedDict(<class 'list'>, OrderedDict([('key', [1])]))"
# testing copy
b = a.copy()
assert b['key'] is a['key']
c = copy(a)
assert c['key'] is a['key']
d = deepcopy(a)
assert d['key'] is not a['key']
assert d['key'] == a['key']
# testing pickle
saved = pickle.dumps(a)
restored = pickle.loads(saved)
assert restored is not a
assert restored == a
# testing order
a['second_key'] = [2, ]
a['key'] = [3, ]
assert list(a.items()) == [('key', [3, ]), ('second_key', [2, ])]
Inspired by other answers on this thread, you can use something like,
from collections import OrderedDict
class OrderedDefaultDict(OrderedDict):
def __missing__(self, key):
value = OrderedDefaultDict()
self[key] = value
return value
I would like to know if there're any downsides of initializing another object of the same class in the missing method.
i tested the default dict and discovered it's also sorted!
maybe it was just a coincidence but anyway you can use the sorted function:
sorted(s.items())
i think it's simpler

Unmutable dictionary in python [duplicate]

A frozen set is a frozenset.
A frozen list could be a tuple.
What would a frozen dict be? An immutable, hashable dict.
I guess it could be something like collections.namedtuple, but that is more like a frozen-keys dict (a half-frozen dict). Isn't it?
A "frozendict" should be a frozen dictionary, it should have keys, values, get, etc., and support in, for, etc.
update :
* there it is : https://www.python.org/dev/peps/pep-0603
Python doesn't have a builtin frozendict type. It turns out this wouldn't be useful too often (though it would still probably be useful more often than frozenset is).
The most common reason to want such a type is when memoizing function calls for functions with unknown arguments. The most common solution to store a hashable equivalent of a dict (where the values are hashable) is something like tuple(sorted(kwargs.items())).
This depends on the sorting not being a bit insane. Python cannot positively promise sorting will result in something reasonable here. (But it can't promise much else, so don't sweat it too much.)
You could easily enough make some sort of wrapper that works much like a dict. It might look something like
import collections
class FrozenDict(collections.Mapping):
"""Don't forget the docstrings!!"""
def __init__(self, *args, **kwargs):
self._d = dict(*args, **kwargs)
self._hash = None
def __iter__(self):
return iter(self._d)
def __len__(self):
return len(self._d)
def __getitem__(self, key):
return self._d[key]
def __hash__(self):
# It would have been simpler and maybe more obvious to
# use hash(tuple(sorted(self._d.iteritems()))) from this discussion
# so far, but this solution is O(n). I don't know what kind of
# n we are going to run into, but sometimes it's hard to resist the
# urge to optimize when it will gain improved algorithmic performance.
if self._hash is None:
hash_ = 0
for pair in self.items():
hash_ ^= hash(pair)
self._hash = hash_
return self._hash
It should work great:
>>> x = FrozenDict(a=1, b=2)
>>> y = FrozenDict(a=1, b=2)
>>> x is y
False
>>> x == y
True
>>> x == {'a': 1, 'b': 2}
True
>>> d = {x: 'foo'}
>>> d[y]
'foo'
Curiously, although we have the seldom useful frozenset, there's still no frozen mapping. The idea was rejected in PEP 416 -- Add a frozendict builtin type. This idea may be revisited in a later Python release, see PEP 603 -- Adding a frozenmap type to collections.
So the Python 2 solution to this:
def foo(config={'a': 1}):
...
Still seems to be the usual:
def foo(config=None):
if config is None:
config = {'a': 1} # default config
...
In Python 3 you have the option of this:
from types import MappingProxyType
default_config = {'a': 1}
DEFAULTS = MappingProxyType(default_config)
def foo(config=DEFAULTS):
...
Now the default config can be updated dynamically, but remain immutable where you want it to be immutable by passing around the proxy instead.
So changes in the default_config will update DEFAULTS as expected, but you can't write to the mapping proxy object itself.
Admittedly it's not really the same thing as an "immutable, hashable dict", but it might be a decent substitute for some use cases of a frozendict.
Assuming the keys and values of the dictionary are themselves immutable (e.g. strings) then:
>>> d
{'forever': 'atones', 'minks': 'cards', 'overhands': 'warranted',
'hardhearted': 'tartly', 'gradations': 'snorkeled'}
>>> t = tuple((k, d[k]) for k in sorted(d.keys()))
>>> hash(t)
1524953596
There is no fronzedict, but you can use MappingProxyType that was added to the standard library with Python 3.3:
>>> from types import MappingProxyType
>>> foo = MappingProxyType({'a': 1})
>>> foo
mappingproxy({'a': 1})
>>> foo['a'] = 2
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'mappingproxy' object does not support item assignment
>>> foo
mappingproxy({'a': 1})
I think of frozendict everytime I write a function like this:
def do_something(blah, optional_dict_parm=None):
if optional_dict_parm is None:
optional_dict_parm = {}
Install frozendict
pip install frozendict
Use it!
from frozendict import frozendict
def smth(param = frozendict({})):
pass
Here is the code I've been using. I subclassed frozenset. The advantages of this are the following.
This is a truly immutable object. No relying on the good behavior of future users and developers.
It's easy to convert back and forth between a regular dictionary and a frozen dictionary. FrozenDict(orig_dict) --> frozen dictionary. dict(frozen_dict) --> regular dict.
Update Jan 21 2015: The original piece of code I posted in 2014 used a for-loop to find a key that matched. That was incredibly slow. Now I've put together an implementation which takes advantage of frozenset's hashing features. Key-value pairs are stored in special containers where the __hash__ and __eq__ functions are based on the key only. This code has also been formally unit-tested, unlike what I posted here in August 2014.
MIT-style license.
if 3 / 2 == 1:
version = 2
elif 3 / 2 == 1.5:
version = 3
def col(i):
''' For binding named attributes to spots inside subclasses of tuple.'''
g = tuple.__getitem__
#property
def _col(self):
return g(self,i)
return _col
class Item(tuple):
''' Designed for storing key-value pairs inside
a FrozenDict, which itself is a subclass of frozenset.
The __hash__ is overloaded to return the hash of only the key.
__eq__ is overloaded so that normally it only checks whether the Item's
key is equal to the other object, HOWEVER, if the other object itself
is an instance of Item, it checks BOTH the key and value for equality.
WARNING: Do not use this class for any purpose other than to contain
key value pairs inside FrozenDict!!!!
The __eq__ operator is overloaded in such a way that it violates a
fundamental property of mathematics. That property, which says that
a == b and b == c implies a == c, does not hold for this object.
Here's a demonstration:
[in] >>> x = Item(('a',4))
[in] >>> y = Item(('a',5))
[in] >>> hash('a')
[out] >>> 194817700
[in] >>> hash(x)
[out] >>> 194817700
[in] >>> hash(y)
[out] >>> 194817700
[in] >>> 'a' == x
[out] >>> True
[in] >>> 'a' == y
[out] >>> True
[in] >>> x == y
[out] >>> False
'''
__slots__ = ()
key, value = col(0), col(1)
def __hash__(self):
return hash(self.key)
def __eq__(self, other):
if isinstance(other, Item):
return tuple.__eq__(self, other)
return self.key == other
def __ne__(self, other):
return not self.__eq__(other)
def __str__(self):
return '%r: %r' % self
def __repr__(self):
return 'Item((%r, %r))' % self
class FrozenDict(frozenset):
''' Behaves in most ways like a regular dictionary, except that it's immutable.
It differs from other implementations because it doesn't subclass "dict".
Instead it subclasses "frozenset" which guarantees immutability.
FrozenDict instances are created with the same arguments used to initialize
regular dictionaries, and has all the same methods.
[in] >>> f = FrozenDict(x=3,y=4,z=5)
[in] >>> f['x']
[out] >>> 3
[in] >>> f['a'] = 0
[out] >>> TypeError: 'FrozenDict' object does not support item assignment
FrozenDict can accept un-hashable values, but FrozenDict is only hashable if its values are hashable.
[in] >>> f = FrozenDict(x=3,y=4,z=5)
[in] >>> hash(f)
[out] >>> 646626455
[in] >>> g = FrozenDict(x=3,y=4,z=[])
[in] >>> hash(g)
[out] >>> TypeError: unhashable type: 'list'
FrozenDict interacts with dictionary objects as though it were a dict itself.
[in] >>> original = dict(x=3,y=4,z=5)
[in] >>> frozen = FrozenDict(x=3,y=4,z=5)
[in] >>> original == frozen
[out] >>> True
FrozenDict supports bi-directional conversions with regular dictionaries.
[in] >>> original = {'x': 3, 'y': 4, 'z': 5}
[in] >>> FrozenDict(original)
[out] >>> FrozenDict({'x': 3, 'y': 4, 'z': 5})
[in] >>> dict(FrozenDict(original))
[out] >>> {'x': 3, 'y': 4, 'z': 5} '''
__slots__ = ()
def __new__(cls, orig={}, **kw):
if kw:
d = dict(orig, **kw)
items = map(Item, d.items())
else:
try:
items = map(Item, orig.items())
except AttributeError:
items = map(Item, orig)
return frozenset.__new__(cls, items)
def __repr__(self):
cls = self.__class__.__name__
items = frozenset.__iter__(self)
_repr = ', '.join(map(str,items))
return '%s({%s})' % (cls, _repr)
def __getitem__(self, key):
if key not in self:
raise KeyError(key)
diff = self.difference
item = diff(diff({key}))
key, value = set(item).pop()
return value
def get(self, key, default=None):
if key not in self:
return default
return self[key]
def __iter__(self):
items = frozenset.__iter__(self)
return map(lambda i: i.key, items)
def keys(self):
items = frozenset.__iter__(self)
return map(lambda i: i.key, items)
def values(self):
items = frozenset.__iter__(self)
return map(lambda i: i.value, items)
def items(self):
items = frozenset.__iter__(self)
return map(tuple, items)
def copy(self):
cls = self.__class__
items = frozenset.copy(self)
dupl = frozenset.__new__(cls, items)
return dupl
#classmethod
def fromkeys(cls, keys, value):
d = dict.fromkeys(keys,value)
return cls(d)
def __hash__(self):
kv = tuple.__hash__
items = frozenset.__iter__(self)
return hash(frozenset(map(kv, items)))
def __eq__(self, other):
if not isinstance(other, FrozenDict):
try:
other = FrozenDict(other)
except Exception:
return False
return frozenset.__eq__(self, other)
def __ne__(self, other):
return not self.__eq__(other)
if version == 2:
#Here are the Python2 modifications
class Python2(FrozenDict):
def __iter__(self):
items = frozenset.__iter__(self)
for i in items:
yield i.key
def iterkeys(self):
items = frozenset.__iter__(self)
for i in items:
yield i.key
def itervalues(self):
items = frozenset.__iter__(self)
for i in items:
yield i.value
def iteritems(self):
items = frozenset.__iter__(self)
for i in items:
yield (i.key, i.value)
def has_key(self, key):
return key in self
def viewkeys(self):
return dict(self).viewkeys()
def viewvalues(self):
return dict(self).viewvalues()
def viewitems(self):
return dict(self).viewitems()
#If this is Python2, rebuild the class
#from scratch rather than use a subclass
py3 = FrozenDict.__dict__
py3 = {k: py3[k] for k in py3}
py2 = {}
py2.update(py3)
dct = Python2.__dict__
py2.update({k: dct[k] for k in dct})
FrozenDict = type('FrozenDict', (frozenset,), py2)
You may use frozendict from utilspie package as:
>>> from utilspie.collectionsutils import frozendict
>>> my_dict = frozendict({1: 3, 4: 5})
>>> my_dict # object of `frozendict` type
frozendict({1: 3, 4: 5})
# Hashable
>>> {my_dict: 4}
{frozendict({1: 3, 4: 5}): 4}
# Immutable
>>> my_dict[1] = 5
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/mquadri/workspace/utilspie/utilspie/collectionsutils/collections_utils.py", line 44, in __setitem__
self.__setitem__.__name__, type(self).__name__))
AttributeError: You can not call '__setitem__()' for 'frozendict' object
As per the document:
frozendict(dict_obj): Accepts obj of dict type and returns a hashable and immutable dict
Subclassing dict
i see this pattern in the wild (github) and wanted to mention it:
class FrozenDict(dict):
def __init__(self, *args, **kwargs):
self._hash = None
super(FrozenDict, self).__init__(*args, **kwargs)
def __hash__(self):
if self._hash is None:
self._hash = hash(tuple(sorted(self.items()))) # iteritems() on py2
return self._hash
def _immutable(self, *args, **kws):
raise TypeError('cannot change object - object is immutable')
# makes (deep)copy alot more efficient
def __copy__(self):
return self
def __deepcopy__(self, memo=None):
if memo is not None:
memo[id(self)] = self
return self
__setitem__ = _immutable
__delitem__ = _immutable
pop = _immutable
popitem = _immutable
clear = _immutable
update = _immutable
setdefault = _immutable
example usage:
d1 = FrozenDict({'a': 1, 'b': 2})
d2 = FrozenDict({'a': 1, 'b': 2})
d1.keys()
assert isinstance(d1, dict)
assert len(set([d1, d2])) == 1 # hashable
Pros
support for get(), keys(), items() (iteritems() on py2) and all the goodies from dict out of the box without explicitly implementing them
uses internally dict which means performance (dict is written in c in CPython)
elegant simple and no black magic
isinstance(my_frozen_dict, dict) returns True - although python encourages duck-typing many packages uses isinstance(), this can save many tweaks and customizations
Cons
any subclass can override this or access it internally (you cant really 100% protect something in python, you should trust your users and provide good documentation).
if you care for speed, you might want to make __hash__ a bit faster.
Yes, this is my second answer, but it is a completely different approach. The first implementation was in pure python. This one is in Cython. If you know how to use and compile Cython modules, this is just as fast as a regular dictionary. Roughly .04 to .06 micro-sec to retrieve a single value.
This is the file "frozen_dict.pyx"
import cython
from collections import Mapping
cdef class dict_wrapper:
cdef object d
cdef int h
def __init__(self, *args, **kw):
self.d = dict(*args, **kw)
self.h = -1
def __len__(self):
return len(self.d)
def __iter__(self):
return iter(self.d)
def __getitem__(self, key):
return self.d[key]
def __hash__(self):
if self.h == -1:
self.h = hash(frozenset(self.d.iteritems()))
return self.h
class FrozenDict(dict_wrapper, Mapping):
def __repr__(self):
c = type(self).__name__
r = ', '.join('%r: %r' % (k,self[k]) for k in self)
return '%s({%s})' % (c, r)
__all__ = ['FrozenDict']
Here's the file "setup.py"
from distutils.core import setup
from Cython.Build import cythonize
setup(
ext_modules = cythonize('frozen_dict.pyx')
)
If you have Cython installed, save the two files above into the same directory. Move to that directory in the command line.
python setup.py build_ext --inplace
python setup.py install
And you should be done.
The main disadvantage of namedtuple is that it needs to be specified before it is used, so it's less convenient for single-use cases.
However, there is a practical workaround that can be used to handle many such cases. Let's say that you want to have an immutable equivalent of the following dict:
MY_CONSTANT = {
'something': 123,
'something_else': 456
}
This can be emulated like this:
from collections import namedtuple
MY_CONSTANT = namedtuple('MyConstant', 'something something_else')(123, 456)
It's even possible to write an auxiliary function to automate this:
def freeze_dict(data):
from collections import namedtuple
keys = sorted(data.keys())
frozen_type = namedtuple(''.join(keys), keys)
return frozen_type(**data)
a = {'foo':'bar', 'x':'y'}
fa = freeze_dict(data)
assert a['foo'] == fa.foo
Of course this works only for flat dicts, but it shouldn't be too difficult to implement a recursive version.
freeze implements frozen collections (dict, list and set) that are hashable, type-hinted and will recursively freeze the data you give them (when possible) for you.
pip install frz
Usage:
from freeze import FDict
a_mutable_dict = {
"list": [1, 2],
"set": {3, 4},
}
a_frozen_dict = FDict(a_mutable_dict)
print(repr(a_frozen_dict))
# FDict: {'list': FList: (1, 2), 'set': FSet: {3, 4}}
In the absence of native language support, you can either do it yourself or use an existing solution. Fortunately Python makes it dead simple to extend off of their base implementations.
class frozen_dict(dict):
def __setitem__(self, key, value):
raise Exception('Frozen dictionaries cannot be mutated')
frozen_dict = frozen_dict({'foo': 'FOO' })
print(frozen['foo']) # FOO
frozen['foo'] = 'NEWFOO' # Exception: Frozen dictionaries cannot be mutated
# OR
from types import MappingProxyType
frozen_dict = MappingProxyType({'foo': 'FOO'})
print(frozen_dict['foo']) # FOO
frozen_dict['foo'] = 'NEWFOO' # TypeError: 'mappingproxy' object does not support item assignment
I needed to access fixed keys for something at one point for something that was a sort of globally-constanty kind of thing and I settled on something like this:
class MyFrozenDict:
def __getitem__(self, key):
if key == 'mykey1':
return 0
if key == 'mykey2':
return "another value"
raise KeyError(key)
Use it like
a = MyFrozenDict()
print(a['mykey1'])
WARNING: I don't recommend this for most use cases as it makes some pretty severe tradeoffs.

How to implement __iter__(self) for a container object (Python)

I have written a custom container object.
According to this page, I need to implement this method on my object:
__iter__(self)
However, upon following up the link to Iterator Types in the Python reference manual, there are no examples given of how to implement your own.
Can someone post a snippet (or link to a resource), that shows how to do this?
The container I am writing, is a map (i.e. stores values by unique keys).
dicts can be iterated like this:
for k, v in mydict.items()
In this case I need to be able to return two elements (a tuple?) in the iterator.
It is still not clear how to implement such an iterator (despite the several answers that have been kindly provided). Could someone please shed some more light on how to implement an iterator for a map-like container object? (i.e. a custom class that acts like a dict)?
I normally would use a generator function. Each time you use a yield statement, it will add an item to the sequence.
The following will create an iterator that yields five, and then every item in some_list.
def __iter__(self):
yield 5
yield from some_list
Pre-3.3, yield from didn't exist, so you would have to do:
def __iter__(self):
yield 5
for x in some_list:
yield x
Another option is to inherit from the appropriate abstract base class from the `collections module as documented here.
In case the container is its own iterator, you can inherit from
collections.Iterator. You only need to implement the next method then.
An example is:
>>> from collections import Iterator
>>> class MyContainer(Iterator):
... def __init__(self, *data):
... self.data = list(data)
... def next(self):
... if not self.data:
... raise StopIteration
... return self.data.pop()
...
...
...
>>> c = MyContainer(1, "two", 3, 4.0)
>>> for i in c:
... print i
...
...
4.0
3
two
1
While you are looking at the collections module, consider inheriting from Sequence, Mapping or another abstract base class if that is more appropriate. Here is an example for a Sequence subclass:
>>> from collections import Sequence
>>> class MyContainer(Sequence):
... def __init__(self, *data):
... self.data = list(data)
... def __getitem__(self, index):
... return self.data[index]
... def __len__(self):
... return len(self.data)
...
...
...
>>> c = MyContainer(1, "two", 3, 4.0)
>>> for i in c:
... print i
...
...
1
two
3
4.0
NB: Thanks to Glenn Maynard for drawing my attention to the need to clarify the difference between iterators on the one hand and containers that are iterables rather than iterators on the other.
usually __iter__() just return self if you have already define the next() method (generator object):
here is a Dummy example of a generator :
class Test(object):
def __init__(self, data):
self.data = data
def next(self):
if not self.data:
raise StopIteration
return self.data.pop()
def __iter__(self):
return self
but __iter__() can also be used like this:
http://mail.python.org/pipermail/tutor/2006-January/044455.html
The "iterable interface" in python consists of two methods __next__() and __iter__(). The __next__ function is the most important, as it defines the iterator behavior - that is, the function determines what value should be returned next. The __iter__() method is used to reset the starting point of the iteration. Often, you will find that __iter__() can just return self when __init__() is used to set the starting point.
See the following code for defining a Class Reverse which implements the "iterable interface" and defines an iterator over any instance from any sequence class. The __next__() method starts at the end of the sequence and returns values in reverse order of the sequence. Note that instances from a class implementing the "sequence interface" must define a __len__() and a __getitem__() method.
class Reverse:
"""Iterator for looping over a sequence backwards."""
def __init__(self, seq):
self.data = seq
self.index = len(seq)
def __iter__(self):
return self
def __next__(self):
if self.index == 0:
raise StopIteration
self.index = self.index - 1
return self.data[self.index]
>>> rev = Reverse('spam')
>>> next(rev) # note no need to call iter()
'm'
>>> nums = Reverse(range(1,10))
>>> next(nums)
9
If your object contains a set of data you want to bind your object's iter to, you can cheat and do this:
>>> class foo:
def __init__(self, *params):
self.data = params
def __iter__(self):
if hasattr(self.data[0], "__iter__"):
return self.data[0].__iter__()
return self.data.__iter__()
>>> d=foo(6,7,3,8, "ads", 6)
>>> for i in d:
print i
6
7
3
8
ads
6
To answer the question about mappings: your provided __iter__ should iterate over the keys of the mapping. The following is a simple example that creates a mapping x -> x * x and works on Python3 extending the ABC mapping.
import collections.abc
class MyMap(collections.abc.Mapping):
def __init__(self, n):
self.n = n
def __getitem__(self, key): # given a key, return it's value
if 0 <= key < self.n:
return key * key
else:
raise KeyError('Invalid key')
def __iter__(self): # iterate over all keys
for x in range(self.n):
yield x
def __len__(self):
return self.n
m = MyMap(5)
for k, v in m.items():
print(k, '->', v)
# 0 -> 0
# 1 -> 1
# 2 -> 4
# 3 -> 9
# 4 -> 16
In case you don't want to inherit from dict as others have suggested, here is direct answer to the question on how to implement __iter__ for a crude example of a custom dict:
class Attribute:
def __init__(self, key, value):
self.key = key
self.value = value
class Node(collections.Mapping):
def __init__(self):
self.type = ""
self.attrs = [] # List of Attributes
def __iter__(self):
for attr in self.attrs:
yield attr.key
That uses a generator, which is well described here.
Since we're inheriting from Mapping, you need to also implement __getitem__ and __len__:
def __getitem__(self, key):
for attr in self.attrs:
if key == attr.key:
return attr.value
raise KeyError
def __len__(self):
return len(self.attrs)
One option that might work for some cases is to make your custom class inherit from dict. This seems like a logical choice if it acts like a dict; maybe it should be a dict. This way, you get dict-like iteration for free.
class MyDict(dict):
def __init__(self, custom_attribute):
self.bar = custom_attribute
mydict = MyDict('Some name')
mydict['a'] = 1
mydict['b'] = 2
print mydict.bar
for k, v in mydict.items():
print k, '=>', v
Output:
Some name
a => 1
b => 2
example for inhert from dict, modify its iter, for example, skip key 2 when in for loop
# method 1
class Dict(dict):
def __iter__(self):
keys = self.keys()
for i in keys:
if i == 2:
continue
yield i
# method 2
class Dict(dict):
def __iter__(self):
for i in super(Dict, self).__iter__():
if i == 2:
continue
yield i

Categories