I am interested in counting the number of accesses to a dictionary's values. I am unsure how to include dictionary unpacking in the counter. Any tips?
from collections import defaultdict
class LDict(dict):
def __init__(self, *args, **kwargs):
'''
This is a read-counting dictionary
'''
super().__init__(*args, **kwargs)
self._lookup = defaultdict(lambda : 0)
def __getitem__(self, key):
retval = super().__getitem__(key)
self._lookup[key] += 1
return retval
def __setitem__(self, key, value):
super().__setitem__(key, value)
self._lookup[key] = self._lookup.default_factory()
def __delitem__(self, key):
super().__delitem__(self, key)
_ = self._lookup[key]
del self._lookup[key]
def list_unused(self):
return [key for key in self if self._lookup[key] == 0]
l = LDict(a='apple', b='bugger')
print({**l, **l})
print(l.list_unused())
_ = l['a']
print(l.list_unused())
You need to override more methods. Access is not centralized through __getitem__(): other methods like copy(), items(), etc. access the keys without going through __getitem()__. I would assume the ** operator uses items(), but you will need to handle ALL of the methods to keep track of EVERY access. In many cases you will have to make a judgement call. For example, does __repr__() count as an access? The returned string contains every key and value formatted, so I think it does.
I would recommend overriding all of these methods, because you have to do bookkeeping on assignment too.
def __repr__(self):
def __len__(self):
def __iter__(self):
def clear(self):
def copy(self):
def has_key(self, k):
def update(self, *args, **kwargs):
def keys(self):
def values(self):
def items(self):
EDIT: So apparently there's an important caveat here that directly relates to your implementation. if LDict extends dict, then none of these methods are invoked during the dictionary unpacking { **l, **l}.
Apparently you can follow the advice here though, and implement LDict without extending dict. This worked for me:
from collections import MutableMapping
class LDict(MutableMapping):
def __init__(self, *args, **kwargs):
'''
This is a read-counting dictionary
'''
self._lookup = defaultdict(lambda : 0)
self.data = {}
if kwargs:
self.data.update(kwargs)
def __getitem__(self, key):
retval = self.data[key]
self._lookup[key] += 1
return retval
def __setitem__(self, key, value):
self.data[key] = value
self._lookup[key] = self._lookup.default_factory()
def __delitem__(self, key):
del self.data[key]
_ = self._lookup[key]
del self._lookup[key]
def items(self):
print('items is being called!')
yield from self.data.items()
def __iter__(self):
print('__iter__ is being called!')
yield from self.data
def __len__(self):
return len(self.data)
def list_unused(self):
return [key for key in self if self._lookup[key] == 0]
l = LDict(a='apple', b='bugger')
print({**l, **l})
print(l.list_unused())
_ = l['a']
print(l.list_unused())
which produces the output:
__iter__ is being called!
__iter__ is being called!
{'b': 'bugger', 'a': 'apple'}
__iter__ is being called!
[]
__iter__ is being called!
[]
(I only implemented the bare minimum to get example to work, I still recommend implementing the set of methods I listed about if you want your counts to be correct!)
So I guess the answer to your question is you have to
Implement the __iter__(self) method
DO NOT inherit from dict().
Related
I would like to be able to unpack an object from a dict-similar class.
current:
f(**m.to_dict())
preferred
f(**m)
This would work if starstarprepare existed:
class M:
#... __getitem__, __setitem__
def __starstarprepare__(self):
md = self.to_dict()
return md
You can use collections.abc.Mapping.
from collections.abc import Mapping
class M(Mapping):
def __iter__(self):
return iter(self.to_dict())
def __getitem__(self, item):
return self.to_dict()[item]
def __len__(self):
return len(self.to_dict())
** works with any mapping type. One way to make M a mapping type is to subclass collections.abc.Mapping and implement __getitem__, __iter__, and __len__:
from collections.abc import Mapping
class M(Mapping):
def __init__(self):
self.a = 3
self.b = 5
def __getitem__(self, key):
return getattr(self, key)
def __iter__(self):
yield 'a'
yield 'b'
def __len__(self):
return 2
def foo(**kwargs):
for k, v in kwargs.items():
print(k, v)
m = M()
foo(**m)
If you already have a to_dict method, all three of the magic methods can be wrappers around the corresponding dict methods.
class M(Mapping):
def __init__(self):
self.a = 3
self.b = 5
def to_dict(self):
return {'a': self.a, 'b': self.b}
def __getitem__(self, key):
return self.to_dict()[key]
def __iter__(self):
return iter(self.to_dict())
def __len__(self):
return len(self.to_dict())
Solution due to #peter
class M:
# ... __getitem__ and other functions
def keys(self):
k = self.to_dict().keys()
return k
Is there a way to give a comparator to set() so when adding items it checks an attribute of that item for likeness rather than if the item is the same? For example, I want to use objects in a set that can contain the same value for one attribute.
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
values = set()
a = TestObj('a')
b = TestObj('b')
a2 = TestObj('a')
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not ok but still gets added
# Hypothetical code
values = set(lambda x, y: x.value != y.value)
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not added
I have implemented my own sorta thing that does similar functionality but wanted to know if there was a builtin way.
from Queue import Queue
class UniqueByAttrQueue(Queue):
def __init__(self, attr, *args, **kwargs):
Queue.__init__(self, *args, **kwargs)
self.attr = attr
def _init(self, maxsize):
self.queue = set()
def _put(self, item):
# Potential race condition, worst case message gets put in twice
if hasattr(item, self.attr) and item not in self:
self.queue.add(item)
def __contains__(self, item):
item_attr = getattr(item, self.attr)
for x in self.queue:
x_attr = getattr(x, self.attr)
if x_attr == item_attr:
return True
return False
def _get(self):
return self.queue.pop()
Just define __hash__ and __eq__ on the object in terms of the attribute in question and it will work with sets. For example:
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
def __eq__(self, other):
if not instance(other, TestObj):
return NotImplemented
return self.value == other.value
def __hash__(self):
return hash(self.value)
If you can't change the object (or don't want to, say, because other things are important to equality), then use a dict instead. You can either do:
mydict[obj.value] = obj
so new objects replace old, or
mydict.setdefault(obj.value, obj)
so old objects are maintained if the value in question is already in the keys. Just make sure to iterate using .viewvalues() (Python 2) or .values() (Python 3) instead of iterating directly (which would get the keys, not the values). You could actually use this approach to make a custom set-like object with a key as you describe (though you'd need to implement many more methods than I show to make it efficient, the default methods are usually fairly slow):
from collections.abc import MutableSet # On Py2, collections without .abc
class keyedset(MutableSet):
def __init__(self, it=(), key=lambda x: x):
self.key = key
self.contents = {}
for x in it:
self.add(x)
def __contains__(self, x):
# Use anonymous object() as default so all arguments handled properly
sentinel = object()
getval = self.contents.get(self.key(x), sentinel)
return getval is not sentinel and getval == x
def __iter__(self):
return iter(self.contents.values()) # itervalues or viewvalues on Py2
def __len__(self):
return len(self.contents)
def add(self, x):
self.contents.setdefault(self.key(x), x)
def discard(self, x):
self.contents.pop(self.key(x), None)
I'd like a dict-like class that transparently uses transformed keys on lookup, so that I can write
k in d # instead of f(k) in d
d[k] # instead of d[f(k)]
d.get(k, v) # instead of d.get(f(k), v)
etc. (Imagine for example that f does some kind of canonicalization, e.g. f(k) returns k.lower().)
It seems that I can inherit from dict and override individual operations, but not that there is a centralized spot for such transformation that all keys go through. That means I have to override all of __contains__, __getitem__, get, and possibly __missing__, etc. This gets too tedious and error-prone, and not very attractive unless this overhead outweighs that of manually substituting f(k) for every call on a plain dict.
Well, the idiomatic way to do it is probably using dimo414's answer. For the case where the transform is not pure (do not always evaluates the same result value given the same argument):
class Foo(dict):
def __init__(self, transform, *args, **kwargs):
super(Foo, self).__init__(self, *args, **kwargs)
assert isfunction(transform), u'Transform argument must be a function.'
self._transform = transform
def get(self, k, d=None):
return super(Foo, self).get(self._transform(k), d)
def __getitem__(self, item):
return super(Foo, self).__getitem__(self._transform(item))
def __contains__(self, item):
return super(Foo, self).__contains__(self._transform(item))
def __repr__(self):
return '<Foo instance {}>'.format(id(self))
Testing:
>>> import datetime
>>> # {0: '0', 1: '1', 2: '2' ... 99: '99'}
>>> x = Foo(lambda x: (datetime.datetime.now() - x).seconds, ((i, str(i)) for i in range(10)))
>>> t = datetime.datetime.now()
>>> x.get(t)
'5'
>>> x[t]
'12'
Not that tedious but I don't like how it smells (in terms of design).
I'm not sure why your question is being downvoted, it's a reasonable thing to want. In Java, Guava provides several map transformation utilities which provide views into the backing map like you're describing. However they don't provide a Maps.transformKeys() method because it's actually not a very useful function. See How to convert Map<String, String> to Map<Long, String> using guava and Why Guava does not provide a way to transform map keys for details as to why.
In short, it's not possible to efficiently provide key transformations in the general case. Rather than creating the complex and possibly inconsistent data structure you're envisioning, the best thing to do is likely to just create a new dict applying your key transformation, e.g.:
{ f(k): v for k, v in d.iteritems() }
Since you want to maintain the exact same signature as dict(), I
propose creating a factory function to wrap a TransformDict to provide
the same signature.
def transform_dict(transform_key):
def _transform_dict(*args, **kwargs):
return TransformDict(transform_key, *args, **kwargs)
return _transform_dict
Which can be used as:
>>> LowerDict = transform_dict(lambda k: k.lower())
>>> lower_dict = LowerDict({'FOO': 1}, BaR=2)
TransformDict(<function <lambda> at 0x12345678>, {'foo': 1, 'bar': 2})
The TransformDict should implement the MutableMapping abstract
base class so that any potentially missed dict method will not pass
silently. All methods dealing with transforming the key can be
implemented in terms of __contains__(), __getitem__(),
__setitem__(), and __delitem__().
import collections
import sys
class TransformDict(collections.MutableMapping):
def __init__(self, __transform_key, *args, **kwargs):
self.data = dict(*args, **kwargs)
self.transform_key = __transform_key
# Key methods.
def __contains__(self, key):
key = self.transform_key(key)
return key in self.data
def __getitem__(self, key):
key = self.transform_key(key)
return self.data[key]
def __setitem__(self, key, value):
key = self.transform_key(key)
self.data[key] = value
def __delitem__(self, key):
key = self.transform_key(key)
del self.data[key]
# Operator methods.
def __iter__(self):
return iter(self.data)
def __len__(self):
return len(self.data)
def __eq__(self, other):
if isinstance(other, TransformDict):
other = other.data
return self.data == other
def __ne__(self, other):
return not (self == other)
def __repr__(self):
return "{}({!r}, {!r})".format(self.__class__.__name__, self.transform_key, self.data)
# Accessor methods.
def get(self, key, default=None):
if key in self:
return self[key]
return default
def keys(self):
return self.data.keys()
def items(self):
return self.data.items()
def values(self):
return self.data.values()
if sys.version_info[0] == 2:
def iterkeys(self):
return self.data.iterkeys()
def itervalues(self):
return self.data.itervalues()
def iteritems(self):
return self.data.iteritems()
def viewkeys(self):
return self.data.viewkeys()
def viewvalues(self):
return self.data.viewvalues()
def viewitems(self):
return self.data.viewitems()
# Mutable methods.
def clear(self):
self.data.clear()
def pop(self, key, default=KeyError):
if key in self or default is KeyError:
value = self[key]
del self[key]
return value
return default
def popitem(self):
return self.data.popitem()
def setdefault(self, key, default=None):
if key not in self:
self[key] = default
return default
return self[key]
def update(self, other):
for key for other:
self[key] = other[key]
# Miscellaneous methods.
def copy(self):
return self.__class__(self.transform_key, self.data)
I have a class like:
class A:
def __init__(self):
self.data = {}
and at some moment I want to prohibit self.data fields modification.
I've read in PEP-416 rejection notice that there are a lot of ways to do it. So I'd like to find what they are.
I tried this:
a = A()
a.data = types.MappingProxyType(a.data)
That should work but first, its python3.3+ and second, when I do this "prohibition" multiple times I get this:
>>> a.data = types.MappingProxyType(a.data)
>>> a.data = types.MappingProxyType(a.data)
>>> a.data
mappingproxy(mappingproxy({}))
though it would be much better to get just mappingproxy({}) as I am going to "prohibit" a lot of times. Check of isinstance(MappingProxyType) is an option, but I think that other options can exist.
Thanks
Use collections.Mapping e.g.
import collections
class DictWrapper(collections.Mapping):
def __init__(self, data):
self._data = data
def __getitem__(self, key):
return self._data[key]
def __len__(self):
return len(self._data)
def __iter__(self):
return iter(self._data)
This is the full implementation of fast (shallow-)read-only dict:
def _readonly(self, *args, **kwargs):
raise RuntimeError("Cannot modify ReadOnlyDict")
class ReadOnlyDict(dict):
__setitem__ = _readonly
__delitem__ = _readonly
pop = _readonly
popitem = _readonly
clear = _readonly
update = _readonly
setdefault = _readonly
My previous (worse) implementation was as follows (thanks #mtraceur for the great remarks!):
class ReadOnlyDict(dict):
def __readonly__(self, *args, **kwargs):
raise RuntimeError("Cannot modify ReadOnlyDict")
__setitem__ = __readonly__
__delitem__ = __readonly__
pop = __readonly__
popitem = __readonly__
clear = __readonly__
update = __readonly__
setdefault = __readonly__
del __readonly__
Very easy, you just override default dict's methods!
Here is an example:
class ReadOnlyDict(dict):
__readonly = False
def readonly(self, allow=1):
"""Allow or deny modifying dictionary"""
self.__readonly = bool(allow)
def __setitem__(self, key, value):
if self.__readonly:
raise TypeError, "__setitem__ is not supported"
return dict.__setitem__(self, key, value)
def __delitem__(self, key):
if self.__readonly:
raise TypeError, "__delitem__ is not supported"
return dict.__delitem__(self, key)
BTW, you can also remove .pop, .update and other methods you need. Just play around with it.
The best way is to derive from UserDict like this:
from collections import UserDict
class MyReadOnlyDict(UserDict):
def my_set(self, key, val, more_params):
# do something special
# custom logic etc
self.data[key] = val
def __setitem__(self, key, val):
raise NotImplementedError('This dictionary cannot be updated')
def __delitem__(self, key):
raise NotImplementedError('This dictionary does not allow delete')
The advantage of this method is that you can still have internal methods in your class that can update dictionary by accessing self.data.
How about? It is the update of #mouad 's answer.
import json
from collections import OrderedDict
from collections.abc import Mapping
class ReadOnlyJsonObject(Mapping):
def __init__(self, data, dumps_kw: dict=None, loads_kw: dict=None):
if dumps_kw is None:
dumps_kw = dict()
if loads_kw is None:
self._loads_kw = dict(object_pairs_hook=OrderedDict)
else:
self._loads_kw = loads_kw
if isinstance(data, str):
self._json_string = data
else:
self._json_string = json.dumps(data, **dumps_kw)
#property
def _data(self):
return json.loads(self._json_string, **self._loads_kw)
def __getitem__(self, key):
return self._data[key]
def __len__(self):
return len(self._data)
def __iter__(self):
return iter(self._data)
def __str__(self):
return self._json_string
Not sure about the performance, though. I use this one in a real project https://github.com/patarapolw/AnkiTools/blob/master/AnkiTools/tools/defaults.py
As you know, python allows us simply override dict.__getitem__ method so we can do something different in there when someone tries to retrieve any value from it.
I want to do some code when one MyDict(dict) class instance is passed to update method of another python dict instance. See below:
class MyDict(dict):
def __getitem__(self, item):
print "Doing some stuff here"
return dict.__getitem__(self, item)
d1 = MyDict({'1': 1, '2': 2})
d2 = {}
# I want to have d1.__getitem__ called, but it does not work :-(
d2.update(d1)
Try using the collections.Mapping abstract base class (or collections.MutableMapping, if this is read-write).
import collections
class MyDict(collections.Mapping):
def __init__(self, *args, **kwargs):
self.data = dict(*args, **kwargs)
def __len__(self):
return len(self.data)
def __iter__(self):
return iter(self.data)
def __contains__(self, key):
return key in self.data
def __getitem__(self, key):
print 'Doing some stuff here'
return self.data[key]
All you need is to subclass MyDict from object and create .keys() method for it. See below:
class MyDict(object):
def __init__(self, items=()):
self._dict = dict(items)
def keys(self):
return self._dict.keys()
def __getitem__(self, item):
print "Doing some stuff for item:", item
return self._dict[item]
def __setitem__(self, item, value):
self._dict[item] = value
# You can add some more dict methods
d1 = MyDict({'1': 1, '2': 2})
d2 = {}
# Now you will see some stuff executed for each
# value extracted from d1 while updating d2
d2.update(d1)