I am interested in counting the number of accesses to a dictionary's values. I am unsure how to include dictionary unpacking in the counter. Any tips?
from collections import defaultdict
class LDict(dict):
def __init__(self, *args, **kwargs):
'''
This is a read-counting dictionary
'''
super().__init__(*args, **kwargs)
self._lookup = defaultdict(lambda : 0)
def __getitem__(self, key):
retval = super().__getitem__(key)
self._lookup[key] += 1
return retval
def __setitem__(self, key, value):
super().__setitem__(key, value)
self._lookup[key] = self._lookup.default_factory()
def __delitem__(self, key):
super().__delitem__(self, key)
_ = self._lookup[key]
del self._lookup[key]
def list_unused(self):
return [key for key in self if self._lookup[key] == 0]
l = LDict(a='apple', b='bugger')
print({**l, **l})
print(l.list_unused())
_ = l['a']
print(l.list_unused())
You need to override more methods. Access is not centralized through __getitem__(): other methods like copy(), items(), etc. access the keys without going through __getitem()__. I would assume the ** operator uses items(), but you will need to handle ALL of the methods to keep track of EVERY access. In many cases you will have to make a judgement call. For example, does __repr__() count as an access? The returned string contains every key and value formatted, so I think it does.
I would recommend overriding all of these methods, because you have to do bookkeeping on assignment too.
def __repr__(self):
def __len__(self):
def __iter__(self):
def clear(self):
def copy(self):
def has_key(self, k):
def update(self, *args, **kwargs):
def keys(self):
def values(self):
def items(self):
EDIT: So apparently there's an important caveat here that directly relates to your implementation. if LDict extends dict, then none of these methods are invoked during the dictionary unpacking { **l, **l}.
Apparently you can follow the advice here though, and implement LDict without extending dict. This worked for me:
from collections import MutableMapping
class LDict(MutableMapping):
def __init__(self, *args, **kwargs):
'''
This is a read-counting dictionary
'''
self._lookup = defaultdict(lambda : 0)
self.data = {}
if kwargs:
self.data.update(kwargs)
def __getitem__(self, key):
retval = self.data[key]
self._lookup[key] += 1
return retval
def __setitem__(self, key, value):
self.data[key] = value
self._lookup[key] = self._lookup.default_factory()
def __delitem__(self, key):
del self.data[key]
_ = self._lookup[key]
del self._lookup[key]
def items(self):
print('items is being called!')
yield from self.data.items()
def __iter__(self):
print('__iter__ is being called!')
yield from self.data
def __len__(self):
return len(self.data)
def list_unused(self):
return [key for key in self if self._lookup[key] == 0]
l = LDict(a='apple', b='bugger')
print({**l, **l})
print(l.list_unused())
_ = l['a']
print(l.list_unused())
which produces the output:
__iter__ is being called!
__iter__ is being called!
{'b': 'bugger', 'a': 'apple'}
__iter__ is being called!
[]
__iter__ is being called!
[]
(I only implemented the bare minimum to get example to work, I still recommend implementing the set of methods I listed about if you want your counts to be correct!)
So I guess the answer to your question is you have to
Implement the __iter__(self) method
DO NOT inherit from dict().
Is there a way to give a comparator to set() so when adding items it checks an attribute of that item for likeness rather than if the item is the same? For example, I want to use objects in a set that can contain the same value for one attribute.
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
values = set()
a = TestObj('a')
b = TestObj('b')
a2 = TestObj('a')
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not ok but still gets added
# Hypothetical code
values = set(lambda x, y: x.value != y.value)
values.add(a) # Ok
values.add(b) # Ok
values.add(a2) # Not added
I have implemented my own sorta thing that does similar functionality but wanted to know if there was a builtin way.
from Queue import Queue
class UniqueByAttrQueue(Queue):
def __init__(self, attr, *args, **kwargs):
Queue.__init__(self, *args, **kwargs)
self.attr = attr
def _init(self, maxsize):
self.queue = set()
def _put(self, item):
# Potential race condition, worst case message gets put in twice
if hasattr(item, self.attr) and item not in self:
self.queue.add(item)
def __contains__(self, item):
item_attr = getattr(item, self.attr)
for x in self.queue:
x_attr = getattr(x, self.attr)
if x_attr == item_attr:
return True
return False
def _get(self):
return self.queue.pop()
Just define __hash__ and __eq__ on the object in terms of the attribute in question and it will work with sets. For example:
class TestObj(object):
def __init__(self, value, *args, **kwargs):
self.value = value
super().__init__(*args, **kwargs)
def __eq__(self, other):
if not instance(other, TestObj):
return NotImplemented
return self.value == other.value
def __hash__(self):
return hash(self.value)
If you can't change the object (or don't want to, say, because other things are important to equality), then use a dict instead. You can either do:
mydict[obj.value] = obj
so new objects replace old, or
mydict.setdefault(obj.value, obj)
so old objects are maintained if the value in question is already in the keys. Just make sure to iterate using .viewvalues() (Python 2) or .values() (Python 3) instead of iterating directly (which would get the keys, not the values). You could actually use this approach to make a custom set-like object with a key as you describe (though you'd need to implement many more methods than I show to make it efficient, the default methods are usually fairly slow):
from collections.abc import MutableSet # On Py2, collections without .abc
class keyedset(MutableSet):
def __init__(self, it=(), key=lambda x: x):
self.key = key
self.contents = {}
for x in it:
self.add(x)
def __contains__(self, x):
# Use anonymous object() as default so all arguments handled properly
sentinel = object()
getval = self.contents.get(self.key(x), sentinel)
return getval is not sentinel and getval == x
def __iter__(self):
return iter(self.contents.values()) # itervalues or viewvalues on Py2
def __len__(self):
return len(self.contents)
def add(self, x):
self.contents.setdefault(self.key(x), x)
def discard(self, x):
self.contents.pop(self.key(x), None)
As you know, python allows us simply override dict.__getitem__ method so we can do something different in there when someone tries to retrieve any value from it.
I want to do some code when one MyDict(dict) class instance is passed to update method of another python dict instance. See below:
class MyDict(dict):
def __getitem__(self, item):
print "Doing some stuff here"
return dict.__getitem__(self, item)
d1 = MyDict({'1': 1, '2': 2})
d2 = {}
# I want to have d1.__getitem__ called, but it does not work :-(
d2.update(d1)
Try using the collections.Mapping abstract base class (or collections.MutableMapping, if this is read-write).
import collections
class MyDict(collections.Mapping):
def __init__(self, *args, **kwargs):
self.data = dict(*args, **kwargs)
def __len__(self):
return len(self.data)
def __iter__(self):
return iter(self.data)
def __contains__(self, key):
return key in self.data
def __getitem__(self, key):
print 'Doing some stuff here'
return self.data[key]
All you need is to subclass MyDict from object and create .keys() method for it. See below:
class MyDict(object):
def __init__(self, items=()):
self._dict = dict(items)
def keys(self):
return self._dict.keys()
def __getitem__(self, item):
print "Doing some stuff for item:", item
return self._dict[item]
def __setitem__(self, item, value):
self._dict[item] = value
# You can add some more dict methods
d1 = MyDict({'1': 1, '2': 2})
d2 = {}
# Now you will see some stuff executed for each
# value extracted from d1 while updating d2
d2.update(d1)
Is there any straightforward way of finding a key by knowing the value within a dictionary?
All I can think of is this:
key = [key for key, value in dict_obj.items() if value == 'value'][0]
Your list comprehension goes through all the dict's items finding all the matches, then just returns the first key. This generator expression will only iterate as far as necessary to return the first value:
key = next(key for key, value in dd.items() if value == 'value')
where dd is the dict. Will raise StopIteration if no match is found, so you might want to catch that and return a more appropriate exception like ValueError or KeyError.
There are cases where a dictionary is a one:one mapping
Eg,
d = {1: "one", 2: "two" ...}
Your approach is ok if you are only doing a single lookup. However if you need to do more than one lookup it will be more efficient to create an inverse dictionary
ivd = {v: k for k, v in d.items()}
If there is a possibility of multiple keys with the same value, you will need to specify the desired behaviour in this case.
If your Python is 2.6 or older, you can use
ivd = dict((v, k) for k, v in d.items())
This version is 26% shorter than yours but functions identically, even for redundant/ambiguous values (returns the first match, as yours does). However, it is probably twice as slow as yours, because it creates a list from the dict twice.
key = dict_obj.keys()[dict_obj.values().index(value)]
Or if you prefer brevity over readability you can save one more character with
key = list(dict_obj)[dict_obj.values().index(value)]
And if you prefer efficiency, #PaulMcGuire's approach is better. If there are lots of keys that share the same value it's more efficient not to instantiate that list of keys with a list comprehension and instead use use a generator:
key = (key for key, value in dict_obj.items() if value == 'value').next()
Since this is still very relevant, the first Google hit and I just spend some time figuring this out, I'll post my (working in Python 3) solution:
testdict = {'one' : '1',
'two' : '2',
'three' : '3',
'four' : '4'
}
value = '2'
[key for key in testdict.items() if key[1] == value][0][0]
Out[1]: 'two'
It will give you the first value that matches.
Maybe a dictionary-like class such as DoubleDict down below is what you want? You can use any one of the provided metaclasses in conjuction with DoubleDict or may avoid using any metaclass at all.
import functools
import threading
################################################################################
class _DDChecker(type):
def __new__(cls, name, bases, classdict):
for key, value in classdict.items():
if key not in {'__new__', '__slots__', '_DoubleDict__dict_view'}:
classdict[key] = cls._wrap(value)
return super().__new__(cls, name, bases, classdict)
#staticmethod
def _wrap(function):
#functools.wraps(function)
def check(self, *args, **kwargs):
value = function(self, *args, **kwargs)
if self._DoubleDict__forward != \
dict(map(reversed, self._DoubleDict__reverse.items())):
raise RuntimeError('Forward & Reverse are not equivalent!')
return value
return check
################################################################################
class _DDAtomic(_DDChecker):
def __new__(cls, name, bases, classdict):
if not bases:
classdict['__slots__'] += ('_DDAtomic__mutex',)
classdict['__new__'] = cls._atomic_new
return super().__new__(cls, name, bases, classdict)
#staticmethod
def _atomic_new(cls, iterable=(), **pairs):
instance = object.__new__(cls, iterable, **pairs)
instance.__mutex = threading.RLock()
instance.clear()
return instance
#staticmethod
def _wrap(function):
#functools.wraps(function)
def atomic(self, *args, **kwargs):
with self.__mutex:
return function(self, *args, **kwargs)
return atomic
################################################################################
class _DDAtomicChecker(_DDAtomic):
#staticmethod
def _wrap(function):
return _DDAtomic._wrap(_DDChecker._wrap(function))
################################################################################
class DoubleDict(metaclass=_DDAtomicChecker):
__slots__ = '__forward', '__reverse'
def __new__(cls, iterable=(), **pairs):
instance = super().__new__(cls, iterable, **pairs)
instance.clear()
return instance
def __init__(self, iterable=(), **pairs):
self.update(iterable, **pairs)
########################################################################
def __repr__(self):
return repr(self.__forward)
def __lt__(self, other):
return self.__forward < other
def __le__(self, other):
return self.__forward <= other
def __eq__(self, other):
return self.__forward == other
def __ne__(self, other):
return self.__forward != other
def __gt__(self, other):
return self.__forward > other
def __ge__(self, other):
return self.__forward >= other
def __len__(self):
return len(self.__forward)
def __getitem__(self, key):
if key in self:
return self.__forward[key]
return self.__missing_key(key)
def __setitem__(self, key, value):
if self.in_values(value):
del self[self.get_key(value)]
self.__set_key_value(key, value)
return value
def __delitem__(self, key):
self.pop(key)
def __iter__(self):
return iter(self.__forward)
def __contains__(self, key):
return key in self.__forward
########################################################################
def clear(self):
self.__forward = {}
self.__reverse = {}
def copy(self):
return self.__class__(self.items())
def del_value(self, value):
self.pop_key(value)
def get(self, key, default=None):
return self[key] if key in self else default
def get_key(self, value):
if self.in_values(value):
return self.__reverse[value]
return self.__missing_value(value)
def get_key_default(self, value, default=None):
return self.get_key(value) if self.in_values(value) else default
def in_values(self, value):
return value in self.__reverse
def items(self):
return self.__dict_view('items', ((key, self[key]) for key in self))
def iter_values(self):
return iter(self.__reverse)
def keys(self):
return self.__dict_view('keys', self.__forward)
def pop(self, key, *default):
if len(default) > 1:
raise TypeError('too many arguments')
if key in self:
value = self[key]
self.__del_key_value(key, value)
return value
if default:
return default[0]
raise KeyError(key)
def pop_key(self, value, *default):
if len(default) > 1:
raise TypeError('too many arguments')
if self.in_values(value):
key = self.get_key(value)
self.__del_key_value(key, value)
return key
if default:
return default[0]
raise KeyError(value)
def popitem(self):
try:
key = next(iter(self))
except StopIteration:
raise KeyError('popitem(): dictionary is empty')
return key, self.pop(key)
def set_key(self, value, key):
if key in self:
self.del_value(self[key])
self.__set_key_value(key, value)
return key
def setdefault(self, key, default=None):
if key not in self:
self[key] = default
return self[key]
def setdefault_key(self, value, default=None):
if not self.in_values(value):
self.set_key(value, default)
return self.get_key(value)
def update(self, iterable=(), **pairs):
for key, value in (((key, iterable[key]) for key in iterable.keys())
if hasattr(iterable, 'keys') else iterable):
self[key] = value
for key, value in pairs.items():
self[key] = value
def values(self):
return self.__dict_view('values', self.__reverse)
########################################################################
def __missing_key(self, key):
if hasattr(self.__class__, '__missing__'):
return self.__missing__(key)
if not hasattr(self, 'default_factory') \
or self.default_factory is None:
raise KeyError(key)
return self.__setitem__(key, self.default_factory())
def __missing_value(self, value):
if hasattr(self.__class__, '__missing_value__'):
return self.__missing_value__(value)
if not hasattr(self, 'default_key_factory') \
or self.default_key_factory is None:
raise KeyError(value)
return self.set_key(value, self.default_key_factory())
def __set_key_value(self, key, value):
self.__forward[key] = value
self.__reverse[value] = key
def __del_key_value(self, key, value):
del self.__forward[key]
del self.__reverse[value]
########################################################################
class __dict_view(frozenset):
__slots__ = '__name'
def __new__(cls, name, iterable=()):
instance = super().__new__(cls, iterable)
instance.__name = name
return instance
def __repr__(self):
return 'dict_{}({})'.format(self.__name, list(self))
# oneline solution using zip
>> x = {'a':100, 'b':999}
>> y = dict(zip(x.values(), x.keys()))
>> y
{100: 'a', 999: 'b'}
No, you can not do this efficiently without looking in all the keys and checking all their values. So you will need O(n) time to do this. If you need to do a lot of such lookups you will need to do this efficiently by constructing a reversed dictionary (can be done also in O(n)) and then making a search inside of this reversed dictionary (each search will take on average O(1)).
Here is an example of how to construct a reversed dictionary (which will be able to do one to many mapping) from a normal dictionary:
for i in h_normal:
for j in h_normal[i]:
if j not in h_reversed:
h_reversed[j] = set([i])
else:
h_reversed[j].add(i)
For example if your
h_normal = {
1: set([3]),
2: set([5, 7]),
3: set([]),
4: set([7]),
5: set([1, 4]),
6: set([1, 7]),
7: set([1]),
8: set([2, 5, 6])
}
your h_reversed will be
{
1: set([5, 6, 7]),
2: set([8]),
3: set([1]),
4: set([5]),
5: set([8, 2]),
6: set([8]),
7: set([2, 4, 6])
}
Make a reverse dictionary
reverse_dictionary = {v:k for k,v in dictionary.items()}
If you have a lot of reverse lookups to do
There isn't one as far as I know of, one way however to do it is to create a dict for normal lookup by key and another dict for reverse lookup by value.
There's an example of such an implementation here:
http://code.activestate.com/recipes/415903-two-dict-classes-which-can-lookup-keys-by-value-an/
This does mean that looking up the keys for a value could result in multiple results which can be returned as a simple list.
I know this might be considered 'wasteful', but in this scenario I often store the key as an additional column in the value record:
d = {'key1' : ('key1', val, val...), 'key2' : ('key2', val, val...) }
it's a tradeoff and feels wrong, but it's simple and works and of course depends on values being tuples rather than simple values.
Through values in dictionary can be object of any kind they can't be hashed or indexed other way. So finding key by the value is unnatural for this collection type. Any query like that can be executed in O(n) time only. So if this is frequent task you should take a look for some indexing of key like Jon sujjested or maybe even some spatial index (DB or http://pypi.python.org/pypi/Rtree/ ).
I'm using dictionaries as a sort of "database", so I need to find a key that I can reuse. For my case, if a key's value is None, then I can take it and reuse it without having to "allocate" another id. Just figured I'd share it.
db = {0:[], 1:[], ..., 5:None, 11:None, 19:[], ...}
keys_to_reallocate = [None]
allocate.extend(i for i in db.iterkeys() if db[i] is None)
free_id = keys_to_reallocate[-1]
I like this one because I don't have to try and catch any errors such as StopIteration or IndexError. If there's a key available, then free_id will contain one. If there isn't, then it will simply be None. Probably not pythonic, but I really didn't want to use a try here...
There is none. Don't forget that the value may be found on any number of keys, including 0 or more than 1.
class a(object):
w='www'
def __init__(self):
for i in self.keys():
print i
def __iter__(self):
for k in self.keys():
yield k
a() # why is there an error here?
Thanks.
Edit: The following class also doesn't extend any class;
why it can use keys?
class DictMixin:
# Mixin defining all dictionary methods for classes that already have
# a minimum dictionary interface including getitem, setitem, delitem,
# and keys. Without knowledge of the subclass constructor, the mixin
# does not define __init__() or copy(). In addition to the four base
# methods, progressively more efficiency comes with defining
# __contains__(), __iter__(), and iteritems().
# second level definitions support higher levels
def __iter__(self):
for k in self.keys():
yield k
def has_key(self, key):
try:
value = self[key]
except KeyError:
return False
return True
def __contains__(self, key):
return self.has_key(key)
# third level takes advantage of second level definitions
def iteritems(self):
for k in self:
yield (k, self[k])
def iterkeys(self):
return self.__iter__()
# fourth level uses definitions from lower levels
def itervalues(self):
for _, v in self.iteritems():
yield v
def values(self):
return [v for _, v in self.iteritems()]
def items(self):
return list(self.iteritems())
def clear(self):
for key in self.keys():
del self[key]
def setdefault(self, key, default=None):
try:
return self[key]
except KeyError:
self[key] = default
return default
def pop(self, key, *args):
if len(args) > 1:
raise TypeError, "pop expected at most 2 arguments, got "\
+ repr(1 + len(args))
try:
value = self[key]
except KeyError:
if args:
return args[0]
raise
del self[key]
return value
def popitem(self):
try:
k, v = self.iteritems().next()
except StopIteration:
raise KeyError, 'container is empty'
del self[k]
return (k, v)
def update(self, other=None, **kwargs):
# Make progressively weaker assumptions about "other"
if other is None:
pass
elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups
for k, v in other.iteritems():
self[k] = v
elif hasattr(other, 'keys'):
for k in other.keys():
self[k] = other[k]
else:
for k, v in other:
self[k] = v
if kwargs:
self.update(kwargs)
def get(self, key, default=None):
try:
return self[key]
except KeyError:
return default
def __repr__(self):
return repr(dict(self.iteritems()))
def __cmp__(self, other):
if other is None:
return 1
if isinstance(other, DictMixin):
other = dict(other.iteritems())
return cmp(dict(self.iteritems()), other)
def __len__(self):
return len(self.keys())
Why would you expect it to have keys? You didn't define such a method in your class. Did you intend to inherit from a dictionary?
To do that declare class a(dict)
Or maybe you meant a.__dict__.keys()?
As for the large snippet you've posted in the update, read the comment above the class again:
# Mixin defining all dictionary methods for classes that already have
# a minimum dictionary interface including getitem, setitem, delitem,
# and keys
Note that "already have ... keys" part.
The DictMixin class comes from the UserDict module, which says:
class UserDict.DictMixin Mixin
defining all dictionary methods for
classes that already have a minimum
dictionary interface including
getitem(), setitem(), delitem(), and keys().
This mixin should be used as a
superclass. Adding each of the above
methods adds progressively more
functionality. For instance, defining
all but delitem() will preclude
only pop() and popitem() from the full
interface.
In addition to the four base methods,
progressively more efficiency comes
with defining contains(),
iter(), and iteritems().
Since the mixin has no knowledge of
the subclass constructor, it does not
define init() or copy().
Starting with Python version 2.6, it
is recommended to use
collections.MutableMapping instead of
DictMixin.
Note the recommendation in the last part - use collections.MutableMapping instead.
To iterate over attributes of an object:
class A(object):
def __init__(self):
self.myinstatt1 = 'one'
self.myinstatt2 = 'two'
def mymethod(self):
pass
a = A()
for attr, value in a.__dict__.iteritems():
print attr, value