How to add behaviour to standard dictionary? - python

I want to make a simple wrapper to the standard python dictionary, or maybe the defaultdict class where there is a default value.
The change I want to make is very simple: I would like to store in the dictionary data-structures that are not hashable due to the possibility of mutation, but I have guarantee in my code that I won't ever mutate them anyways.
My approach is detect if the key to the dictionary is hashable, if so proceed as usual. However if the key is not hashable, turn it into a string first then proceed as usual.

You can inherit from dict and override __setitem__ method:
class CustomDict(dict):
def __setitem__(self, key, value):
try:
hash(key)
except TypeError:
key = str(key)
super(CustomDict, self).__setitem__(key, value)
def __getitem__(self, key):
try:
hash(key)
except TypeError:
key = str(key)
return super(CustomDict, self).__getitem__(key)
data = CustomDict()
data["x"] = True
data[dict(foo='bar')] = False
print(data)
>>> {'x': True, "{'foo': 'bar'}": False}
assert data[dict(foo='bar')] == False
Or you can create custom dict-like object as described here.

Related

How to subclass a dictionary so it supports generic type hints?

How can a dictionary be subclassed such that the subclass supports generic type hinting? It needs to behave like a dictionary in every way and support type hints of the keys and values. The subclass will add functions that access and manipulate the dictionary data. For example, it will have a valueat(self, idx:int) function that returns the dictionary value at a given index.
It doesn't require OrderedDict as its base class, but the dictionary does need to have a predictable order. Since OrderedDict maintains insertion order and supports type hints, it seems like a reasonable place to start.
Here's what I tried:
from collections import OrderedDict
class ApplicationSpecificDict(OrderedDict[str, int]):
...
However, it fails with the error:
TypeError: 'type' object is not subscriptable
Is this not supported in Python 3.7+, or am I missing something?
The typing package provides generic classes that correspond to the non-generic classes in collections.abc and collections. These generic classes may be used as base classes to create user-defined generic classes, such as a custom generic dictionary.
Examples of generic classes corresponding to types in collections.abc:
typing.AbstractSet(Sized, Collection[T_co])
typing.Container(Generic[T_co])
typing.Mapping(Sized, Collection[KT], Generic[VT_co])
typing.MutableMapping(Mapping[KT, VT])
typing.MutableSequence(Sequence[T])
typing.MutableSet(AbstractSet[T])
typing.Sequence(Reversible[T_co], Collection[T_co])
Examples of generic classes corresponding to types in collections:
typing.DefaultDict(collections.defaultdict, MutableMapping[KT, VT])
typing.OrderedDict(collections.OrderedDict, MutableMapping[KT, VT])
typing.ChainMap(collections.ChainMap, MutableMapping[KT, VT])
typing.Counter(collections.Counter, Dict[T, int])
typing.Deque(deque, MutableSequence[T])
Implementing a custom generic dictionary
There are many options for implementing a custom generic dictionary. However, it is important to note that unless the user-defined class explicitly inherits from Mapping or MutableMapping, static type checkers like mypy will not consider the class as a mapping.
Example user-defined generic dictionary
from collections import abc # Used for isinstance check in `update()`.
from typing import Dict, Iterator, MutableMapping, TypeVar
KT = TypeVar('KT')
VT = TypeVar('VT')
class MyDict(MutableMapping[KT, VT]):
def __init__(self, dictionary=None, /, **kwargs) -> None:
self.data: Dict[KT, VT] = {}
if dictionary is not None:
self.update(dictionary)
if kwargs:
self.update(kwargs)
def __contains__(self, key: KT) -> bool:
return key in self.data
def __delitem__(self, key: KT) -> None:
del self.data[key]
def __getitem__(self, key: KT) -> VT:
if key in self.data:
return self.data[key]
raise KeyError(key)
def __len__(self) -> int:
return len(self.data)
def __iter__(self) -> Iterator[KT]:
return iter(self.data)
def __setitem__(self, key: KT, value: VT) -> None:
self.data[key] = value
#classmethod
def fromkeys(cls, iterable: Iterable[KT], value: VT) -> "MyDict":
"""Create a new dictionary with keys from `iterable` and values set
to `value`.
Args:
iterable: A collection of keys.
value: The default value. All of the values refer to just a single
instance, so it generally does not make sense for `value` to be a
mutable object such as an empty list. To get distinct values, use
a dict comprehension instead.
Returns:
A new instance of MyDict.
"""
d = cls()
for key in iterable:
d[key] = value
return d
def update(self, other=(), /, **kwds) -> None:
"""Updates the dictionary from an iterable or mapping object."""
if isinstance(other, abc.Mapping):
for key in other:
self.data[key] = other[key]
elif hasattr(other, "keys"):
for key in other.keys():
self.data[key] = other[key]
else:
for key, value in other:
self.data[key] = value
for key, value in kwds.items():
self.data[key] = value
I posted on this question which yours may be a dupe of, but I will include it here as well because I found both of these questions when I was googling how to do this.
Basically, you need to use the typing Mapping generic
This is the generic annotation that dict uses so you can define other types like MyDict[str, int].
How to:
import typing
from collections import OrderedDict
# these are generic type vars to tell mutable-mapping
# to accept any type vars when creating a sub-type of your generic dict
_KT = typing.TypeVar("_KT") # key type
_VT = typing.TypeVar("_VT") # value type
# `typing.MutableMapping` requires you to implement certain functions like __getitem__
# You can get around this by just subclassing OrderedDict first.
# Note: The generic you're subclassing needs to come BEFORE
# the `typing.MutableMapping` subclass or accessing indices won't work.
class ApplicationSpecificDict(
OrderedDict,
typing.MutableMapping[_KT, _VT]
):
"""Your special dict"""
...
# Now define the key, value types for sub-types of your dict
RequestDict = ApplicationSpecificDict[str, typing.Tuple[str, str]]
ModelDict = ApplicationSpecificDict[str, typing.Any]
Now use you custom types of your sub-typed dict:
from my_project.custom_typing import ApplicationSpecificDict # Import your custom type
def make_request() -> ApplicationSpecificDict:
request = ApplicationSpecificDict()
request["test"] = ("sierra", "117")
return request
print(make_request())
Will output as { "test": ("sierra", "117") }

Python dictionary with default key

I am running Python 2.7.10.
I would like to have a dictionary return the value stored at a particular key in case of missing item. For example, something like that:
myD = dict(...)
return myD[key] if key in myD else myD[defaultKey]
Just to make sure it is clear, I want to call myD[key] and have the right value returned without the extra if...else in my code...
This isn't quite what defaultdict does (since it takes a function to call as a default) and not quite what dict.setdefault() does, and myD.get(key, ???) does not seem to help either. I probably should inherit from dict or defaultdict and overload __init__() and missing() methods, but I could not come up with a good way to do this.
In your case, dict.get should work(I know you already mentioned it didn't work). Did you try:
myD.get(key,myD[defaultKey])
I'm not completely sure what you want (didn't read all the comments under your question), but think this may be at least close to what you want.
class DefaultKeyDict(dict):
def __init__(self, default_key, *args, **kwargs):
self.default_key = default_key
super(DefaultKeyDict, self).__init__(*args, **kwargs)
def __missing__ (self, key):
if self.default_key not in self: # default key not defined
raise KeyError(key)
return self[self.default_key]
def __repr__(self):
return ('{}({!r}, {})'.format(self.__class__.__name__,
self.default_key,
super(DefaultKeyDict, self).__repr__()))
def __reduce__(self): # optional, for pickle support
args = (self.default_key if self.default_key in self else None,)
return self.__class__, args, None, None, self.iteritems()
dkdict = DefaultKeyDict('b', {'a': 1, 'b': 2, 'c': 3})
print dkdict['a'] # -> 1
print dkdict['b'] # -> 2
print dkdict['c'] # -> 3
print dkdict['d'] # -> 2 (value of the default key)
del dkdict['b'] # delete the default key
try:
print dkdict['d'] # should now raise exception like normal
except KeyError:
print("dkdict['d'] raised a KeyError")
You might want to modify the class __init__() method to accept both the default key and its value as arguments (instead of just the key).
When overwriting __getitem__, one can use simply square brackets. It returns the value for the first valid key and None if no key is found.
class mDict(dict):
def __getitem__(self, keys):
for k in keys:
if k in self:
return self.get(k)
mdict = mDict({'a': 1, 'b': 2, 'default': 3})
mdict['a', 'default'] # -> 1
mdict['X', 'b', 'default'] # -> 2
mdict['X', 'Y', 'default'] # -> 3
mdict['X', 'Y', 'Z'] # -> None
One can use here also more than just two keys, which is more readable than many nested .get().

Define a python dictionary with immutable keys but mutable values

Well, the question is in the title: how do I define a python dictionary with immutable keys but mutable values? I came up with this (in python 2.x):
class FixedDict(dict):
"""
A dictionary with a fixed set of keys
"""
def __init__(self, dictionary):
dict.__init__(self)
for key in dictionary.keys():
dict.__setitem__(self, key, dictionary[key])
def __setitem__(self, key, item):
if key not in self:
raise KeyError("The key '" +key+"' is not defined")
dict.__setitem__(self, key, item)
but it looks to me (unsurprisingly) rather sloppy. In particular, is this safe or is there the risk of actually changing/adding some keys, since I'm inheriting from dict?
Thanks.
Consider proxying dict instead of subclassing it. That means that only the methods that you define will be allowed, instead of falling back to dict's implementations.
class FixedDict(object):
def __init__(self, dictionary):
self._dictionary = dictionary
def __setitem__(self, key, item):
if key not in self._dictionary:
raise KeyError("The key {} is not defined.".format(key))
self._dictionary[key] = item
def __getitem__(self, key):
return self._dictionary[key]
Also, you should use string formatting instead of + to generate the error message, since otherwise it will crash for any value that's not a string.
The problem with direct inheritance from dict is that it's quite hard to comply with the full dict's contract (e.g. in your case, update method won't behave in a consistent way).
What you want, is to extend the collections.MutableMapping:
import collections
class FixedDict(collections.MutableMapping):
def __init__(self, data):
self.__data = data
def __len__(self):
return len(self.__data)
def __iter__(self):
return iter(self.__data)
def __setitem__(self, k, v):
if k not in self.__data:
raise KeyError(k)
self.__data[k] = v
def __delitem__(self, k):
raise NotImplementedError
def __getitem__(self, k):
return self.__data[k]
def __contains__(self, k):
return k in self.__data
Note that the original (wrapped) dict will be modified, if you don't want that to happen, use copy or deepcopy.
How you prevent someone from adding new keys depends entirely on why someone might try to add new keys. As the comments state, most dictionary methods that modify the keys don't go through __setitem__, so a .update() call will add new keys just fine.
If you only expect someone to use d[new_key] = v, then your __setitem__ is fine. If they might use other ways to add keys, then you have to put in more work. And of course, they can always use this to do it anyway:
dict.__setitem__(d, new_key, v)
You can't make things truly immutable in Python, you can only stop particular changes.

Immutable dictionary, only use as a key for another dictionary

I had the need to implement a hashable dict so I could use a dictionary as a key for another dictionary.
A few months ago I used this implementation: Python hashable dicts
However I got a notice from a colleague saying 'it is not really immutable, thus it is not safe. You can use it, but it does make me feel like a sad Panda'.
So I started looking around to create one that is immutable. I have no need to compare the 'key-dict' to another 'key-dict'. Its only use is as a key for another dictionary.
I have come up with the following:
class HashableDict(dict):
"""Hashable dict that can be used as a key in other dictionaries"""
def __new__(self, *args, **kwargs):
# create a new local dict, that will be used by the HashableDictBase closure class
immutableDict = dict(*args, **kwargs)
class HashableDictBase(object):
"""Hashable dict that can be used as a key in other dictionaries. This is now immutable"""
def __key(self):
"""Return a tuple of the current keys"""
return tuple((k, immutableDict[k]) for k in sorted(immutableDict))
def __hash__(self):
"""Return a hash of __key"""
return hash(self.__key())
def __eq__(self, other):
"""Compare two __keys"""
return self.__key() == other.__key() # pylint: disable-msg=W0212
def __repr__(self):
"""#see: dict.__repr__"""
return immutableDict.__repr__()
def __str__(self):
"""#see: dict.__str__"""
return immutableDict.__str__()
def __setattr__(self, *args):
raise TypeError("can't modify immutable instance")
__delattr__ = __setattr__
return HashableDictBase()
I used the following to test the functionality:
d = {"a" : 1}
a = HashableDict(d)
b = HashableDict({"b" : 2})
print a
d["b"] = 2
print a
c = HashableDict({"a" : 1})
test = {a : "value with a dict as key (key a)",
b : "value with a dict as key (key b)"}
print test[a]
print test[b]
print test[c]
which gives:
{'a': 1}
{'a': 1}
value with a dict as key (key a)
value with a dict as key (key b)
value with a dict as key (key a)
as output
Is this the 'best possible' immutable dictionary that I can use that satisfies my requirements? If not, what would be a better solution?
If you are only using it as a key for another dict, you could go for frozenset(mutabledict.items()). If you need to access the underlying mappings, you could then use that as the parameter to dict.
mutabledict = dict(zip('abc', range(3)))
immutable = frozenset(mutabledict.items())
read_frozen = dict(immutable)
read_frozen['a'] # => 1
Note that you could also combine this with a class derived from dict, and use the frozenset as the source of the hash, while disabling __setitem__, as suggested in another answer. (#RaymondHettinger's answer for code which does just that).
The Mapping abstract base class makes this easy to implement:
import collections
class ImmutableDict(collections.Mapping):
def __init__(self, somedict):
self._dict = dict(somedict) # make a copy
self._hash = None
def __getitem__(self, key):
return self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
def __hash__(self):
if self._hash is None:
self._hash = hash(frozenset(self._dict.items()))
return self._hash
def __eq__(self, other):
return self._dict == other._dict
I realize this has already been answered, but types.MappingProxyType is an analogous implementation for Python 3.3. Regarding the original question of safety, there is a discussion in PEP 416 -- Add a frozendict builtin type on why the idea of a frozendict was rejected.
In order for your immutable dictionary to be safe, all it needs to do is never change its hash. Why don't you just disable __setitem__ as follows:
class ImmutableDict(dict):
def __setitem__(self, key, value):
raise Exception("Can't touch this")
def __hash__(self):
return hash(tuple(sorted(self.items())))
a = ImmutableDict({'a':1})
b = {a:1}
print b
print b[a]
a['a'] = 0
The output of the script is:
{{'a': 1}: 1}
1
Traceback (most recent call last):
File "ex.py", line 11, in <module>
a['a'] = 0
File "ex.py", line 3, in __setitem__
raise Exception("Can't touch this")
Exception: Can't touch this
Here is a link to pip install-able implementation of #RaymondHettinger's answer: https://github.com/pcattori/icicle
Simply pip install icicle and you can from icicle import FrozenDict!
Update: icicle has been deprecated in favor of maps: https://github.com/pcattori/maps (documentation, PyPI).
It appears I am late to post. Not sure if anyone else has come up with ideas. But here is my take on it. The Dict is immutable and hashable. I made it immutable by overriding all the methods, magic and otherwise, with a custom '_readonly' function that raises an Exception. This is done when the object is instantiated. To get around the problem of not being able to apply the values I set the 'hash' under '__new__'. I then I override the '__hash__'function. Thats it!
class ImmutableDict(dict):
_HASH = None
def __new__(cls, *args, **kwargs):
ImmutableDict._HASH = hash(frozenset(args[0].items()))
return super(ImmutableDict, cls).__new__(cls, args)
def __hash__(self):
return self._HASH
def _readonly(self, *args, **kwards):
raise TypeError("Cannot modify Immutable Instance")
__delattr__ = __setattr__ = __setitem__ = pop = update = setdefault = clear = popitem = _readonly
Test:
immutabled1 = ImmutableDict({"This": "That", "Cheese": "Blarg"})
dict1 = {immutabled1: "Yay"}
dict1[immutabled1]
"Yay"
dict1
{{'Cheese': 'Blarg', 'This': 'That'}: 'Yay'}
Variation of Raymond Hettinger's answer by wrapping the self._dict with types.MappingProxyType.
class ImmutableDict(collections.Mapping):
"""
Copies a dict and proxies it via types.MappingProxyType to make it immutable.
"""
def __init__(self, somedict):
dictcopy = dict(somedict) # make a copy
self._dict = MappingProxyType(dictcopy) # lock it
self._hash = None
def __getitem__(self, key):
return self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
def __hash__(self):
if self._hash is None:
self._hash = hash(frozenset(self._dict.items()))
return self._hash
def __eq__(self, other):
return self._dict == other._dict
def __repr__(self):
return str(self._dict)
You can use an enum:
import enum
KeyDict1 = enum.Enum('KeyDict1', {'InnerDictKey1':'bla', 'InnerDictKey2 ':2})
d = { KeyDict1: 'whatever', KeyDict2: 1, ...}
You can access the enums like you would a dictionary:
KeyDict1['InnerDictKey2'].value # This is 2
You can iterate over the names, and get their values... It does everything you'd expect.
You can try using https://github.com/Lightricks/freeze
It provides recursively immutable and hashable dictionaries
from freeze import FDict
a_mutable_dict = {
"list": [1, 2],
"set": {3, 4},
}
a_frozen_dict = FDict(a_mutable_dict)
print(a_frozen_dict)
print(hash(a_frozen_dict))
# FDict: {'list': FList: (1, 2), 'set': FSet: {3, 4}}
# -4855611361973338606

Keyed Collection in Python?

Is there any equivalent to KeyedCollection in Python, i.e. a set where the elements have (or dynamically generate) their own keys?
i.e. the goal here is to avoid storing the key in two places, and therefore dictionaries are less than ideal (hence the question).
You can simulate that very easily:
class KeyedObject(object):
def get_key(self):
raise NotImplementedError("You must subclass this before you can use it.")
class KeyedDict(dict):
def append(self, obj):
self[obj.get_key()] = obj
Now you can use a KeyedDict instead of dict with subclasses of KeyedObject (where get_key return a valid key based on some object property).
Given your constraints, everyone trying to implement what you're looking for using a dict is barking up the wrong tree. Instead, you should write a list subclass that overrides __getitem__ to provide the behavior you want. I've written it so it tries to get the desired item by index first, then falls back to searching for the item by the key attribute of the contained objects. (This could be a property if the object needs to determine this dynamically.)
There's no way to avoid a linear search if you don't want to duplicate something somewhere; I am sure the C# implementation does exactly the same thing if you don't allow it to use a dictionary to store the keys.
class KeyedCollection(list):
def __getitem__(self, key):
if isinstance(key, int) or isinstance(key, slice):
return list.__getitem__(key)
for item in self:
if getattr(item, "key", 0) == key:
return item
raise KeyError('item with key `%s` not found' % key)
You would probably also want to override __contains__ in a similar manner so you could say if "key" in kc.... If you want to make it even more like a dict, you could also implement keys() and so on. They will be equally inefficient, but you will have an API like a dict, that also works like a list.
#Mehrdad said:
Because semantically, it doesn't make as much sense. When an object
knows its key, it doesn't make sense to put it in a dictionary -- it's
not a key-value pair. It's more of a semantic issue than anything
else.
With this constraint, there is nothing in Python that does what you want. I suggest you use a dict and not worry about this level of detail on the semantics. #Gabi Purcaru's answer shows how you can create an object with the interface you want. Why get bothered about how it's working internally?
It could be that C#'s KeyedCollection is doing the same thing under the covers: asking the object for its key and then storing the key for fast access. In fact, from the docs:
By default, the KeyedCollection(Of TKey, TItem) includes a lookup
dictionary that you can obtain with the Dictionary property. When an
item is added to the KeyedCollection(Of TKey, TItem), the item's key
is extracted once and saved in the lookup dictionary for faster
searches. This behavior is overridden by specifying a dictionary
creation threshold when you create the KeyedCollection(Of TKey,
TItem). The lookup dictionary is created the first time the number of
elements exceeds that threshold. If you specify –1 as the threshold,
the lookup dictionary is never created.
I'm not much of a C#'er, but I think dictionaries is what you need.
http://docs.python.org/tutorial/datastructures.html#dictionaries
http://docs.python.org/tutorial/datastructures.html
Or maybe lists:
http://docs.python.org/library/functions.html#list
Why not simply use a dict? If the key already exists, a reference to the key will be used in the dict; it won't be senselessly duplicated.
class MyExample(object):
def __init__(self, key, value):
self.key = key
self.value = value
m = MyExample("foo", "bar")
d = {}
d[m.key] = m
first_key = d.keys()[0]
first_key is m.key # returns True
If the key doesn't already exist, a copy of it will be saved, but I don't see that as a problem.
def lame_hash(s):
h = 0
for ch in s:
h ^= ord(ch)
return h
d = {}
d[lame_hash(m.key)] = m
print d # key value is 102 which is stored in the dict
lame_hash(m.key) in d # returns True
I'm not sure if this is what you meant, but this dictionary will create it's own keys as you add to it...
class KeyedCollection(dict):
def __init__(self):
self.current_key = 0
def add(self, item):
self[self.current_key] = item
abc = KeyedCollection()
abc.add('bob')
abc.add('jane')
>>> abc
{0: 'bob', 1: 'jane'}
How about a set()? The elements can have their own k
To go a little more in detail that the already correct answer from #Gabi Purcaru's answer, here a class that do the same as gabi one's but that also check for correct given type on key and value (as the TKey and TValue of the .net KeyedCollection).
class KeyedCollection(MutableMapping):
"""
Provides the abstract base class for a collection (:class:`MutableMappinp`) whose keys are embedded in the values.
"""
__metaclass__ = abc.ABCMeta
_dict = None # type: dict
def __init__(self, seq={}):
self._dict = dict(seq)
#abc.abstractmethod
def __is_type_key_correct__(self, key):
"""
Returns: The type of keys in the collection
"""
pass
#abc.abstractmethod
def __is_type_value_correct__(self, value):
"""
Returns: The type of values in the collection
"""
pass
#abc.abstractmethod
def get_key_for_item(self, value):
"""
When implemented in a derivated class, extracts the key from the specified element.
Args:
value: the element from which to extract the key (of type specified by :meth:`type_value`)
Returns: The key of specified element (of type specified by :meth:`type_key`)
"""
pass
def __assert_type_key(self, key, arg_name='key'):
if not self.__is_type_key_correct__(key) :
raise ValueError("{} type is not correct".format(arg_name))
def __assert_type_value(self, value, arg_name='value'):
if not self.__is_type_value_correct__(value) :
raise ValueError("{} type is not correct".format(arg_name))
def add(self, value):
"""
Adds an object to the KeyedCollection.
Args:
value: The object to be added to the KeyedCollection (of type specified by :meth:`type_value`).
"""
key = self.get_key_for_item(value)
self._dict[key] = value
# Implements abstract method __setitem__ from MutableMapping parent class
def __setitem__(self, key, value):
self.__assert_type_key(key)
self.__assert_type_value(value)
if value.get_key() != key:
raise ValueError("provided key does not correspond to the given KeyedObject value")
self._dict[key] = value
# Implements abstract method __delitem__ from MutableMapping parent class
def __delitem__(self, key):
self.__assert_type_key(key)
self._dict.pop(key)
# Implements abstract method __getitem__ from MutableMapping parent class (Mapping base class)
def __getitem__(self, key):
self.__assert_type_key(key)
return self._dict[key]
# Implements abstract method __len__ from MutableMapping parent class (Sized mixin on Mapping base class)
def __len__(self):
return len(self._dict)
# Implements abstract method __iter__ from MutableMapping parent class (Iterable mixin on Mapping base class)
def __iter__(self):
return iter(self._dict)
pass
# Implements abstract method __contains__ from MutableMapping parent class (Container mixin on Mapping base class)
def __contains__(self, x):
self.__assert_type_key(x, 'x')
return x in self._dict

Categories