When a missing key is queried in a defaultdict object, the key is automatically added to the dictionary:
from collections import defaultdict
d = defaultdict(int)
res = d[5]
print(d)
# defaultdict(<class 'int'>, {5: 0})
# we want this dictionary to remain empty
However, often we want to only add keys when they are assigned explicitly or implicitly:
d[8] = 1 # we want this key added
d[3] += 1 # we want this key added
One use case is simple counting, to avoid the higher overhead of collections.Counter, but this feature may also be desirable generally.
Counter example [pardon the pun]
This is the functionality I want:
from collections import Counter
c = Counter()
res = c[5] # 0
print(c) # Counter()
c[8] = 1 # key added successfully
c[3] += 1 # key added successfully
But Counter is significantly slower than defaultdict(int). I find the performance hit usually ~2x slower vs defaultdict(int).
In addition, obviously Counter is only comparable to int argument in defaultdict, while defaultdict can take list, set, etc.
Is there a way to implement the above behaviour efficiently; for instance, by subclassing defaultdict?
Benchmarking example
%timeit DwD(lst) # 72 ms
%timeit dd(lst) # 44 ms
%timeit counter_func(lst) # 98 ms
%timeit af(lst) # 72 ms
Test code:
import numpy as np
from collections import defaultdict, Counter, UserDict
class DefaultDict(defaultdict):
    """A ``defaultdict`` with an extra lookup that never stores the default."""

    def get_and_forget(self, key):
        """Return the value for *key*, or a fresh, unstored default.

        The default factory is only invoked when *key* is absent.
        """
        # A brand-new object cannot be equal to anything already stored.
        missing = object()
        found = self.get(key, missing)
        return self.default_factory() if found is missing else found
class DictWithDefaults(dict):
    """Dict whose ``d[key]`` gives ``factory()`` for absent keys, unstored."""

    __slots__ = ['_factory']  # no per-instance __dict__, keeps instances small

    def __init__(self, factory, *args, **kwargs):
        """Remember *factory*; remaining arguments initialise the dict."""
        self._factory = factory
        super().__init__(*args, **kwargs)

    def __missing__(self, key):
        """Produce a default for *key*; nothing is written to the dict."""
        make_default = self._factory
        return make_default()
lst = np.random.randint(0, 10, 100000)
def DwD(lst):
    """Tally the items of *lst* in a ``DictWithDefaults(int)`` and return it."""
    tally = DictWithDefaults(int)
    for item in lst:
        tally[item] += 1
    return tally
def dd(lst):
    """Tally the items of *lst* in a ``defaultdict(int)`` and return it."""
    tally = defaultdict(int)
    for item in lst:
        tally[item] += 1
    return tally
def counter_func(lst):
    """Tally the items of *lst* by incrementing a ``Counter`` one item at a time."""
    tally = Counter()
    for item in lst:
        tally[item] += 1
    return tally
def af(lst):
    """Tally the items of *lst* in a ``DefaultDict(int)`` and return it."""
    tally = DefaultDict(int)
    for item in lst:
        tally[item] += 1
    return tally
Note Regarding Bounty Comment:
@Aran-Fey's solution has been updated since the bounty was offered, so please disregard the bounty comment.
Rather than messing about with collections.defaultdict to make it do what we want, it seems easier to implement our own:
class DefaultDict(dict):
    """Dict that returns ``default_factory()`` for missing keys without storing it.

    A plain lookup never adds a key; keys only appear through assignment
    (``d[k] = v``, ``d[k] += 1``, ...).
    """

    def __init__(self, default_factory, *args, **kwargs):
        """Store *default_factory* and initialise the dict from the rest.

        Positional arguments (a mapping or an iterable of key/value pairs)
        are forwarded to ``dict`` so the instance can be seeded exactly like
        a regular dictionary, not only through keyword items.
        """
        super().__init__(*args, **kwargs)
        self.default_factory = default_factory

    def __getitem__(self, key):
        """Return the stored value, or a fresh default that is NOT stored."""
        try:
            return super().__getitem__(key)
        except KeyError:
            # Absent key: hand out a new default but leave the dict untouched.
            return self.default_factory()
This works the way you want:
d = DefaultDict(int)
res = d[5]
d[8] = 1
d[3] += 1
print(d) # {8: 1, 3: 1}
However, it can behave unexpectedly for mutable types:
d = DefaultDict(list)
d[5].append('foobar')
print(d) # output: {}
This is probably the reason why defaultdict remembers the value when a nonexistent key is accessed.
Another option is to extend defaultdict and add a new method that looks up a value without remembering it:
from collections import defaultdict
class DefaultDict(defaultdict):
    """A ``defaultdict`` with a lookup method that does not store defaults."""

    def get_and_forget(self, key):
        """Return the value for *key*, or an unstored default.

        The factory is called on every lookup, even when *key* is present.
        """
        fallback = self.default_factory()
        return self.get(key, fallback)
Note that the get_and_forget method calls the default_factory() every time, regardless of whether the key already exists in the dict or not. If this is undesirable, you can implement it with a sentinel value instead:
class DefaultDict(defaultdict):
    """A ``defaultdict`` with a lookup method that does not store defaults."""

    def get_and_forget(self, key):
        """Return the value for *key*; build (but do not store) a default
        only when the key is absent."""
        marker = object()  # unique per call, so it can never be a stored value
        stored = self.get(key, marker)
        if stored is not marker:
            return stored
        return self.default_factory()
This has better support for mutable types, because it allows you to choose whether the value should be added to the dict or not.
If you just want a dict that returns a default value when you access a non-existing key then you could simply subclass dict and implement __missing__:
object.__missing__(self, key)
Called by dict.__getitem__() to implement self[key] for dict subclasses when key is not in the dictionary.
That would look like this:
class DictWithDefaults(dict):
    """Dict that answers lookups of absent keys with ``factory()``.

    The default is only returned; the key is not inserted.
    """

    # not necessary, just a memory optimization
    __slots__ = ['_factory']

    def __init__(self, factory, *args, **kwargs):
        """Keep *factory*; everything else is passed on to ``dict``."""
        self._factory = factory
        super().__init__(*args, **kwargs)

    def __missing__(self, key):
        """Build and return the default value for the absent *key*."""
        default = self._factory()
        return default
In this case I used a defaultdict-like approach so you have to pass in a factory that should provide the default value when called:
>>> dwd = DictWithDefaults(int)
>>> dwd[0] # key does not exist
0
>>> dwd # key still doesn't exist
{}
>>> dwd[0] = 10
>>> dwd
{0: 10}
When you do assignments (explicitly or implicitly) the value will be added to the dictionary:
>>> dwd = DictWithDefaults(int)
>>> dwd[0] += 1
>>> dwd
{0: 1}
>>> dwd = DictWithDefaults(list)
>>> dwd[0] += [1]
>>> dwd
{0: [1]}
You wondered how collections.Counter is doing it and as of CPython 3.6.5 it also uses __missing__:
class Counter(dict):
...
def __missing__(self, key):
'The count of elements not in the Counter is zero.'
# Needed so that self[missing_item] does not raise KeyError
return 0
...
Better performance?!
You mentioned that speed is of concern, so you could make that class a C extension class (assuming you use CPython), for example using Cython (I'm using the Jupyter magic commands to create the extension class):
%load_ext cython
%%cython
# Cython extension-type counterpart of DictWithDefaults: a dict subclass whose
# lookups of absent keys return factory() without storing the result.
cdef class DictWithDefaultsCython(dict):
    # C-level attribute holding the zero-argument default factory
    cdef object _factory
    def __init__(self, factory, *args, **kwargs):
        # the remaining arguments are passed through to dict's initialiser
        self._factory = factory
        super().__init__(*args, **kwargs)
    def __missing__(self, key):
        # invoked for absent keys; the default is returned, not inserted
        return self._factory()
Benchmark
Based on your benchmark:
from collections import Counter, defaultdict
def d_py(lst):
    """Tally *lst* using the pure-Python ``DictWithDefaults``."""
    tally = DictWithDefaults(int)
    for item in lst:
        tally[item] += 1
    return tally
def d_cy(lst):
    """Tally *lst* using the Cython ``DictWithDefaultsCython``."""
    tally = DictWithDefaultsCython(int)
    for item in lst:
        tally[item] += 1
    return tally
def d_dd(lst):
    """Tally *lst* using ``collections.defaultdict(int)``."""
    tally = defaultdict(int)
    for item in lst:
        tally[item] += 1
    return tally
Given that this is just counting it would be an (unforgivable) oversight to not include a benchmark simply using the Counter initializer.
I have recently written a small benchmarking tool that I think might come in handy here (but you could do it using %timeit as well):
from simple_benchmark import benchmark
import random


def d_c(lst):
    """Tally *lst* with a manual loop over a ``Counter``.

    This name is listed in ``functions`` below, so it must be defined before
    that list is built; otherwise the script stops with a NameError.
    """
    d = Counter()
    for i in lst:
        d[i] += 1
    return d


# Benchmark inputs keyed by list size: all-distinct, all-identical and
# randomly mixed elements.
sizes = [2**i for i in range(2, 20)]
unique_lists = {i: list(range(i)) for i in sizes}
identical_lists = {i: [0]*i for i in sizes}
mixed = {i: [random.randint(0, i // 2) for j in range(i)] for i in sizes}

# ``Counter`` itself is included to measure the plain ``Counter(iterable)`` call.
functions = [d_py, d_cy, d_dd, d_c, Counter]

b_unique = benchmark(functions, unique_lists, 'list size')
b_identical = benchmark(functions, identical_lists, 'list size')
b_mixed = benchmark(functions, mixed, 'list size')
With this result:
import matplotlib.pyplot as plt
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)
ax1.set_title('unique elements')
ax2.set_title('identical elements')
ax3.set_title('mixed elements')
b_unique.plot(ax=ax1)
b_identical.plot(ax=ax2)
b_mixed.plot(ax=ax3)
Note that it uses log-log scale for better visibility of differences:
For long iterables, Counter(iterable) was by far the fastest. DictWithDefaultsCython and defaultdict were roughly equal (with DictWithDefaultsCython being slightly faster most of the time, even if that's not visible here), followed by DictWithDefaults and then Counter with the manual for-loop. Funny how Counter is both the fastest and the slowest.
Implicitly add the returned value if it is modified
Something I glossed over is the fact that it differs considerably from defaultdict because of the desired "just return the default don't save it" with mutable types:
>>> from collections import defaultdict
>>> dd = defaultdict(list)
>>> dd[0].append(10)
>>> dd
defaultdict(list, {0: [10]})
>>> dwd = DictWithDefaults(list)
>>> dwd[0].append(10)
>>> dwd
{}
That means you actually need to set the element when you want the modified value to be visible in the dictionary.
However this somewhat intrigued me so I want to share a way how you could make that work (if desired). But it's just a quick test and only works for append calls using a proxy. Please don't use that in production code (from my point of view this just has entertainment value):
from wrapt import ObjectProxy
class DictWithDefaultsFunky(dict):
    # Proof of concept: a dict with defaults where the default handed out for
    # a missing key is wrapped in a proxy whose append() stores it in the dict.
    __slots__ = ['_factory'] # avoid using extra memory
    def __init__(self, factory, *args, **kwargs):
        # factory is called with no arguments to build each default value
        self._factory = factory
        super().__init__(*args, **kwargs)
    def __missing__(self, key):
        ret = self._factory()
        # alias needed because `self` inside AppendTrigger is the proxy
        dict_ = self
        class AppendTrigger(ObjectProxy):
            def append(self, val):
                self.__wrapped__.append(val)
                # store the real object; replaces whatever is under `key`
                dict_[key] = ret
        # a new proxy (and a new default) is created on every missing lookup
        return AppendTrigger(ret)
That's a dictionary that returns a proxy object (instead of the real default) and it overloads a method that, if called, adds the return value to the dictionary. And it "works":
>>> d = DictWithDefaultsFunky(list)
>>> a = d[10]
>>> d
{}
>>> a.append(1)
>>> d
{10: [1]}
But it does have a few pitfalls (that could be solved but it's just a proof-of-concept so I won't attempt it here):
>>> d = DictWithDefaultsFunky(list)
>>> a = d[10]
>>> b = d[10]
>>> d
{}
>>> a.append(1)
>>> d
{10: [1]}
>>> b.append(10)
>>> d # oups, that overwrote the previous stored value ...
{10: [10]}
If you really want something like that you probably need to implement a class that really tracks changes within the values (and not just append calls).
If you want to avoid implicit assignments
In case you don't like the fact that += or similar operations add the value to the dictionary (opposed to the previous example which even tried to add the value in a very implicit fashion) then you probably should implement it as method instead of as special method.
For example:
class SpecialDict(dict):
    """Plain dict plus an explicit "value or default" lookup method.

    Regular item access (``sd[key]``) is untouched and still raises
    ``KeyError`` for missing keys.
    """

    __slots__ = ['_factory']

    def __init__(self, factory, *args, **kwargs):
        """Keep *factory* and initialise the dict from the other arguments."""
        self._factory = factory
        # Forward the initial contents; without this call they were silently
        # discarded and the dict always started out empty.
        super().__init__(*args, **kwargs)

    def get_or_default_from_factory(self, key):
        """Return ``self[key]``, or an unstored ``factory()`` if it is absent."""
        try:
            return self[key]
        except KeyError:
            return self._factory()
>>> sd = SpecialDict(int)
>>> sd.get_or_default_from_factory(0)
0
>>> sd
{}
>>> sd[0] = sd.get_or_default_from_factory(0) + 1
>>> sd
{0: 1}
This is similar to the behavior of Aran-Fey's answer, but instead of get with a sentinel it uses a try/except approach.
Your bounty message says Aran-Fey's answer "does not work with mutable types". (For future readers, the bounty message is "The current answer is good, but it does not work with mutable types. If the existing answer can be adapted, or another option solution put forward, to suit this purpose, this would be ideal.")
The thing is, it does work for mutable types:
>>> d = DefaultDict(list)
>>> d[0] += [1]
>>> d[0]
[1]
>>> d[1]
[]
>>> 1 in d
False
What doesn't work is something like d[1].append(2):
>>> d[1].append(2)
>>> d[1]
[]
That's because this doesn't involve a store operation on the dict. The only dict operation involved is an item retrieval.
There is no difference between what the dict object sees in d[1] or d[1].append(2). The dict is not involved in the append operation. Without nasty, fragile stack inspection or something similar, there is no way for the dict to store the list only for d[1].append(2).
So that's hopeless. What should you do instead?
Well, one option is to use a regular collections.defaultdict, and just not use [] when you don't want to store defaults. You can use in or get:
if key in d:
value = d[key]
else:
...
or
value = d.get(key, sentinel)
Alternatively, you can turn off the default factory when you don't want it. This is frequently reasonable when you have separate "build" and "read" phases, and you don't want the default factory during the read phase:
d = collections.defaultdict(list)
for thing in whatever:
d[thing].append(other_thing)
# turn off default factory
d.default_factory = None
use(d)
Related
I have a case where the same key could have different strings associated with it.
e.g. flow and wolf both have the same characters, if I sort them and use them as keys in a dictionary, I want to put the original strings as values.
I tried in a python dict as:
d = {}
d["flow"] = flow
d["flow"] = wolf
but there is only one value associated with the key.
I tried d["flow"].append("wolf") but that also doesn't work.
How to get this scenario working with Python dicts?
You can't have multiple items in a dictionary with the same key. What you should do is make the value a list. Like this -
d = dict()
d["flow"] = ["flow"]
d["flow"].append("wolf")
If that is what you want to do, then you might want to use defaultdict. Then you can do
from collections import defaultdict
d = defaultdict(list)
d["flow"].append("flow")
d["flow"].append("wolf")
You could use the setdefault method to create a list as the value for a key even if that key is not already in the dictionary.
So this makes the code really simple:
>>> d = {}
>>> d.setdefault(1, []).append(2)
>>> d.setdefault(1, []).append(3)
>>> d.setdefault(5, []).append(6)
>>> d
{1: [2, 3], 5: [6]}
You could implement a dict-like class that does exactly that.
class MultiDict:
    """Dict-like container where every assignment appends to a per-key list."""

    def __init__(self):
        # key -> list of all values assigned to that key, in assignment order
        self.dict = {}

    def __setitem__(self, key, value):
        """Append *value* to the list for *key*, creating the list if needed."""
        self.dict.setdefault(key, []).append(value)

    def __getitem__(self, key):
        """Return the list of values for *key*; ``KeyError`` if never assigned."""
        return self.dict[key]
Here is how you can use it
d = MultiDict()
d['flow'] = 'flow'
d['flow'] = 'wolf'
d['flow'] # ['flow', 'wolf']
Today I'm learning using * and ** to unpack arguments.
I find that both list, str, tuple, dict can be unpacked by *.
I guess because they are all iterables. So I made my own class.
# FILE CONTENT
def print_args(*args):
    """Print every positional argument on its own line."""
    for arg in args:
        # Function-call form: the bare ``print x`` statement is a syntax
        # error on Python 3, and this prints the same on Python 2.
        print(arg)
class MyIterator(object):
    """Iterable yielding 0..4; each iteration is independent of the others."""

    # Kept so existing reads of ``.count`` keep working. It is no longer
    # used as the iteration cursor.
    count = 0

    def __iter__(self):
        """Yield 0, 1, 2, 3, 4.

        The position is local to each generator. When it was stored on the
        instance, an abandoned or interleaved iteration left a stale cursor
        behind and the next ``*obj`` unpacking started in the middle.
        """
        for value in range(5):
            yield value
my_iterator = MyIterator()
# INTERPRETOR TEST
In [1]: print_args(*my_iterator)
0
1
2
3
4
It works! But how to make a mapping object like dict in python so that ** unpacking works on it? Is it possible to do that? And is there already another kind of mapping object in python except dict?
PS:
I know I can make an object inherit from dict class to make it a mapping object. But is there some key magic_method like __iter__ to make a mapping object without class inheritance?
PS2:
With the help of @mgilson's answer, I've made an object which can be unpacked by ** without inheriting from an existing mapping class:
# FILE CONTENT
def print_kwargs(**kwargs):
    """Print each keyword argument as ``name <tab> value`` on its own line."""
    for name, value in kwargs.items():
        # Python 3 function-call form; the Python 2 print statement does not
        # parse on Python 3. The fields are still separated by spaces.
        print(name, '\t', value)
class MyMapping(object):
    """Minimal object usable with ``**`` unpacking: only ``keys`` and
    ``__getitem__`` are provided, with no mapping base class."""

    def __getitem__(self, key):
        """Return the fixed value for keys whose ``int()`` is in 0..4.

        Raises:
            KeyError: for any other key. Unknown keys used to return ``None``
                and non-numeric keys leaked ``ValueError``.
        """
        try:
            index = int(key)
        except (TypeError, ValueError):
            raise KeyError(key) from None
        if index in range(5):
            return "Mapping and unpacking!"
        raise KeyError(key)

    def keys(self):
        """Return the keys as strings, since ``**`` unpacking needs ``str`` keys."""
        # A real list, so the result can be iterated more than once.
        return [str(i) for i in range(5)]
my_mapping = MyMapping()
print_kwargs(**my_mapping)
# RESULTS
1 Mapping and unpacking!
0 Mapping and unpacking!
3 Mapping and unpacking!
2 Mapping and unpacking!
4 Mapping and unpacking!
Be aware, when unpacking using **, the key in your mapping object should be type str, or TypeError will be raised.
Any mapping can be used. I'd advise that you inherit from collections.Mapping or collections.MutableMapping [1]. They're abstract base classes -- you supply a couple of methods and the base class fills in the rest.
Here's an example of a "frozendict" that you could use:
# The ABCs live in ``collections.abc``; the old aliases in ``collections``
# were removed in Python 3.10, so importing from there no longer works.
from collections.abc import Mapping


class FrozenDict(Mapping):
    """Immutable dictionary.

    Abstract methods required by Mapping are

    1. `__getitem__`
    2. `__iter__`
    3. `__len__`

    Everything else (``keys``, ``items``, ``get``, ``in``, ``==``, ...) is
    supplied by the ``Mapping`` base class.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` and keep a private copy."""
        self._data = dict(*args, **kwargs)

    def __getitem__(self, key):
        """Return the value for *key*; ``KeyError`` if absent."""
        return self._data[key]

    def __iter__(self):
        """Iterate over the keys."""
        return iter(self._data)

    def __len__(self):
        """Return the number of stored items."""
        return len(self._data)
And usage is just:
def printer(**kwargs):
print(kwargs)
d = FrozenDict({'a': 1, 'b': 2})
printer(**d)
To answer your question about which "magic" methods are necessary to allow unpacking -- just based on experimentation alone -- in Cpython a class with __getitem__ and keys is enough to allow it to be unpacked with **. With that said, there is no guarantee that works on other implementations (or future versions of CPython). To get the guarantee, you need to implement the full mapping interface (usually with the help of a base class as I've used above).
In python2.x, there's also UserDict.UserDict which can be accessed in python3.x as collections.UserDict -- However if you're going to use this one, you can frequently just subclass from dict.
[1] Note that as of Python 3.3, those classes were moved to the collections.abc module (the old aliases in collections were removed in Python 3.10).
First, let's define unpacking:
def unpack(**kwargs):
    """
    Gather all keyword arguments and print one
    'key: ..., value: ...' line per argument.
    """
    for name, val in kwargs.items():
        print('key: %s, value: %s' % (name, val))
Now, the structure: two built-in options available are collections.abc.Mapping and collections.UserDict. As there's another answer exploring the highly customizable Mapping type, I will focus on UserDict: UserDict can be easier to start with if all you need is a basic dict structure with some twist. After definition, the underlying dictionary of a UserDict is also accessible as its .data attribute.
1.It can be used inline, like so:
from collections import UserDict
>>> d = UserDict({'key':'value'})
>>> # UserDict makes it feel like it's a regular dict
>>> d, d.data
({'key':'value'}, {'key':'value'})
Breaking UserDict into key=value pairs:
>>> unpack(**d)
key: key, value: value
>>> unpack(**d.data) # same as above
key: key, value: value
2.If subclassing, all you have to do is to define self.data within __init__. Note that i expanded the class with additional functionality with (self+other) 'magic' methods:
class CustomDict(UserDict):
    """``UserDict`` that supports merging with ``+`` and ``+=``."""

    def __init__(self, dct=None):
        """Use *dct* as the backing dictionary, or a new empty one.

        A dict passed in explicitly is used as-is (not copied), as before.
        The default is now created per instance: a literal ``{}`` default was
        a single object shared by every ``CustomDict()``, so an item added to
        one instance showed up in all of them.
        """
        self.data = {} if dct is None else dct

    def __add__(self, other=None):
        """Returning new object of the same type

        In case of UserDict, unpacking self is the same as unpacking self.data
        """
        extra = {} if other is None else other
        # type(self) keeps subclasses intact; __class__ always meant CustomDict.
        return type(self)({**self.data, **extra})

    def __iadd__(self, other=None):
        """Returning same object, modified in-place"""
        if other is not None:
            self.update(other)
        return self
Usage is:
>>> d = CustomDict({'key': 'value', 'key2': 'value2'})
>>> d
{'key': 'value', 'key2': 'value2'}
>>> type(d), id(d)
(<class '__main__.CustomDict'>, 4323059136)
Adding other dict (or any mapping type) to it will call __add__, returning new object:
>>> mixin = {'a': 'aaa', 'b': 'bbb'}
>>> d_new = d + mixin # __add__
>>> d_new
{'key': 'value', 'a': 'aaa', 'key2': 'value2', 'b': 'bbb'}
>>> type(d_new), id(d_new)
(<class '__main__.CustomDict'>, 4323059248) # new object
>>> d # unmodified
{'key': 'value', 'key2': 'value2'}
In-place modification with __iadd__ will return the same object (same id in memory)
>>> d += {'a': 'aaa', 'b': 'bbb'} # __iadd__
>>> d
{'key': 'value', 'a': 'aaa', 'key2': 'value2', 'b': 'bbb'}
>>> type(d), id(d)
(<class '__main__.CustomDict'>, 4323059136)
Btw, i agree with other contributors that you should also be familiar with collections.abc.Mapping and brethren types. For basic dictionary exploration UserDict has all the same features and does not require from you to override abstract methods before becoming usable.
I use a dict as a short-term cache. I want to get a value from the dictionary, and if the dictionary didn't already have that key, set it, e.g.:
val = cache.get('the-key', calculate_value('the-key'))
cache['the-key'] = val
In the case where 'the-key' was already in cache, the second line is not necessary. Is there a better, shorter, more expressive idiom for this?
yes, use:
val = cache.setdefault('the-key', calculate_value('the-key'))
An example in the shell:
>>> cache = {'a': 1, 'b': 2}
>>> cache.setdefault('a', 0)
1
>>> cache.setdefault('b', 0)
2
>>> cache.setdefault('c', 0)
0
>>> cache
{'a': 1, 'c': 0, 'b': 2}
See: http://docs.python.org/release/2.5.2/lib/typesmapping.html
Readability matters!
if 'the-key' not in cache:
cache['the-key'] = calculate_value('the-key')
val = cache['the-key']
If you really prefer an one-liner:
val = cache['the-key'] if 'the-key' in cache else cache.setdefault('the-key', calculate_value('the-key'))
Another option is to define __missing__ in the cache class:
class Cache(dict):
    """Dict that fills itself in: a missing key is computed, stored and returned."""

    def __missing__(self, key):
        """Compute the value for the absent *key* and remember it."""
        computed = calculate_value(key)
        return self.setdefault(key, computed)
Have a look at the Python Decorator Library, and more specifically Memoize which acts as a cache. That way you can just decorate your call the calculate_value with the Memoize decorator.
Approach with
cache.setdefault('the-key',calculate_value('the-key'))
is great if calculate_value is not costly, because it will be evaluated each time. So if you have to read from DB, open a file or network connection or do anything "expensive", then use the following structure:
try:
val = cache['the-key']
except KeyError:
val = calculate_value('the-key')
cache['the-key'] = val
You might want to take a look at (the entire page at) "Code Like a Pythonista" http://python.net/~goodger/projects/pycon/2007/idiomatic/handout.html#dictionary-get-method
It covers the setdefault() technique described above, and the defaultdict technique is also very handy for making dictionaries of sets or arrays for example.
You can also use defaultdict to do something similar:
>>> from collections import defaultdict
>>> d = defaultdict(int) # will default values to 0
>>> d["a"] = 1
>>> d["a"]
1
>>> d["b"]
0
>>>
You can assign any default you want by supplying your own factory function and itertools.repeat:
>>> from itertools import repeat
>>> def constant_factory(value):
... return repeat(value).next
...
>>> default_value = "default"
>>> d = defaultdict(constant_factory(default_value))
>>> d["a"]
'default'
>>> d["b"] = 5
>>> d["b"]
5
>>> d.keys()
['a', 'b']
use setdefault method,
If the key is not already present, setdefault creates the key with the value provided in the second argument; if the key is already present, it returns the existing value for that key.
val = cache.setdefault('the-key',value)
Use get to extract the value or to get None.
Combining None with or will let you chain another operation (setdefault)
def get_or_add(cache, key, value_factory):
    """Return ``cache[key]``, computing and storing it first if it is absent.

    Args:
        cache: dict-like object providing ``get`` and ``setdefault``.
        key: the key to look up.
        value_factory: zero-argument callable, invoked only when *key* is
            not in *cache*.

    Presence is decided with a sentinel, not truthiness. Cached falsy values
    (``0``, ``''``, ``[]``, ``None``) used to be treated as missing, which
    re-ran the factory on every call.
    """
    missing = object()
    value = cache.get(key, missing)
    if value is missing:
        value = cache.setdefault(key, value_factory())
    return value
usage:
in order to make it lazy the method expects a function as the third parameter
get_or_add(cache, 'the-key', lambda: calculate_value('the-key'))
I have a large list like:
[A][B1][C1]=1
[A][B1][C2]=2
[A][B2]=3
[D][E][F][G]=4
I want to build a multi-level dict like:
A
--B1
-----C1=1
-----C2=1
--B2=3
D
--E
----F
------G=4
I know that if I use recursive defaultdict I can write table[A][B1][C1]=1, table[A][B2]=2, but this works only if I hardcode those insert statement.
While parsing the list, I don't know beforehand how many []'s I need in order to call table[key1][key2][...].
You can do it without even defining a class:
from collections import defaultdict
nested_dict = lambda: defaultdict(nested_dict)
nest = nested_dict()
nest[0][1][2][3][4][5] = 6
Your example says that at any level there can be a value, and also a dictionary of sub-elements. That is called a tree, and there are many implementations available for them. This is one:
from collections import defaultdict
class Tree(defaultdict):
    """Node with its own ``value`` whose missing children are new ``Tree``s."""

    def __init__(self, value=None):
        """Create a node holding *value*; children are created on first access."""
        # The class is its own default factory, so lookups nest indefinitely.
        defaultdict.__init__(self, Tree)
        self.value = value
root = Tree()
root.value = 1
root['a']['b'].value = 3
print root.value
print root['a']['b'].value
print root['c']['d']['f'].value
Outputs:
1
3
None
You could do something similar by writing the input in JSON and using json.load to read it as a structure of nested dictionaries.
I think the simplest implementation of a recursive dictionary is this. Only leaf nodes can contain values.
# Define recursive dictionary
from collections import defaultdict
tree = lambda: defaultdict(tree)
Usage:
# Create instance
mydict = tree()
mydict['a'] = 1
mydict['b']['a'] = 2
mydict['c']
mydict['d']['a']['b'] = 0
# Print
import prettyprint
prettyprint.pp(mydict)
Output:
{
"a": 1,
"b": {
"a": 2
},
"c": {},
"d": {
"a": {
"b": 0
}
}
}
I'd do it with a subclass of dict that defines __missing__:
>>> class NestedDict(dict):
... def __missing__(self, key):
... self[key] = NestedDict()
... return self[key]
...
>>> table = NestedDict()
>>> table['A']['B1']['C1'] = 1
>>> table
{'A': {'B1': {'C1': 1}}}
You can't do it directly with defaultdict because defaultdict expects the factory function at initialization time, but at initialization time, there's no way to describe the same defaultdict. The above construct does the same thing that default dict does, but since it's a named class (NestedDict), it can reference itself as missing keys are encountered. It is also possible to subclass defaultdict and override __init__.
This is equivalent to the above, but avoiding lambda notation. Perhaps easier to read ?
def dict_factory():
    """Return a ``defaultdict`` whose missing values are built by this function."""
    nested = defaultdict(dict_factory)
    return nested
your_dict = dict_factory()
Also -- from the comments -- if you'd like to update from an existing dict, you can simply call
your_dict[0][1][2].update({"some_key":"some_value"})
In order to add values to the dict.
Dan O'Huiginn posted a very nice solution on his journal in 2010:
http://ohuiginn.net/mt/2010/07/nested_dictionaries_in_python.html
>>> class NestedDict(dict):
... def __getitem__(self, key):
... if key in self: return self.get(key)
... return self.setdefault(key, NestedDict())
>>> eggs = NestedDict()
>>> eggs[1][2][3][4][5]
{}
>>> eggs
{1: {2: {3: {4: {5: {}}}}}}
You may achieve this with a recursive defaultdict.
from collections import defaultdict
def tree():
    """Return a new, arbitrarily nestable ``defaultdict``."""

    def the_tree():
        # The factory name is local to this closure, so it cannot be rebound
        # or deleted from the outside.
        return defaultdict(the_tree)

    root = the_tree()
    return root
It is important to protect the default factory name, the_tree here, in a closure ("private" local function scope). Avoid using a one-liner lambda version, which is bugged due to Python's late binding closures, and implement this with a def instead.
The accepted answer, using a lambda, has a flaw where instances must rely on the nested_dict name existing in an outer scope. If for whatever reason the factory name can not be resolved (e.g. it was rebound or deleted) then pre-existing instances will also become subtly broken:
>>> nested_dict = lambda: defaultdict(nested_dict)
>>> nest = nested_dict()
>>> nest[0][1][2][3][4][6] = 7
>>> del nested_dict
>>> nest[8][9] = 10
# NameError: name 'nested_dict' is not defined
To add to #Hugo To have a max depth:
l=lambda x:defaultdict(lambda:l(x-1)) if x>0 else defaultdict(dict)
arr = l(2)
A slightly different possibility that allows regular dictionary initialization:
from collections import defaultdict
def superdict(arg=()):
    """Return a nesting ``defaultdict`` pre-filled from *arg*.

    *arg* is anything ``dict.update`` accepts (a mapping or an iterable of
    pairs); missing keys yield further ``superdict()`` instances.
    """
    result = defaultdict(superdict)
    result.update(arg)
    return result
Example:
>>> d = {"a":1}
>>> sd = superdict(d)
>>> sd["b"]["c"] = 2
You could use a NestedDict.
from ndicts.ndicts import NestedDict
nd = NestedDict()
nd[0, 1, 2, 3, 4, 5] = 6
The result as a dictionary:
>>> nd.to_dict()
{0: {1: {2: {3: {4: {5: 6}}}}}}
To install ndicts
pip install ndicts
I want to rewrite Python's dictionary access mechanism "getitem" to be able to return default values.
The functionality I am looking for is something like
a = dict()
a.setdefault_value(None)
print a[100] #this would return none
any hints ?
Thanks
There is already a collections.defaultdict:
from collections import defaultdict
a = defaultdict(lambda:None)
print a[100]
There is a defaultdict built-in starting with Python 2.6. The constructor takes a function which will be called when a value is not found. This gives more flexibility than simply returning None.
from collections import defaultdict
a = defaultdict(lambda: None)
print a[100] #gives None
The lambda is just a quick way to define a one-line function with no name. This code is equivalent:
def nonegetter():
return None
a = defaultdict(nonegetter)
print a[100] #gives None
This is a very useful pattern which gives you a hash showing the count of each unique object. Using a normal dict, you would need special cases to avoid KeyError.
counts = defaultdict(int)
for obj in mylist:
counts[obj] += 1
use a defaultdict (http://docs.python.org/library/collections.html#collections.defaultdict)
import collections
a = collections.defaultdict(lambda:None)
where the argument to the defaultdict constructor is a function which returns the default value.
Note that if you access an unset entry, it actually sets it to the default:
>>> print a[100]
None
>>> a
defaultdict(<function <lambda> at 0x38faf0>, {100: None})
If you really want to not use the defaultdict builtin, you need to define your own subclass of dict, like so:
class MyDefaultDict(dict):
    """Dict that returns a configurable default value for missing keys.

    The default is only returned, never stored. Until ``setdefault_value``
    has been called, missing keys raise ``KeyError`` like a normal dict.
    """

    # Marker for "no default configured"; lets ``None`` be a real default.
    _NO_DEFAULT = object()
    __default = _NO_DEFAULT

    def setdefault_value(self, default):
        """Set the value returned by ``self[key]`` for missing keys."""
        self.__default = default

    def __getitem__(self, key):
        """Return the stored value, or the configured default if *key* is absent."""
        try:
            # Call dict's implementation directly: ``self[key]`` here would
            # re-enter this method and recurse without end.
            return dict.__getitem__(self, key)
        except KeyError:  # dicts signal a missing key with KeyError
            if self.__default is MyDefaultDict._NO_DEFAULT:
                raise
            return self.__default
i wasnt aware of defaultdict, and thats probably the best way to go. if you are opposed for some reason ive written small wrapper function for this purpose in the past. Has slightly different functionality that may or may not be better for you.
def makeDictGet(d, defaultVal):
    """Return a one-argument getter for *d* that falls back to *defaultVal*.

    The getter reads *d* on every call, so later changes to *d* are visible.
    """
    # The membership test must be against the mapping ``d``; testing against
    # the builtin ``dict`` type raises TypeError on every call.
    return lambda key: d[key] if key in d else defaultVal
And using it...
>>> d1 = {'a':1,'b':2}
>>> d1Get = makeDictGet(d1, 0)
>>> d1Get('a')
1
>>> d1Get(5)
0
>>> d1['newAddition'] = 'justAddedThisOne' #changing dict after the fact is fine
>>> d1Get('newAddition')
'justAddedThisOne'
>>> del d1['a']
>>> d1Get('a')
0
>>> d1GetDefaultNone = makeDictGet(d1, None) #having more than one such function is fine
>>> print d1GetDefaultNone('notpresent')
None
>>> d1Get('notpresent')
0
>>> f = makeDictGet({'k1':'val1','pi':3.14,'e':2.718},False) #just put new dict as arg if youre ok with being unable to change it or access directly
>>> f('e')
2.718
>>> f('bad')
False