Why does overriding __contains__ break OrderedDict.keys?

Why does overriding __contains__ break OrderedDict.keys? - python

I'm subclasssing OrderedDict (Cpython, 2.7.3) to represent a datafile. __getitem__ pulls a field out of the datafile and sets it on the current instance similar to the code I've posted below. now I would like to override __contains__ to return True if the field is in the dictionary or in the file on the disk since it can be read either way. However, this seems to break OrderedDict's ability to inspect it's keys.
from collections import OrderedDict
dictclass = OrderedDict
class Foo(dictclass):
def __getitem__(self,key):
try:
return dictclass.__getitem__(self,key)
except KeyError:
pass
data = key*2
self[key] = data
return data
def __contains__(self,whatever):
return dictclass.__contains__(self,whatever) or 'bar' in whatever
a = Foo()
print a['bar']
print a.keys()
If you run the code above, you'll get this output:
barbar
[]
Note that if you change dictclass = dict in the above code, it still seems to work (giving the following output).
barbar
['bar']
Am I doing something horribly wrong?

When Foo.__contains__ is not defined:
a['bar']
calls Foo.__getitem__, which executes
self[key] = data
This calls OrderedDict.__setitem__, which is defined this way:
def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[PREV]
last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
dict_setitem(self, key, value)
Since Foo.__contains__ is not defined,
if key not in self:
is True. So the key is properly added to self.__root and self.__map.
When Foo.__contains__ is defined,
if key not in self:
if False. So the key is not properly added to self.__root and self.__map.
Foo.__contains__ effective fools OrderedDict.__setitem__ into thinking that the 'bar' key has already been added.
I found it helpful to play with the following code (adding print statements in __setitem__ and __iter__):
from collections import OrderedDict
dictclass = OrderedDict
class Foo(dictclass):
def __getitem__(self,key):
try:
return dictclass.__getitem__(self,key)
except KeyError:
pass
data = key*2
self[key] = data
return data
def __contains__(self,whatever):
print('contains: {}'.format(whatever))
return dictclass.__contains__(self,whatever) or 'bar' in whatever
def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
print('key not in self: {}'.format(key not in self))
if key not in self:
root = self._OrderedDict__root
last = root[PREV]
last[NEXT] = root[PREV] = self._OrderedDict__map[key] = [last, root, key]
dict_setitem(self, key, value)
def __iter__(self):
'od.__iter__() <==> iter(od)'
# Traverse the linked list in order.
NEXT, KEY = 1, 2
root = self._OrderedDict__root
curr = root[NEXT]
print('curr: {}'.format(curr))
print('root: {}'.format(root))
print('curr is not root: {}'.format(curr is not root))
while curr is not root:
yield curr[KEY]
curr = curr[NEXT]
a = Foo()
print a['bar']
# barbar
print a.keys()
# ['bar']
Notice that you can avoid this problem by making Foo a subclass of collections.MutableMapping and delegating most of its behavior to a OrderedDict attribute:
import collections
dictclass = collections.OrderedDict
class Foo(collections.MutableMapping):
def __init__(self, *args, **kwargs):
self._data = dictclass(*args, **kwargs)
def __setitem__(self, key, value):
self._data[key] = value
def __delitem__(self, key):
del self._data[key]
def __iter__(self):
return iter(self._data)
def __len__(self):
return len(self._data)
def __getitem__(self,key):
try:
return self._data[key]
except KeyError:
pass
data = key*2
self[key] = data
return data
def __contains__(self,whatever):
return dictclass.__contains__(self,whatever) or 'bar' in whatever
which yields
a = Foo()
print a['bar']
# barbar
print a.keys()
# ['bar']
even with __contains__ defined.

What breaks your code is the or 'bar' in whatever. If you remove it, it will work as with the change dictclass = dict you mention.
The __setitem__ implementation of OrderedDict is this:
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[0]
last[1] = root[0] = self.__map[key] = [last, root, key]
return dict_setitem(self, key, value)
So with self["bar"] = "barbar", the condition should be False, but it is True even before inserting any item. Thus, the key isn' added to self.__root which is used in OrderedDict.__iter__:
def __iter__(self):
'od.__iter__() <==> iter(od)'
# Traverse the linked list in order.
root = self.__root
curr = root[1] # start at the first node
while curr is not root:
yield curr[2] # yield the curr[KEY]
curr = curr[1] # move to next node
Since the code for retrieving the values uses this iterator and self.__root does not contain "bar", this concrete key cannot be returned in the values.

Related

Python dictionary not adding subsequent keys after the first

Fairly new to Python and I can not figure this out. I go to add a key to a dictionary and it adds it fine. I can even update that same key with a new value, however when I go to add a second key to the dictionary, it does not add the second key value pair.
class CountedSet:
def __init__(self):
self.data = {}
def __iadd__(self,other):
if isinstance(other,int):
self.data[other] = self.data.get(other, 0) + 1
return self
elif isinstance(other,CountedSet):
#TODO::iterate through second countedSet and update self
return self
def __add__(self,obj):
for key, value in obj.data.items():
if len(self.data) == 0:
self.data[key] = value
elif self.data[key]:
self.data[key] = self.data[key] + value
else:
self.data[key] = value
return self
def __getitem__(self,item):
if item in self.data:
return self.data.get(item)
else:
return None
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value))
a = CountedSet()
a += 17
a += 4
print(a)
This simply outputs {17,1} when I would expect to see {17,1} {4,1}

Your __str__ implementation returns on the first iteration of the for-loop:
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value)) # here
Maybe you want something like:
def __str__(self):
return " ".join([{"{%s,%s}" % (k,v) for k, v in self.data.items()])
Or, without the comprehension:
def __str__(self):
items = []
for key, value in self.data.items():
items.append("{%s,%s}" % (key,value))
return ' '.join(items)

Python - Accesing a list from another class method

I have a little problem with two different classes and two methods from the same class. I have a class B which is using both methods from class a which seems to work fine.
The problem however is that the first method from class a (insert) changes a list which the second method (lookup) from this class should use. It is using the global list which is still initiated with only zeroes. So I have no idea how to tell the method to use the HashMap from the insert method :/ I Hope somebody can help, thank you!
""" PUBLIC MEMBERS
Insert the given key (given as a string) with the given value (given as
an integer). If the hash table already contains an entry for the given key,
update the value of this entry with the given value.
"""
class Map:
global m
m = 10000
global HashMap
HashMap = []
for i in range(m):
HashMap.append(0)
#classmethod
def insert(self, key, value):
"""
>>> Map.insert("hi", 9)
[4,53]
"""
self.key = key
self.value = value
asci = 0
for i in key:
asci += ord(i)
hashindex = (asci%m)*2
print(hashindex)
print(HashMap[hashindex])
if HashMap[hashindex] == key:
HashMap[hashindex + 1] = value
else:
while HashMap[hashindex] != 0:
hashindex = ((asci+1)%m)*2
HashMap[hashindex] = key
HashMap[hashindex+1] = value
""" Check if there exists an entry with the given key in the hash table.
If such an entry exists, return its associated integer value.
Otherwise return -1.
"""
#classmethod
def lookup(self, key):
self.key = key
ascilookup = 0
for i in key:
ascilookup += ord(i)
indexlookup = (ascilookup%m)*2
for j in HashMap:
if HashMap[j]==key:
return HashMap[j + 1]
elif HashMap[j]==0:
return "-1"
else:
j =((j+1)%m)*2
if __name__ == "__main__":
import doctest
doctest.testmod()

This is a far simpler implementation of a map in python:
class Map:
HashMap = {}
def __init__(self,leng):
for i in range(leng):
self.HashMap[str(i)]=0
def insert(self, key, value):
self.HashMap[key]=value
def lookup(self, key):
for each in self.HashMap.iterkeys():
if each == key:
return self.HashMap[each]
return None
EDIT without using a dictionary, using two lists is easier:
class Map:
keys = []
values = []
def __init__(self,leng):
for i in range(leng):
self.keys.append(str(i))
self.values.append(0)
#classmethod
def insert(self, key, value):
self.keys.append(key)
self.values.append(value)
#classmethod
def lookup(self, key):
for x in range(0, len(self.keys)):
if self.keys[x] == key:
return self.values[x]
return None

How to filter by keys through a nested dictionary in a pythonic way

Try to filter a nested dictionary. My solution is clunky, was hoping to see if there is a better method something using comprehensions. Only interested in the dictionary and lists for this example.
_dict_key_filter() will filter the keys of a nested dictionary or a list of nested dictionaries. Anything not in the obj_filter will be ignored on all nested levels.
obj : can be a dictionary or a list of dictionaries.
obj_filter: has to be a list of filter values
def _dict_key_filter(self, obj, obj_filter):
if isinstance(obj, dict):
retdict = {}
for key, value in obj.iteritems():
if key in obj_filter:
retdict[key] = copy.deepcopy(value)
elif isinstance(value, (dict, list)):
child = self._dict_key_filter(value, obj_filter)
if child:
retdict[key] = child
return retdict if retdict else None
elif isinstance(obj, list):
retlist = []
for value in list:
child = self._dict_key_filter(value, obj_filter)
if child:
retlist.append(child)
return retlist if retlist else None
else:
return None
Example#
dict1 = {'test1': {'test2':[1,2]}, 'test3': [{'test6': 2},
{'test8': {'test9': 23}}], 'test4':{'test5': 5}}
filter = ['test5' , 'test9']
return = _dict_key_filter(dict1, filter)
return value would be {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 5}}

It's a really old question. I came across a similar problem recently.
It maybe obvious, but you are dealing with a tree in which each node has an arbitray number of children. You want to cut the subtrees that do not contain some items as nodes (not leaves). To achieve this, you are using a custom DFS: the main function returns either a subtree or None. If the value is None then you "cut" the branch.
First of all, the function dict_key_filter returns a (non empty) dict, a (non empty) list or None if no filter key was not found in the branch.
To reduce complexity, you could return a sequence in every case: an empty sequence if no filter key was found, and a non empty sequence if you are still searching or you found the leaf of the tree. Your code would look like:
def dict_key_filter(obj, obj_filter):
if isinstance(obj, dict):
retdict = {}
...
return retdict # empty or not
elif isinstance(obj, list):
retlist = []
...
return retlist # empty or not
else:
return [] # obvioulsy empty
This was the easy part. Now we have to fill the dots.
The list case
Let's begin with the list case, since it is the easier to refactor:
retlist = []
for value in obj:
child = dict_key_filter0(value, obj_filter)
if child:
retlist.append(child)
We can translate this into a simple list comprehension:
retlist = [dict_key_filter(value, obj_filter) for value in obj if dict_key_filter(value, obj_filter)]
The drawback is that dict_key_filter is evaluated twice. We can avoid this with a little trick (see https://stackoverflow.com/a/15812866):
retlist = [subtree for subtree in (dict_key_filter(value, obj_filter) for value in obj) if subtree]
The inner expression (dict_key_filter(value, obj_filter) for value in obj) is a generator that calls dict_key_filter once per value. But we can even do better if we build a closure of dict_key_filter:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
...
retlist = list(filter(len, map(inner_dict_key_filter, obj)))
Now we are in the functional world: map applies inner_dict_key_filter to every element of the list and then the subtrees are filtered to exclude empty subtrees (len(subtree) is true iff subtree is not empty). Now, the code looks like:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
if isinstance(obj, dict):
retdict = {}
...
return retdict
elif isinstance(obj, list):
return list(filter(len, map(inner_dict_key_filter, obj)))
else:
return []
If you are familiar with functional programming, the list case is readable (not quite as readable as it would be in Haskell, but still readable).
The dict case
I do not forget the dictionary-comprehension tag in your question. The first idea is to create a function to return either a whole copy of the branch or the result of the rest of the DFS.
def build_subtree(key, value):
if key in obj_filter:
return copy.deepcopy(value) # keep the branch
elif isinstance(value, (dict, list)):
return inner_dict_key_filter(value) # continue to search
return [] # just an orphan value here
As in the list case, we do not refuse empty subtrees for now:
retdict = {}
for key, value in obj.items():
retdict[key] = build_subtree(key, value)
We have now a perfect case for dict comprehension:
retdict = {key: build_subtree(key, value) for key, value in obj.items() if build_subtree(key, value)}
Again, we use the little trick to avoid to compute a value twice:
retdict = {key:subtree for key, subtree in ((key, build_subtree(key, value)) for key, value in obj.items()) if subtree}
But we have a little problem here: the code above is not exaclty equivalent to the original code. What if the value is 0? In the original version, we have retdict[key] = copy.deepcopy(0) but in the new version we have nothing. The 0 value is evaluated as false and filtered. And then the dict may become empty and we cut the branch wrongfully. We need another test to be sure we want to remove a value: if it's an empty list or dict, then remove it, else keep it:
def to_keep(subtree): return not (isinstance(subtree, (dict, list)) or len(subtree) == 0)
That is:
def to_keep(subtree): return not isinstance(subtree, (dict, list)) or subtree
If you remember a bit of logic (https://en.wikipedia.org/wiki/Truth_table#Logical_implication) you can interpret this as: if subtree is a dict or a list, then it must not be empty.
Let's put the pieces together:
def dict_key_filter(obj, obj_filter):
def inner_dict_key_filter(obj): return dict_key_filter(obj, obj_filter)
def to_keep(subtree): return not isinstance(subtree, (dict, list)) or subtree
def build_subtree(key, value):
if key in obj_filter:
return copy.deepcopy(value) # keep the branch
elif isinstance(value, (dict, list)):
return inner_dict_key_filter(value) # continue to search
return [] # just an orphan value here
if isinstance(obj, dict):
key_subtree_pairs = ((key, build_subtree(key, value)) for key, value in obj.items())
return {key:subtree for key, subtree in key_subtree_pairs if to_keep(subtree)}
elif isinstance(obj, list):
return list(filter(to_keep, map(inner_dict_key_filter, obj)))
return []
I don't know if this is more pythonic, but it seems clearer to me.
dict1 = {
'test1': { 'test2':[1,2] },
'test3': [
{'test6': 2},
{
'test8': { 'test9': 23 }
}
],
'test4':{'test5': 0}
}
obj_filter = ['test5' , 'test9']
print (dict_key_filter(dict1, obj_filter))
# {'test3': [{'test8': {'test9': 23}}], 'test4': {'test5': 0}}

Implementation of a Trie in Python

I programmed a Trie as a class in python. The search and insert function are clear, but now i tried to programm the python function __str__, that i can print it on the screen. But my function doesn't work!
class Trie(object):
def __init__(self):
self.children = {}
self.val = None
def __str__(self):
s = ''
if self.children == {}: return ' | '
for i in self.children:
s = s + i + self.children[i].__str__()
return s
def insert(self, key, val):
if not key:
self.val = val
return
elif key[0] not in self.children:
self.children[key[0]] = Trie()
self.children[key[0]].insert(key[1:], val)
Now if I create a Object of Trie:
tr = Trie()
tr.insert('hallo', 54)
tr.insert('hello', 69)
tr.insert('hellas', 99)
And when i now print the Trie, occures the problem that the entries hello and hellas aren't completely.
print tr
hallo | ellas | o
How can i solve that problem?.

Why not have str actually dump out the data in the format that it is stored:
def __str__(self):
if self.children == {}:
s = str(self.val)
else:
s = '{'
comma = False
for i in self.children:
if comma:
s = s + ','
else:
comma = True
s = s + "'" + i + "':" + self.children[i].__str__()
s = s + '}'
return s
Which results in:
{'h':{'a':{'l':{'l':{'o':54}}},'e':{'l':{'l':{'a':{'s':99},'o':69}}}}}

There are several issues you're running into. The first is that if you have several children at the same level, you'll only be prefixing one of them with the initial part of the string, and just showing the suffix of the others. Another issue is that you're only showing leaf nodes, even though you can have terminal values that are not at a leaf (consider what happens when you use both "foo" and "foobar" as keys into a Trie). Finally, you're not outputting the values at all.
To solve the first issue, I suggest using a recursive generator that does the traversal of the Trie. Separating the traversal from __str__ makes things easier since the generator can simply yield each value we come across, rather than needing to build up a string as we go. The __str__ method can assemble the final result easily using str.join.
For the second issue, you should yield the current node's key and value whenever self.val is not None, rather than only at leaf nodes. As long as you don't have any way to remove values, all leaf nodes will have a value, but we don't actually need any special casing to detect that.
And for the final issue, I suggest using string formatting to make a key:value pair. (I suppose you can skip this if you really don't need the values.)
Here's some code:
def traverse(self, prefix=""):
if self.val is not None:
yield "{}:{}".format(prefix, self.val)
for letter, child in self.children.items():
yield from child.traverse(prefix + letter)
def __str__(self):
return " | ".join(self.traverse())
If you're using a version of Python before 3.3, you'll need to replace the yield from statement with an explicit loop to yield the items from the recursive calls:
for item in child.traverse(prefix + letter)
yield item
Example output:
>>> t = Trie()
>>> t.insert("foo", 5)
>>> t.insert("bar", 10)
>>> t.insert("foobar", 100)
>>> str(t)
'bar:10 | foo:5 | foobar:100'

You could go with a simpler representation that just provides a summary of what the structure contains:
class Trie:
def __init__(self):
self.__final = False
self.__nodes = {}
def __repr__(self):
return 'Trie<len={}, final={}>'.format(len(self), self.__final)
def __getstate__(self):
return self.__final, self.__nodes
def __setstate__(self, state):
self.__final, self.__nodes = state
def __len__(self):
return len(self.__nodes)
def __bool__(self):
return self.__final
def __contains__(self, array):
try:
return self[array]
except KeyError:
return False
def __iter__(self):
yield self
for node in self.__nodes.values():
yield from node
def __getitem__(self, array):
return self.__get(array, False)
def create(self, array):
self.__get(array, True).__final = True
def read(self):
yield from self.__read([])
def update(self, array):
self[array].__final = True
def delete(self, array):
self[array].__final = False
def prune(self):
for key, value in tuple(self.__nodes.items()):
if not value.prune():
del self.__nodes[key]
if not len(self):
self.delete([])
return self
def __get(self, array, create):
if array:
head, *tail = array
if create and head not in self.__nodes:
self.__nodes[head] = Trie()
return self.__nodes[head].__get(tail, create)
return self
def __read(self, name):
if self.__final:
yield name
for key, value in self.__nodes.items():
yield from value.__read(name + [key])

Instead of your current strategy for printing, I suggest the following strategy instead:
Keep a list of all characters in order that you have traversed so far. When descending to one of your children, push its character on the end of its list. When returning, pop the end character off of the list. When you are at a leaf node, print the contents of the list as a string.
So say you have a trie built out of hello and hellas. This means that as you descend to hello, you build a list h, e, l, l, o, and at the leaf node you print hello, return once to get (hell), push a, s and at the next leaf you print hellas. This way you re-print letters earlier in the tree rather than having no memory of what they were and missing them.
(Another possiblity is to just descend the tree, and whenever you reach a leaf node go to your parent, your parent's parent, your parent's parent's parent... etc, keeping track of what letters you encounter, reversing the list you make and printing that out. But it may be less efficient.)

Python dict like surjective multiple key → value container

I'm currently in the need for a Python container class with similar functionality like the builtin dict type. Basically what I need is a dictionary, where an arbitrary number of keys beside a primary key, which map to the very same value. However when iterating over it, it should iterate only over the (primary_key, value) pairs and only the primary key if the list of keys is requested.
If this has already been implemented I'd rather not reinvent the wheel. So is there already a module providing such a container? If not, I'm going to implement it myself.

Here is a quick implementation:
class MultipleKeyDict(dict):
__slots__ = ["_primary_keys"]
def __init__(self, arg=None, **kwargs):
self._primary_keys = {}
self.update(arg, **kwargs)
def __setitem__(self, key, value):
super(MultipleKeyDict, self).__setitem__(key, value)
self._primary_keys.setdefault(value, key)
def __delitem__(self, key):
value = self[key]
super(MultipleKeyDict, self).__delitem__(key)
if self._primary_keys[value] == key:
del self._primary_keys[value]
for k, v in super(MultipleKeyDict, self).iteritems():
if v == value:
self._primary_keys[value] = k
break
def __iter__(self):
return self.iterkeys()
def update(self, arg=None, **kwargs):
if arg is not None:
if isinstance(arg, collections.Mapping):
for k in arg:
self[k] = arg[k]
else:
for k, v in arg:
self[k] = v
for k in kwargs:
self[k] = kwargs[k]
def clear(self):
super(MultipleKeyDict, self).clear()
self._primary_keys.clear()
def iteritems(self):
for v, k in self._primary_keys.iteritems():
yield k, v
def items(self):
return list(self.iteritems())
def itervalues(self):
return self._primary_keys.iterkeys()
def values(self):
return self._primary_keys.keys()
def iterkeys(self):
return self._primary_keys.itervalues()
def keys(self):
return self._primary_keys.values()
The only messy bit is that it has to search the whole dict in case a primary key gets deleted.
I omitted copy(), pop(), popitem() and setdefault(). If you need them, you'll have to implement them yourself.

The simplest and easiest solution would be to use two dictionaries, one of which maps secondary keys to a primary key. If for some reason you need a reverse mapping, that could be included in the primary dictionary.
sec = {'one': 'blue', 'two': 'red', 'three': 'blue', # seconary keys
'blue': 'blue', 'red': 'red'} # include identity mapping for primaries
dict = {'blue': ('doll', '$9.43', ('one', 'three')),
'red': ('truck', '$14.99', ('two',)) }
record = dict[sec['two']]
print('Toy=', record[0], 'Price=', record[1])

There is now a multiple key dictionary python package.
https://pypi.python.org/pypi/multi_key_dict/1.0.2
From the link:
from multi_key_dict import multi_key_dict
k = multi_key_dict()
k[1000, 'kilo', 'k'] = 'kilo (x1000)'
print k[1000] # will print 'kilo (x1000)'
print k['k'] # will also print 'kilo (x1000)'
# the same way objects can be updated, deleted:
# and if an object is updated using one key, the new value will
# be accessible using any other key, e.g. for example above:
k['kilo'] = 'kilo'
print k[1000] # will now print 'kilo' as value was updated

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Why does overriding contains break OrderedDict.keys? - python

Related

Python dictionary not adding subsequent keys after the first

Python - Accesing a list from another class method

How to filter by keys through a nested dictionary in a pythonic way

Implementation of a Trie in Python

Python dict like surjective multiple key → value container

Categories

Resources