Get key count from OrderedDict where key is a tuple - python

I have a dictionary such as this:
my_dict=collections.OrderedDict([((123, 1), 'qwe'), ((232, 1), 'asd'), ((234, 2), 'zxc'), ((6745, 2), 'aaa'), ((456, 3), 'bbb')])
The combination of the tuple is always unique and I would like to maintain the order of insertion, hence the OrderedDict. I have well over ~10K items in the dict. How can I efficiently maintain a counter that gives the count of the second element of the key tuple? Basically, I need to know the count whenever I add or delete an item. Right now I just iterate through my_dict and rebuild the counter every time, but that seems very expensive.
In the above example I want the output to be:
1:2 # As in 1 occurs 2 times
2:2
3:1
Right now I do the following:
from collections import OrderedDict, Counter
my_dict = OrderedDict()
my_dict[(123,1)] = 'qwe'
my_dict[(232,1)] = 'asd'
my_dict[(234,2)] = 'zxc'
my_dict[(6745,2)] = 'aaa'
my_dict[(456,3)] = 'bbb'
cnt = []
for item in my_dict.keys():
    cnt.append(item[1])
print Counter(cnt)
I'm not sure if this is the best way, but is there a way to override the = operator and the pop function, so that a count is added or subtracted every time I do that operation?

Getting a Counter to work nicely with an OrderedDict is probably going to require some subclassing. Here's something that might work (I've only implemented __setitem__ and __delitem__, but if you'd like a more robust implementation, let me know):
import collections

class CountedOrderedDict(collections.OrderedDict):
    def __init__(self, *args, **kwargs):
        self.counter = collections.Counter()
        super(CountedOrderedDict, self).__init__(*args, **kwargs)

    def __delitem__(self, key):
        super(CountedOrderedDict, self).__delitem__(key)
        self.counter[key[1]] -= 1

    def __setitem__(self, key, value):
        if key not in self:
            self.counter[key[1]] += 1
        super(CountedOrderedDict, self).__setitem__(key, value)
Example usage:
>>> my_dict = CountedOrderedDict({(123,1): 'sda', (232,1) : 'bfd', (234,2) : 'csd', (6745,2) : 'ds', (456,3) : 'rd'})
>>> my_dict.counter
Counter({1: 2, 2: 2, 3: 1})
>>> del my_dict[(123,1)]
>>> my_dict.counter
Counter({2: 2, 1: 1, 3: 1})
>>> my_dict[(150,1)] = "asdf"
>>> my_dict.counter
Counter({1: 2, 2: 2, 3: 1})
Here's a more general CountedOrderedDict implementation that takes a key function as a parameter.
import collections

class CountedOrderedDict(collections.OrderedDict):
    def __init__(self, key=lambda k: k, *args, **kwargs):
        self.counter = collections.Counter()
        self.key_transform = key
        super(CountedOrderedDict, self).__init__(*args, **kwargs)

    def __delitem__(self, key):
        super(CountedOrderedDict, self).__delitem__(key)
        self.counter[self.key_transform(key)] -= 1

    def __setitem__(self, key, value):
        if key not in self:
            self.counter[self.key_transform(key)] += 1
        super(CountedOrderedDict, self).__setitem__(key, value)
For your needs, you'd instantiate it like so:
my_dict = CountedOrderedDict(key=lambda k: k[1])
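For example, a quick interactive sketch (my own illustration, not from the original answer) of how the counter tracks inserts and deletes with that key function:
>>> my_dict = CountedOrderedDict(key=lambda k: k[1])
>>> my_dict[(123, 1)] = 'qwe'
>>> my_dict[(232, 1)] = 'asd'
>>> my_dict[(234, 2)] = 'zxc'
>>> my_dict.counter
Counter({1: 2, 2: 1})
>>> del my_dict[(123, 1)]
>>> my_dict.counter
Counter({1: 1, 2: 1})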

Related

python mapping that stays sorted by value

There is a SortedDict class, implementing a dictionary which maintains its keys in order, but I cannot find any equivalent data structure that does the same with its values, i.e., something like this:
m = SortedValueMapping(a=3, b=1, c=2)
m.peekitem(0) # b
m.peekitem(-1) # a
m['d'] = 0
m.peekitem(0) # d
m.peekitem(-1) # a
The following class is a quick and dirty implementation with no guarantees for efficiency but it provides the desired behavior.
Should someone provide a better answer, I will gladly accept it!
class SortedValuesDict:
    def __init__(self, args=None, **kwargs):
        """Initialize the SortedValuesDict with an Iterable or Mapping."""
        from collections.abc import Mapping
        from sortedcontainers import SortedSet
        self.__sorted_values = SortedSet()
        self.__key_value_pairs = {}
        self.__value_key_pairs = {}
        if args is not None:
            iterable = args.items() if isinstance(args, Mapping) else args
            for key, value in iterable:
                self.__setitem__(key, value)
        for key, value in kwargs.items():
            self.__setitem__(key, value)

    def __repr__(self):
        """Get a representation of the SortedValuesDict."""
        return f"SortedValuesDict({', '.join(f'{k}: {v}' for k, v in self)})"

    def __setitem__(self, key, value):
        """Store the key, value pair in the SortedValuesDict."""
        if key in self:
            del self[key]
        self.__key_value_pairs[key] = value
        self.__sorted_values.add(value)
        if value in self.__value_key_pairs:
            self.__value_key_pairs[value].add(key)
        else:
            self.__value_key_pairs[value] = {key}

    def __getitem__(self, key):
        """Get the value corresponding to the key."""
        return self.__key_value_pairs[key]

    def __delitem__(self, key):
        """Remove the given key from the SortedValuesDict."""
        value = self.__key_value_pairs.pop(key)
        keys = self.__value_key_pairs[value]
        keys.remove(key)
        if not keys:
            del self.__value_key_pairs[value]
            self.__sorted_values.remove(value)

    def __contains__(self, key):
        """Test if key is in the SortedValuesDict."""
        return key in self.__key_value_pairs

    def __iter__(self):
        """Iterate over the SortedValuesDict items."""
        for value in self.__sorted_values:
            for key in self.__value_key_pairs[value]:
                yield key, value

    def keys(self):
        """Iterate over the SortedValuesDict keys."""
        for value in self.__sorted_values:
            for key in self.__value_key_pairs[value]:
                yield key

    def values(self):
        """Iterate over the SortedValuesDict values."""
        for value in self.__sorted_values:
            for key in self.__value_key_pairs[value]:
                yield value

    def items(self):
        """Iterate over the SortedValuesDict items."""
        yield from self

    def peek(self, pos=0):
        """Peek at the next key for the value at position pos."""
        return next(iter(self.__value_key_pairs[self.__sorted_values[pos]]))

    def peekvalue(self, pos=0):
        """Peek at the value at position pos (default 0)."""
        return self.__sorted_values[pos]

    def peekitem(self, pos=0):
        """Peek at the next key for the value at position pos."""
        value = self.__sorted_values[pos]
        return next(iter(self.__value_key_pairs[value])), value

    def pop(self, pos=-1):
        """Pop a key corresponding to the value at position pos."""
        value = self.__sorted_values[pos]
        keys = self.__value_key_pairs[value]
        key = keys.pop()
        del self.__key_value_pairs[key]
        if not keys:
            del self.__value_key_pairs[self.__sorted_values.pop(pos)]
        return key

    def popitem(self, pos=-1):
        """Pop an item corresponding to the value at position pos."""
        value = self.__sorted_values[pos]
        keys = self.__value_key_pairs[value]
        key = keys.pop()
        del self.__key_value_pairs[key]
        if not keys:
            del self.__value_key_pairs[self.__sorted_values.pop(pos)]
        return key, value
m = SortedValuesDict(a=3, b=1, c=2)
[*m.values()] # [1, 2, 3]
[*m] # [('b', 1), ('c', 2), ('a', 3)] (same as [*m.items()])
m.peekitem(0) # ('b', 1)
m.peekitem(-1) # ('a', 3)
m['d'] = 0
m.peekitem(0) # ('d', 0)
m.peekitem(-1) # ('a', 3)
m['e'] = 0
m # SortedValuesDict(d: 0, e: 0, b: 1, c: 2, a: 3)
m.peekitem(0) # ('d', 0) or ('e', 0), whichever key of the lowest value the set yields first
m.pop() # 'a' (from back)
m.pop(0) # 'd'
m # SortedValuesDict(e: 0, b: 1, c: 2)
m.popitem() # ('c', 2)

How to restrict the value range in dictionary?

I'd like to impose restrictions on a value in a dictionary.
For example, assume that I have the following dictionary
speed = {
    'x': 8,
    'y': 0,
    'z': 4}
And I want the value corresponding to the 'x' key in the speed dictionary to always be less than 10.
For example, if I use it in some function
calculate_speed(speed)
or
if I implement some mathematical transformation
speed["x"] += 1
the value shouldn't be larger than a certain limit I defined.
What is the simplest way to handle it?
You can do it the following way:
from collections import UserDict

class MyDict(UserDict):
    def __setitem__(self, key, value):
        if value > 10:  # any validation
            raise ValueError('Too big!')
        super().__setitem__(key, value)
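For instance, a quick sketch of my own (using the question's speed values) of how that guard behaves:
>>> speed = MyDict({'x': 8, 'y': 0, 'z': 4})
>>> speed['x'] += 1   # fine, 'x' becomes 9
>>> speed['x'] += 2   # would be 11
Traceback (most recent call last):
  ...
ValueError: Too big!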
But it would be better to use a class with a property.
Full version:
from collections import UserDict
from math import sqrt

class VectorDict(UserDict):
    def __init__(self, *args, speed_limit, **kwargs):
        self.speed_limit = speed_limit
        super().__init__(*args, **kwargs)

    def __setitem__(self, key, value):
        if self._calc_speed(key, value) > self.speed_limit:
            raise ValueError('Too big!')
        super().__setitem__(key, value)

    def _calc_speed(self, replace_key=None, replace_value=0):
        square_sum = 0
        for key, value in self.items():
            if key == replace_key:
                value = replace_value
            square_sum += value ** 2
        return sqrt(square_sum)

    @property
    def speed(self):
        return self._calc_speed()
example_1d = VectorDict(x=1, speed_limit=5)
example_2d = VectorDict(x=1, y=5, speed_limit=7)
example_3d = VectorDict(x=1, y=5, z=3, speed_limit=13)
print(example_3d.speed)
example_3d['x'] += 12  # ValueError: Too big!
Just add a value check in your loop.
For example:
if speed['x'] >= 10:
    speed['x'] = 9
Define a class MyClass that validates assignments to its speed attribute:
class MyClass:
    def __init__(self):
        self.speed = 0

    def __setattr__(self, k, v):
        if k == 'speed' and v > 10:
            return
        self.__dict__[k] = v
Here I'm just ignoring any change to speed if the new value isn't valid, but you can handle it differently as per your needs (e.g. raise an exception).
Here's a demo:
m = MyClass()
m.speed = 8
print(m.speed)
m.speed += 1
print(m.speed)
m.speed += 1
print(m.speed)
m.speed += 1
print(m.speed)
We get the following output:
8
9
10
10

iteration through nested dictionaries

I'm trying to implement a simple tree class which inherits from dictionary.
Here is my code:
class tree(dict):
    def __init__(self, hsymbol="/"):
        self.hsymbol = hsymbol

    def __setitem__(self, key, value):
        if key[0] == self.hsymbol: key = key[1:]
        parts = key.split(self.hsymbol, 1)
        if len(parts) == 2:
            if parts[0] not in self: self[parts[0]] = tree(hsymbol=self.hsymbol)
            self[parts[0]].__setitem__(parts[1], value)
        else:
            super(tree, self).__setitem__(key, value)

    def __getitem__(self, key):
        if key[0] == self.hsymbol: key = key[1:]
        parts = key.split(self.hsymbol, 1)
        if len(parts) == 2:
            if parts[0] not in self: raise KeyError(parts[0])
            return self[parts[0]][parts[1]]
        else:
            if key not in self: raise KeyError(parts[0])
            return super(tree, self).__getitem__(key)

    def __contains__(self, key):
        if key[0] == self.hsymbol: key = key[1:]
        parts = key.split(self.hsymbol, 1)
        if len(parts) == 2:
            if not super(tree, self).__contains__(parts[0]): return False
            return parts[1] in self[parts[0]]
        else:
            if not super(tree, self).__contains__(key): return False
            return True

    def __delitem__(self, key):
        if key[0] == self.hsymbol: key = key[1:]
        parts = key.split(self.hsymbol, 1)
        if len(parts) == 2:
            if parts[0] not in self: raise KeyError(parts[0])
            self[parts[0]].__delitem__(parts[1])
        else:
            if key not in list(self): raise KeyError(parts[0])
            super(tree, self).__delitem__(key)

    def keys(self, parent=""):
        #if parent is None: parent = self.hsymbol
        names = []
        for name in super(tree, self).keys():
            if isinstance(self[name], tree):
                names += self[name].keys(parent=parent+self.hsymbol+name)
            else:
                names.append(parent+self.hsymbol+name)
        return names
So everything works quite well, although I'm not sure about the keys() implementation:
>>> t=tree()
>>> t['/user/Johnson/incoming'] = 2200
>>> t['/user/Johnson/family'] = 4
>>> t['/user/Johnson/play'] = False
>>> t['/user/Smith/incoming'] = 12000
>>> t['/user/Smith/family'] = 1
>>> t['/user/Smith/play'] = True
>>> t['user/Smith/incoming']
12000
>>> print t
{'user': {'Smith': {'play': True, 'incoming': 12000, 'family': 1}, 'Johnson': {'play': False, 'incoming': 2200, 'family': 4}}}
>>> print t.keys()
['/user/Smith/play', '/user/Smith/incoming', '/user/Smith/family', '/user/Johnson/play', '/user/Johnson/incoming', '/user/Johnson/family']
>>> t
{'user': {'Smith': {'play': True, 'incoming': 12000, 'family': 1}, 'Johnson': {'play': False, 'incoming': 2200, 'family': 4}}}
...but iterating over it does not work the same way:
>>> for k in t:
... print k
...
user
>>>
How can I get something like this?
/user/Smith/play
/user/Smith/incoming
/user/Smith/family
/user/Johnson/play
/user/Johnson/incoming
/user/Johnson/family
I'm pretty sure this must be done with the __iter__ and next methods of the tree class, but I haven't figured out how to write them yet.
I've searched over Stack Overflow with no luck:
python recursive iteration nested dictionaries
python class inherited from dictionary iteration through nested dictionaries
python iteration through nested dictionaries
Yes, you need __iter__ (a generator will provide next() automatically).
Following your existing logic:
def __iter__(self, parent=""):
    for name in super(tree, self).keys():
        if isinstance(self[name], tree):
            for item in self[name].__iter__(parent=parent+self.hsymbol+name):
                yield item
        else:
            yield parent+self.hsymbol+name
Unlike your current keys() implementation, this only walks the tree on an as-needed basis: If a client only needs the first two keys, it only calls next() twice, and so the iterator only proceeds past two yields.
(I might suggest implementing keys as simply return list(iter(self)) -- that way you have the lazy approach available for those that want to avoid the inefficiency of unnecessarily walking a full tree, and the non-lazy approach otherwise).
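A minimal sketch of that non-lazy keys(), assuming the __iter__ above is already part of the class:
def keys(self, parent=""):
    # eagerly build the full list on top of the lazy iterator
    return list(self.__iter__(parent=parent))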

Know the depth of a dictionary

Supposing we have this dict:
d = {'a':1, 'b': {'c':{}}}
What would be the most straightforward way of knowing the nesting depth of it?
You need to create a recursive function:
>>> def depth(d):
...     if isinstance(d, dict):
...         return 1 + (max(map(depth, d.values())) if d else 0)
...     return 0
...
>>> d = {'a':1, 'b': {'c':{}}}
>>> depth(d)
3
You'll have to traverse the dictionary. You could do so with a queue; the following should be safe from circular references:
from collections import deque

def depth(d):
    queue = deque([(id(d), d, 1)])
    memo = set()
    while queue:
        id_, o, level = queue.popleft()
        if id_ in memo:
            continue
        memo.add(id_)
        if isinstance(o, dict):
            queue += ((id(v), v, level + 1) for v in o.values())
    return level
Note that because we visit all dictionary values in breadth-first order, the level value only ever goes up. The memo set is used to ensure we don't endlessly traverse a circular reference.
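A quick check of this queue-based version (my own trace, mirroring the demo further down):
>>> depth({'a': 1, 'b': {'c': {}}})
3
>>> circular = {}
>>> circular["self"] = circular
>>> depth(circular)  # the memo set stops the second visit
2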
Or you could traverse the tree with recursion (which effectively uses function calls as a stack). I've used functools.singledispatch() for easy expansion to other container types:
from functools import singledispatch, wraps

@singledispatch
def depth(_, _level=1, _memo=None):
    return _level

def _protect(f):
    """Protect against circular references"""
    @wraps(f)
    def wrapper(o, _level=1, _memo=None, **kwargs):
        _memo, id_ = _memo or set(), id(o)
        if id_ in _memo: return _level
        _memo.add(id_)
        return f(o, _level=_level, _memo=_memo, **kwargs)
    return wrapper

def _protected_register(cls, func=None, _orig=depth.register):
    """Include the _protect decorator when registering"""
    if func is None and isinstance(cls, type):
        return lambda f: _orig(cls, _protect(f))
    return _orig(cls, _protect(func)) if func is not None else _orig(_protect(cls))

depth.register = _protected_register
@depth.register
def _dict_depth(d: dict, _level=1, **kw):
    # default=_level lets an empty dict report its own level instead of breaking max()
    return max((depth(v, _level=_level + 1, **kw) for v in d.values()),
               default=_level)
This is a depth-first search, so now max() is needed to pick the greatest depth for the object currently under scrutiny at each level. A dictionary with 3 keys of different depths should reflect the greatest of those depths at its level.
The memo set used in either version tracks object ids, so we don't run in circles if you did something like foo = {}; foo["bar"] = foo.
Demo:
>>> d = {'a':1, 'b': {'c':{}}}
>>> depth(d)
3
>>> d = {'foo': {'bar': {'baz': 0}, 'spam': {'ham': {'monty': 1}, 'eric': 'idle'}}, 'john': 'cleese'}
>>> depth(d)
5
>>> circular = {}
>>> circular["self"] = circular
>>> depth(circular)
2
The recursive singledispatch version can be expanded to cover more containers, such as lists:
@depth.register
def _list_depth(l: list, _level=1, **kw):
    # same default guard so an empty list reports its own level
    return max((depth(v, _level=_level + 1, **kw) for v in l),
               default=_level)
Because I've augmented the standard .register decorator to handle circular-reference testing, implementing additional container support is relatively trivial. Just remember to pass along any extra keyword arguments to the recursive call!
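For example, once the list handler above is registered, nested lists and mixed containers are handled too (a quick sketch of my own; the empty-container result relies on the default=_level guard shown above):
>>> depth(['a', ['b', ['c']]])
4
>>> depth({'x': [1, [2]]})
4
>>> depth([])
1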
A non-recursive solution:
def depth(d):
    depth = 0
    q = [(i, depth + 1) for i in d.values() if isinstance(i, dict)]
    max_depth = 0
    while q:
        n, depth = q.pop()
        max_depth = max(max_depth, depth)
        q = q + [(i, depth + 1) for i in n.values() if isinstance(i, dict)]
    print max_depth
Iterative solution:
from collections import deque

def depth(d):
    q = deque([d])
    q2 = deque()
    max_depth = 0
    while q:
        curr_dict = q.popleft()
        if isinstance(curr_dict, dict):
            for di in curr_dict.itervalues():
                q2.append(di)
        if not q:
            q, q2 = q2, q
            max_depth += 1
    return max_depth
print depth(None)
print depth({})
print depth({"a": "b"})
print depth({"a": "b", "c": {"d": "e"}, "f": {"g": "h"}, "i": {"j": "k"}, "x": {}, "z": {} })
print depth({'a':1, 'b': {'c':{}}})
print depth({'foo': {'bar': {'baz': 0}, 'spam': {'ham': {'monty': 1}, 'eric': 'idle'}}, 'john': 'cleese'})

Python dict like surjective multiple key → value container

I'm currently in need of a Python container class with functionality similar to the builtin dict type. Basically, what I need is a dictionary where an arbitrary number of keys, besides a primary key, map to the very same value. However, when iterating over it, it should iterate only over the (primary_key, value) pairs, and only the primary keys should be returned if the list of keys is requested.
If this has already been implemented I'd rather not reinvent the wheel. So is there already a module providing such a container? If not, I'm going to implement it myself.
Here is a quick implementation:
import collections  # needed for collections.Mapping below

class MultipleKeyDict(dict):
    __slots__ = ["_primary_keys"]

    def __init__(self, arg=None, **kwargs):
        self._primary_keys = {}
        self.update(arg, **kwargs)

    def __setitem__(self, key, value):
        super(MultipleKeyDict, self).__setitem__(key, value)
        self._primary_keys.setdefault(value, key)

    def __delitem__(self, key):
        value = self[key]
        super(MultipleKeyDict, self).__delitem__(key)
        if self._primary_keys[value] == key:
            del self._primary_keys[value]
            for k, v in super(MultipleKeyDict, self).iteritems():
                if v == value:
                    self._primary_keys[value] = k
                    break

    def __iter__(self):
        return self.iterkeys()

    def update(self, arg=None, **kwargs):
        if arg is not None:
            if isinstance(arg, collections.Mapping):
                for k in arg:
                    self[k] = arg[k]
            else:
                for k, v in arg:
                    self[k] = v
        for k in kwargs:
            self[k] = kwargs[k]

    def clear(self):
        super(MultipleKeyDict, self).clear()
        self._primary_keys.clear()

    def iteritems(self):
        for v, k in self._primary_keys.iteritems():
            yield k, v

    def items(self):
        return list(self.iteritems())

    def itervalues(self):
        return self._primary_keys.iterkeys()

    def values(self):
        return self._primary_keys.keys()

    def iterkeys(self):
        return self._primary_keys.itervalues()

    def keys(self):
        return self._primary_keys.values()
The only messy bit is that it has to search the whole dict in case a primary key gets deleted.
I omitted copy(), pop(), popitem() and setdefault(). If you need them, you'll have to implement them yourself.
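Here is a short usage sketch (mine, under Python 2 to match the code above) showing that iteration and items() expose only the primary keys, while lookups still work through every key:
>>> m = MultipleKeyDict()
>>> m['blue'] = 'doll'    # 'blue' becomes the primary key for 'doll'
>>> m['azure'] = 'doll'   # secondary key, same value
>>> m['red'] = 'truck'
>>> m['azure']            # any key still looks up the value
'doll'
>>> sorted(m)             # iterating yields primary keys only
['blue', 'red']
>>> sorted(m.items())
[('blue', 'doll'), ('red', 'truck')]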
The simplest and easiest solution would be to use two dictionaries, one of which maps secondary keys to a primary key. If for some reason you need a reverse mapping, that could be included in the primary dictionary.
sec = {'one': 'blue', 'two': 'red', 'three': 'blue',  # secondary keys
       'blue': 'blue', 'red': 'red'}  # include identity mapping for primaries
prim = {'blue': ('doll', '$9.43', ('one', 'three')),
        'red': ('truck', '$14.99', ('two',))}
record = prim[sec['two']]
print('Toy=', record[0], 'Price=', record[1])
There is now a multiple key dictionary python package.
https://pypi.python.org/pypi/multi_key_dict/1.0.2
From the link:
from multi_key_dict import multi_key_dict
k = multi_key_dict()
k[1000, 'kilo', 'k'] = 'kilo (x1000)'
print k[1000] # will print 'kilo (x1000)'
print k['k'] # will also print 'kilo (x1000)'
# the same way objects can be updated, deleted:
# and if an object is updated using one key, the new value will
# be accessible using any other key, e.g. for example above:
k['kilo'] = 'kilo'
print k[1000] # will now print 'kilo' as value was updated
