python mapping that stays sorted by value - python

There is a SortedDict class, implementing a dictionary which maintains its keys in order, but I cannot find any equivalent data structure that does the same with its values, i.e., something like this:
m = SortedValueMapping(a=3, b=1, c=2)
m.peekitem(0) # b
m.peekitem(-1) # a
m['d'] = 0
m.peekitem(0) # d
m.peekitem(-1) # a

The following class is a quick and dirty implementation with no guarantees for efficiency but it provides the desired behavior.
Should someone provide a better answer, I will gladly accept it!
class SortedValuesDict:
    """Mapping that is iterated and indexed in ascending order of its values.

    Three structures are kept in sync:

    * a ``SortedSet`` holding every distinct value,
    * a plain dict mapping each key to its value,
    * a dict mapping each value to the keys that share it.  The keys are
      stored in an insertion-ordered dict (used as an ordered set), so ties
      are reported oldest-first and ``peek*`` / ``pop*`` always agree on
      which key comes next.

    Values must be hashable and mutually comparable.  Note that iterating
    the object yields ``(key, value)`` pairs, not bare keys.
    """

    def __init__(self, args=None, **kwargs):
        """Initialize the SortedValuesDict with an Iterable or Mapping.

        ``args`` may be a mapping or an iterable of ``(key, value)`` pairs;
        keyword arguments are added afterwards.
        """
        # ``collections.Mapping`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.
        from collections.abc import Mapping
        from sortedcontainers import SortedSet
        self.__sorted_values = SortedSet()
        self.__key_value_pairs = {}
        self.__value_key_pairs = {}
        if args is not None:
            iterable = args.items() if isinstance(args, Mapping) else args
            for key, value in iterable:
                self[key] = value
        for key, value in kwargs.items():
            self[key] = value

    def __repr__(self):
        """Get a representation of the SortedValuesDict."""
        return f"SortedValuesDict({', '.join(f'{k}: {v}' for k, v in self)})"

    def __len__(self):
        """Return the number of keys stored."""
        return len(self.__key_value_pairs)

    def __setitem__(self, key, value):
        """Store the key, value pair in the SortedValuesDict."""
        if key in self:
            # Re-assigning a key: drop its old value first so that value
            # does not linger in the sorted set.
            del self[key]
        self.__sorted_values.add(value)
        self.__key_value_pairs[key] = value
        self.__value_key_pairs.setdefault(value, {})[key] = None

    def __getitem__(self, key):
        """Get the value corresponding to the key (KeyError if absent)."""
        return self.__key_value_pairs[key]

    def __delitem__(self, key):
        """Remove the given key from the SortedValuesDict (KeyError if absent)."""
        value = self.__key_value_pairs.pop(key)
        keys = self.__value_key_pairs[value]
        del keys[key]
        if not keys:
            # Last key for this value: the value itself disappears too.
            del self.__value_key_pairs[value]
            self.__sorted_values.remove(value)

    def __contains__(self, key):
        """Test if key is in SortedValuesDict."""
        return key in self.__key_value_pairs

    def __iter__(self):
        """Iterate over the (key, value) items in ascending value order."""
        for value in self.__sorted_values:
            for key in self.__value_key_pairs[value]:
                yield key, value

    def keys(self):
        """Iterate over the SortedValuesDict keys in ascending value order."""
        for key, _ in self:
            yield key

    def values(self):
        """Iterate over the values in ascending order, once per key."""
        for _, value in self:
            yield value

    def items(self):
        """Iterate over the SortedValuesDict items."""
        yield from self

    def peek(self, pos=0):
        """Peek at the next key for the value at position pos."""
        return self.peekitem(pos)[0]

    def peekvalue(self, pos=0):
        """Peek at the value at position pos (default 0)."""
        return self.__sorted_values[pos]

    def peekitem(self, pos=0):
        """Peek at the next (key, value) item for the value at position pos.

        ``pos`` indexes the distinct sorted values; IndexError is raised when
        it is out of range (for example on an empty container).
        """
        value = self.__sorted_values[pos]
        return next(iter(self.__value_key_pairs[value])), value

    def pop(self, pos=-1):
        """Pop a key corresponding to the value at position pos."""
        return self.popitem(pos)[0]

    def popitem(self, pos=-1):
        """Pop an item corresponding to the value at position pos.

        Removes exactly the item that ``peekitem(pos)`` would report.
        """
        value = self.__sorted_values[pos]
        keys = self.__value_key_pairs[value]
        key = next(iter(keys))
        del keys[key]
        del self.__key_value_pairs[key]
        if not keys:
            del self.__value_key_pairs[value]
            self.__sorted_values.pop(pos)
        return key, value
# Demo: build from keyword arguments, then inspect and mutate the mapping.
m = SortedValuesDict(a=3, b=1, c=2)
[*m.values()] # [1, 2, 3]
[*m] # [('b', 1), ('c', 2), ('a', 3)] (same as [*m.items()])
m.peekitem(0) # ('b', 1)
m.peekitem(-1) # ('a', 3)
m['d'] = 0
m.peekitem(0) # ('d', 0)
m.peekitem(-1) # ('a', 3)
m['e'] = 0
m # SortedValuesDict(d: 0, e: 0, b: 1, c: 2, a: 3)
m.peekitem(0) # a single pair such as ('d', 0), even though 'e' shares the value 0
m.pop() # 'a' (from back)
m.pop(0) # 'd'
m # SortedValuesDict(e: 0, b: 1, c: 2)
m.popitem() # ('c', 2)

Related

Dictionary comprehension inside insert() method not working

I am making a MappingList class which is a list implemented as an OrderedDict.
This is the MappingList class (some methods omitted):
class MappingList(MutableSequence):
    """
    A MappingList is a regular list implemented as a dictionary

    Items are read from and written to ``self.seq``, a mapping from integer
    index to value.  NOTE(review): ``__init__`` and some other methods are
    omitted from this excerpt, so how ``self.seq`` is created is not shown.
    """
    def __repr__(self):
        # Render like a plain list: only the stored values.
        return str(list(self.seq.values()))
    def __getitem__(self, item):
        # A missing index surfaces as IndexError (list semantics) instead of
        # KeyError.  `_traceback_from_none` is defined outside this excerpt;
        # presumably it raises the given exception without chaining.
        try:
            return self.seq[item]
        except KeyError:
            _traceback_from_none(IndexError, "list index out of range")
    def __setitem__(self, key, value, *, usage=None):
        # Writing past the current highest index is rejected unless the
        # caller passes usage="append".
        if key > max(self.seq.keys()) and usage != "append":
            raise IndexError("list index out of range")
        self.seq[key] = value
    def __delitem__(self, key):
        # Removes the entry only; the remaining indices are not renumbered.
        try:
            del self.seq[key]
        except KeyError:
            _traceback_from_none(IndexError, "list index out of range")
    def __len__(self):
        return len(self.seq)
    def __eq__(self, other):
        # Only comparable with another MappingList; defer otherwise.
        if not isinstance(other, MappingList):
            return NotImplemented
        return self.seq == other.seq
    # NOTE(review): the next line looks like a garbled `@classmethod`
    # decorator; confirm against the original source.
    #classmethod
    def _dict_from_seq(cls, seq):
        # Map each position of `seq` to its element: 0 -> seq[0], 1 -> seq[1], ...
        return OrderedDict(enumerate(seq))
    def _next_available_slot(self):
        # One past the highest index currently in use.
        return max(self.seq) + 1
    def insert(self, index, value): # todo: insert() should not overwrite
        """Insert a value into the MappingList"""
        if index > max(self.seq.keys()):
            raise IndexError("list index out of range")
        # NOTE: iterating a dict yields its keys only, so the `k, v`
        # unpacking below raises "TypeError: cannot unpack non-iterable int
        # object"; the comprehension result needs `.items()`.
        for k, v in {k: v for k, v in self.seq.items() if k > index}:
            del self.seq[k]
            self.seq[k + 1] = v
        self[index] = value
When I try to insert an item into a MappingList, I get the following error:
File "C:\...\My Python Programs\free_time\mappinglist.py", line 103, in test_insert
self.li.insert(1, MappingList(["blah", 1, 5.8]))
File "C:\...\My Python Programs\free_time\mappinglist.py", line 85, in insert
for k, v in {k: v for k, v in self.seq.items() if k > index}:
TypeError: cannot unpack non-iterable int object
Why is this error happening? Does OrderedDict.items() return an integer?
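No, OrderedDict.items() does not return an integer; the error comes from the outer loop.
Iterating over a dict directly (without calling keys(), values() or items()) yields only its keys, so each `k, v` unpacking receives a single int key. Call items() on the dict built by the comprehension as well, so that the loop receives (key, value) pairs: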
for k, v in {k: v for k, v in self.seq.items() if k > index}.items():

Finding the closest pair of keys if lookup fails in a Python dictionary

Lets say I have a Python dictionary where the keys are actually integers. I can create one like this:
>>> d = dict([(1, 0), (7, 10), (28, 20)])
>>> d
{1: 0, 7: 10, 28: 20}
Now, I want to do a lookup where, if the key is found, its value is returned. This part is really easy, like so:
>>> key = 7
>>> d[key]
10
If a key is not found, then I want to return the bound for the keys. For example:
>>> key = 6
>>> d[key]
Bound(1, 7)
Since 6 does not exist as a key, I am returning the 2 keys between which it lies. Is such a thing possible to do without basically iterating the entire dictionary? If not, then this question does not really need to be answered. If this is indeed possible, please include some performance implications as well if possible. Thanks.
Here is a solution using a function to access a normal dict (I used an OrderedDict as I have an older version of Python here now, you can use a normal dict if you have Python 3.6 or more, as they are ordered.)
We sort the dict by key, which lets us use bisect to find the surrounding keys quickly.
import bisect
from collections import OrderedDict
d = OrderedDict(sorted([(1, 0), (7, 10), (28, 20)])) # Could be a simple dict with Python 3.6+
class Bound:
    """Pair of neighbouring keys that enclose a key missing from the dict.

    ``a`` is the closest smaller key and ``b`` the closest larger key; either
    is ``None`` when there is no key on that side.
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        return 'Bound({}, {})'.format(self.a, self.b)


def closest(key, d):
    """Return ``d[key]``, or the ``Bound`` of the keys surrounding ``key``.

    The keys are sorted here, so the result is correct even when ``d`` was
    not built in key order.  Sorting an already-sorted key list is linear,
    the same cost as the list copy a lookup miss needs anyway.
    """
    try:
        return d[key]
    except KeyError:
        keys = sorted(d)
        ins_point = bisect.bisect(keys, key)
        lower = keys[ins_point - 1] if ins_point >= 1 else None
        upper = keys[ins_point] if ins_point < len(keys) else None
        return Bound(lower, upper)
# Example lookups: a present key returns its value; a missing key returns
# the Bound of its neighbouring keys (None where there is no neighbour).
closest(7, d)
# 10
closest(8, d)
# Bound(7, 28)
closest(30, d)
# Bound(28, None)
closest(-1, d)
# Bound(None, 1)
You can also subclass dict and define the __missing__ method (code for Python >= 3.6, assuming that dict is ordered):
import bisect
class Bound:
    """Pair of neighbouring keys that enclose a key missing from the dict.

    ``a`` is the closest smaller key and ``b`` the closest larger key; either
    is ``None`` when there is no key on that side.
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b

    def __repr__(self):
        return 'Bound({}, {})'.format(self.a, self.b)


class BoundDict(dict):
    """dict whose failed lookups return the surrounding keys as a ``Bound``."""

    def __missing__(self, key):
        """Called by ``dict.__getitem__`` on a miss; nothing is inserted.

        The keys are sorted on every miss, so items may be added in any
        order; for an already-sorted dict the sort is linear.
        """
        keys = sorted(self)
        ins_point = bisect.bisect(keys, key)
        lower = keys[ins_point - 1] if ins_point >= 1 else None
        upper = keys[ins_point] if ins_point < len(keys) else None
        return Bound(lower, upper)
# Example: plain indexing returns the value for a present key and a Bound of
# the neighbouring keys for a missing one.
d = BoundDict(sorted([(1, 0), (7, 10), (28, 20)]))
print(d[7])
# 10
print(d[8])
# Bound(7, 28)
print(d[30])
# Bound(28, None)
print(d[-1])
# Bound(None, 1)
Solution with custom dict class:
import bisect
import collections
class Bound:
    """Pair of neighbouring keys (``left``, ``right``) around a missing key.

    Either side is ``None`` when there is no key on that side.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def __repr__(self):
        return 'Bound({}, {})'.format(self.left, self.right)


class MyDict(collections.defaultdict):
    """dict that keeps a sorted key list (``lst``) to answer failed lookups.

    A lookup of a missing key returns the ``Bound`` of its neighbouring keys
    instead of raising ``KeyError``; nothing is inserted by such a lookup.
    Every mutating method keeps ``lst`` in sync with the dict's keys.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        dict.__init__(self, *args, **kwargs)
        # dict.__init__ does not go through __setitem__, so build the list here.
        self.lst = sorted(self)

    def __setitem__(self, key, value):
        if key not in self:
            bisect.insort_left(self.lst, key)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        # Delete from the dict first: a missing key then raises KeyError (as
        # for any dict) and leaves the key list untouched.
        super().__delitem__(key)
        self.lst.remove(key)

    def __missing__(self, key):
        right_index = bisect.bisect(self.lst, key)
        left_index = right_index - 1
        right = self.lst[right_index] if right_index != len(self.lst) else None
        left = self.lst[left_index] if left_index != -1 else None
        return Bound(left, right)

    # The C-level dict mutators below bypass __setitem__/__delitem__, so they
    # are overridden to keep ``lst`` consistent.

    def update(self, *args, **kwargs):
        super().update(*args, **kwargs)
        self.lst = sorted(self)

    def pop(self, key, *default):
        had_key = key in self
        value = super().pop(key, *default)
        if had_key:
            self.lst.remove(key)
        return value

    def popitem(self):
        key, value = super().popitem()
        self.lst.remove(key)
        return key, value

    def setdefault(self, key, default=None):
        if key not in self:
            self[key] = default
        return self[key]

    def clear(self):
        super().clear()
        del self.lst[:]
# Example: missing keys yield a Bound; deleting a key widens the bound
# reported for lookups that fall in the gap it leaves.
d = MyDict([(1, 0), (7, 10), (28, 20)])
print(d[-3]) # Bound(None, 1)
print(d[6]) # Bound(1, 7)
print(d[7]) # 10
print(d[33]) # Bound(28, None)
del d[7]
print(d[6]) # Bound(1, 28)
def bound(x, d):
    """Return ``x`` if it is a key of ``d``, else its neighbouring keys.

    For a missing ``x`` the result is ``(lower, upper)``: the largest key
    below ``x`` and the smallest key above it.  A side with no such key is
    ``None`` (previously this raised ``UnboundLocalError``).  A single linear
    pass per side replaces the two full sorts.
    """
    if x in d:
        return x
    lower = max((k for k in d if k < x), default=None)
    upper = min((k for k in d if k > x), default=None)
    return (lower, upper)
d = dict([(1, 0), (7, 10), (28, 20), (4,5), (2,5), (15,10)])
bound(17,d)
(15, 28)

Python dictionary not adding subsequent keys after the first

Fairly new to Python and I can not figure this out. I go to add a key to a dictionary and it adds it fine. I can even update that same key with a new value, however when I go to add a second key to the dictionary, it does not add the second key value pair.
class CountedSet:
    """Multiset of values: ``self.data`` maps each value to its count."""
    def __init__(self):
        self.data = {}
    def __iadd__(self,other):
        # `cs += n`: bump the count of int `n` (starting at 0) and return self
        # so the augmented assignment rebinds to the same object.
        if isinstance(other,int):
            self.data[other] = self.data.get(other, 0) + 1
            return self
        elif isinstance(other,CountedSet):
            #TODO::iterate through second countedSet and update self
            return self
        # NOTE: any other operand type falls through and returns None.
    def __add__(self,obj):
        # Merges obj's counts into self and returns self (no new object).
        # NOTE(review): `self.data[key]` raises KeyError when self is
        # non-empty but lacks `key`.
        for key, value in obj.data.items():
            if len(self.data) == 0:
                self.data[key] = value
            elif self.data[key]:
                self.data[key] = self.data[key] + value
            else:
                self.data[key] = value
        return self
    def __getitem__(self,item):
        # Count for `item`, or None when it was never added.
        if item in self.data:
            return self.data.get(item)
        else:
            return None
    def __str__(self):
        # NOTE: the return sits inside the loop, so only the first
        # (key, value) pair is ever rendered; an empty set returns None.
        for key, value in self.data.items():
            return("{%s,%s}" % (key,value))
a = CountedSet()
a += 17
a += 4
print(a)
This simply outputs {17,1} when I would expect to see {17,1} {4,1}
Your __str__ implementation returns on the first iteration of the for-loop:
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value)) # here
Maybe you want something like:
def __str__(self):
    """Render every (key, count) pair as "{key,count}", separated by spaces."""
    # The stray "{" before the format string made this line a syntax error.
    return " ".join(["{%s,%s}" % (k, v) for k, v in self.data.items()])
Or, without the comprehension:
def __str__(self):
    """Render every (key, count) pair as "{key,count}", separated by spaces."""
    rendered = []
    # Each item is already a (key, value) tuple, so it feeds the format directly.
    for pair in self.data.items():
        rendered.append("{%s,%s}" % pair)
    return ' '.join(rendered)

Get key count from OrderedDict where key is a tuple

I've a dictionary such as this:
my_dict=collections.OrderedDict([((123, 1), 'qwe'), ((232, 1), 'asd'), ((234, 2), 'zxc'), ((6745, 2), 'aaa'), ((456, 3), 'bbb')])
The combination of the tuple is always unique and I would like to maintain the order of insertion, and hence OrderedDict. I've a well over ~10K items in the dict. How can I efficiently maintain a counter that gives the count of the second element in the tuple? Basically, I need to know the count whenever I would like to add/delete an item in the key. Right now I just iterate through my_dict and get the counter everytime but it seems to be very expensive to do that.
In the above example I want the output to be:
1:2 # As in 1 occurs 2 times
2:2
3:1
Right now I do the following:
from collections import OrderedDict, Counter
# Keys are (id, group) tuples; insertion order is preserved.
my_dict = OrderedDict()
my_dict[(123,1)] = 'qwe'
my_dict[(232,1)] = 'asd'
my_dict[(234,2)] = 'zxc'
my_dict[(6745,2)] = 'aaa'
my_dict[(456,3)] = 'bbb'
# Collect the second element of every key tuple, then tally them.
# This walks all keys each time the counts are needed.
cnt = []
for item in my_dict.keys():
    cnt.append(item[1])
# Python 2 print statement.
print Counter(cnt)
I'm not sure if this is the best way, but is there a way to override the = operator and the pop function, such that a count is added or subtracted every time I do that operation?
Getting a Counter to work nicely with an OrderedDict is probably going to require some subclassing. Here's something that might work; the key idea is to override __setitem__ and __delitem__ so that the counter is updated on every insertion and deletion (if you'd like a more robust implementation, let me know):
import collections
class CountedOrderedDict(collections.OrderedDict):
    """OrderedDict keyed by tuples that tallies each key's second element.

    ``self.counter`` maps ``key[1]`` to the number of stored keys having that
    second element.  Entries whose count drops to zero are removed, so the
    counter only ever lists groups that are present.
    """

    def __init__(self, *args, **kwargs):
        # The counter must exist before OrderedDict.__init__ runs, because
        # the initial items are inserted through __setitem__.
        self.counter = collections.Counter()
        super(CountedOrderedDict, self).__init__(*args, **kwargs)

    def __delitem__(self, key):
        # Delete first: a missing key raises KeyError before any count changes.
        super(CountedOrderedDict, self).__delitem__(key)
        self._forget(key[1])

    def __setitem__(self, key, value):
        # Overwriting an existing key must not count it twice.
        if key not in self:
            self.counter[key[1]] += 1
        super(CountedOrderedDict, self).__setitem__(key, value)

    def pop(self, key, *default):
        """Remove ``key`` and return its value, keeping the counter in sync.

        Routed through ``__delitem__`` explicitly because the built-in
        ``OrderedDict.pop`` may bypass it.
        """
        if key in self:
            value = self[key]
            del self[key]
            return value
        if default:
            return default[0]
        raise KeyError(key)

    def popitem(self, last=True):
        """Remove and return the last (or first) item, updating the counter."""
        if not self:
            raise KeyError('dictionary is empty')
        key = next(reversed(self) if last else iter(self))
        return key, self.pop(key)

    def clear(self):
        """Remove all items and reset the counter."""
        super(CountedOrderedDict, self).clear()
        self.counter.clear()

    def _forget(self, group):
        """Decrement ``group``'s count, dropping the entry when it hits zero."""
        remaining = self.counter[group] - 1
        if remaining > 0:
            self.counter[group] = remaining
        else:
            self.counter.pop(group, None)
Example usage:
>>> my_dict = CountedOrderedDict({(123,1): 'sda', (232,1) : 'bfd', (234,2) : 'csd', (6745,2) : 'ds', (456,3) : 'rd'})
>>> my_dict.counter
Counter({1: 2, 2: 2, 3: 1})
>>> del my_dict[(123,1)]
>>> my_dict.counter
Counter({2: 2, 1: 1, 3: 1})
>>> my_dict[(150,1)] = "asdf"
>>> my_dict.counter
Counter({1: 2, 2: 2, 3: 1})
Here's a more general CountedOrderedDict implementation that takes a key function as a parameter.
import collections
class CountedOrderedDict(collections.OrderedDict):
    """OrderedDict that tallies its keys by a caller-supplied key function.

    ``self.counter`` maps ``key_transform(k)`` to the number of stored keys
    ``k`` that produce it.  Entries whose count drops to zero are removed.
    """

    def __init__(self, key=lambda k: k, *args, **kwargs):
        """Create the dict.

        ``key`` maps a dictionary key to the group it is counted under (the
        identity by default); remaining arguments go to ``OrderedDict``.
        """
        # Both attributes must exist before OrderedDict.__init__ runs,
        # because the initial items are inserted through __setitem__.
        self.counter = collections.Counter()
        self.key_transform = key
        super(CountedOrderedDict, self).__init__(*args, **kwargs)

    def __delitem__(self, key):
        # Delete first: a missing key raises KeyError before any count changes.
        super(CountedOrderedDict, self).__delitem__(key)
        self._forget(self.key_transform(key))

    def __setitem__(self, key, value):
        # Overwriting an existing key must not count it twice.
        if key not in self:
            self.counter[self.key_transform(key)] += 1
        super(CountedOrderedDict, self).__setitem__(key, value)

    def pop(self, key, *default):
        """Remove ``key`` and return its value, keeping the counter in sync.

        Routed through ``__delitem__`` explicitly because the built-in
        ``OrderedDict.pop`` may bypass it.
        """
        if key in self:
            value = self[key]
            del self[key]
            return value
        if default:
            return default[0]
        raise KeyError(key)

    def popitem(self, last=True):
        """Remove and return the last (or first) item, updating the counter."""
        if not self:
            raise KeyError('dictionary is empty')
        key = next(reversed(self) if last else iter(self))
        return key, self.pop(key)

    def clear(self):
        """Remove all items and reset the counter."""
        super(CountedOrderedDict, self).clear()
        self.counter.clear()

    def _forget(self, group):
        """Decrement ``group``'s count, dropping the entry when it hits zero."""
        remaining = self.counter[group] - 1
        if remaining > 0:
            self.counter[group] = remaining
        else:
            self.counter.pop(group, None)
For your needs, you'd instantiate it like so:
my_dict = CountedOrderedDict(key=lambda k: k[1])

Python dict like surjective multiple key → value container

I'm currently in need of a Python container class with functionality similar to the builtin dict type. Basically, what I need is a dictionary where an arbitrary number of keys, besides a primary key, map to the very same value. However, when iterating over it, it should iterate only over the (primary_key, value) pairs, and only the primary keys should be returned if the list of keys is requested.
If this has already been implemented I'd rather not reinvent the wheel. So is there already a module providing such a container? If not, I'm going to implement it myself.
Here is a quick implementation:
class MultipleKeyDict(dict):
    """dict in which several keys may share a value, one of them "primary".

    Item lookup (``d[key]``) works for every key.  Iteration, ``keys()``,
    ``values()`` and ``items()`` report each distinct value once, paired with
    its primary key: the first key still present that was assigned that
    value.  Values must be hashable, because ``_primary_keys`` maps each
    value to its primary key.

    Runs on both Python 2 and Python 3; the ``iter*`` methods are kept for
    callers written against the Python 2 API.
    """

    __slots__ = ["_primary_keys"]

    def __init__(self, arg=None, **kwargs):
        self._primary_keys = {}
        self.update(arg, **kwargs)

    def __setitem__(self, key, value):
        if key in self:
            if self[key] == value:
                # Same value again: keep the current primary key as it is.
                return
            # The key moves to another value: release the old value first so
            # no stale primary-key entry is left behind.
            del self[key]
        super(MultipleKeyDict, self).__setitem__(key, value)
        self._primary_keys.setdefault(value, key)

    def __delitem__(self, key):
        value = self[key]
        super(MultipleKeyDict, self).__delitem__(key)
        if self._primary_keys[value] == key:
            del self._primary_keys[value]
            # Promote any remaining key with the same value (full scan).
            for k, v in super(MultipleKeyDict, self).items():
                if v == value:
                    self._primary_keys[value] = k
                    break

    def __iter__(self):
        return self.iterkeys()

    def update(self, arg=None, **kwargs):
        """Add items from a mapping or an iterable of pairs, then ``kwargs``."""
        try:
            from collections.abc import Mapping
        except ImportError:  # Python 2
            from collections import Mapping
        if arg is not None:
            if isinstance(arg, Mapping):
                for k in arg:
                    self[k] = arg[k]
            else:
                for k, v in arg:
                    self[k] = v
        for k in kwargs:
            self[k] = kwargs[k]

    def clear(self):
        super(MultipleKeyDict, self).clear()
        self._primary_keys.clear()

    def iteritems(self):
        """Yield ``(primary_key, value)`` pairs."""
        for v, k in self._primary_keys.items():
            yield k, v

    def items(self):
        return list(self.iteritems())

    def itervalues(self):
        return iter(self._primary_keys)

    def values(self):
        return list(self._primary_keys)

    def iterkeys(self):
        return iter(self._primary_keys.values())

    def keys(self):
        return list(self._primary_keys.values())
The only messy bit is that it has to search the whole dict in case a primary key gets deleted.
I omitted copy(), pop(), popitem() and setdefault(). If you need them, you'll have to implement them yourself.
The simplest and easiest solution would be to use two dictionaries, one of which maps secondary keys to a primary key. If for some reason you need a reverse mapping, that could be included in the primary dictionary.
# Secondary keys map to a primary key; primaries also map to themselves so
# any key can be resolved with a single lookup in ``sec``.
sec = {'one': 'blue', 'two': 'red', 'three': 'blue',  # secondary keys
       'blue': 'blue', 'red': 'red'}  # include identity mapping for primaries
# Primary key -> (name, price, secondary keys).  Named ``records`` rather
# than ``dict`` so the builtin type is not shadowed.
records = {'blue': ('doll', '$9.43', ('one', 'three')),
           'red': ('truck', '$14.99', ('two',))}
record = records[sec['two']]
print('Toy=', record[0], 'Price=', record[1])
There is now a multiple key dictionary python package.
https://pypi.python.org/pypi/multi_key_dict/1.0.2
From the link:
from multi_key_dict import multi_key_dict
k = multi_key_dict()
k[1000, 'kilo', 'k'] = 'kilo (x1000)'
print k[1000] # will print 'kilo (x1000)'
print k['k'] # will also print 'kilo (x1000)'
# the same way objects can be updated, deleted:
# and if an object is updated using one key, the new value will
# be accessible using any other key, e.g. for example above:
k['kilo'] = 'kilo'
print k[1000] # will now print 'kilo' as value was updated

Categories