How to get a random value in a very large python dictionary

How to get a random value in a very large python dictionary - python

Given a python dict with multiple million entries, what is the most efficient way to get and remove a random (k,v) pair from it?
The dict is constantly growing and the random remove function is called very often.
The most cited solution for python2 random_key = random.choice(the_dict.keys()) is way too slow as a list of all the keys is created first. With lots of elements in the dict, this solution does not work.
Another proposed solution is the_dict.popitem(), but this does not return a real random object, but depends on the internal ordering of the dict.
The third solution that is also way to slow is an iterator:
it = the_dict.iterkeys()
for i in range (random.randint(0, len(the_dict)-1)):
next(it)
random_key = next(it)
Next to the remove_random(), sometimes a the_dict.pop(x) is required for a specific key. Therefore, a simple list based secondary index does not work.
Can this problem be efficiently be solved with a dict?

A solution is to use a bidirectional map each key to an integer, to allow for randomly selecting a key by using random.randrange(0,N) to select from a range of ints which are bidirectionally mapped to the keys, where N is the number of keys.
Adding a new key simply assigns it the next higher int. Deleting a key reassigns the int for that key to the key that was assigned the previously highest int before deleting the key-value pair. Python code provided for clarity.
Python code:
def create(D): # O(len(D))
# Create the bidirectional maps from the dictionary, D
keys = D.keys()
ints = range(len(keys)
int_to_key = dict(zip(keys, ints))
key_to_int = dict(zip(ints, keys))
return (int_to_key, key_to_int)
def add(D, int_to_key, key_to_int, key, value): # O(1)
# Add key-value pair (no extra work needed for simply changing the value)
new_int = len(D)
D[key] = value
int_to_key[new_int] = key
key_to_int[key] = new_int
def remove(D, int_to_key, key_to_int, key): # O(1)
# Update the bidirectional maps then remove the key-value pair
# Get the two ints and keys.
key_int = key_to_int[key]
swap_int = len(D) - 1 # Should be the highest int
swap_key = int_to_key[swap_int]
# Update the bidirectional maps so that key now has the highest int
key_to_int[key], key_to_int[swap_key] = swap_int, key_int
int_to_key[key_int], int_to_key[swap_int] = swap_key, key
# Remove elements from dictionaries
D.remove(key)
key_to_int.remove(key)
int_to_key.remove(key)
def random_key(D, int_to_key): # O(1)
# Select a random key from the dictionary using the int_to_key map
return int_to_key[random.randrange(0, len(D))]
def remove_random(D, int_to_key, key_to_int): # O(1)
# Randomly remove a key from the dictionary via the bidirectional maps
key = random_key(D, int_to_key)
remove(D, int_to_key, key_to_int, key)
Note: Adding/removing keys from D without using the appropriate above functions will break the bidirectional map. This means it's best to implement this as a class.

No, as you've discovered, this can't be done efficiently with a plain dict. See this issue for some explanations about why implementing random.choice for sets is hard; the same arguments apply to dictionaries.
But it's possible to create a dict-like data structure that does support efficient random selection. Here's a recipe for such an object, based in part on this question and its responses. It's only a starting-point, but it supports most of the existing dict methods, many of which are conveniently filled in by the MutableMapping ABC. Depending on your needs, you may need to flesh it out a bit: for example, to be able to create a RandomChoiceDict directly from a regular dict, or to add a meaningful __repr__, etc.
Essentially, you need to maintain three structures: a list of keys, a list of corresponding values, and a dict that maps keys back to indices (the inverse of the keys list). The basic __getitem__, __setitem__ and __delitem__ operations can be simply implemented in terms of those structures, and if __len__ and __iter__ are specified, the abstract base class takes care of most of the rest.
from collections import MutableMapping
import random
class RandomChoiceDict(MutableMapping):
"""
Dictionary-like object allowing efficient random selection.
"""
def __init__(self):
# Add code to initialize from existing dictionaries.
self._keys = []
self._values = []
self._key_to_index = {}
def __getitem__(self, key):
return self._values[self._key_to_index[key]]
def __setitem__(self, key, value):
try:
index = self._key_to_index[key]
except KeyError:
# Key doesn't exist; add a new one.
index = len(self._keys)
self._key_to_index[key] = index
self._keys.append(key)
self._values.append(value)
else:
# Key already exists; overwrite the value.
self._values[index] = value
def __delitem__(self, key):
index = self._key_to_index.pop(key)
# Remove *last* indexed element, then put
# it back at position 'index' (overwriting the
# one we're actually removing) if necessary.
key, value = self._keys.pop(), self._values.pop()
if index != len(self._key_to_index):
self._keys[index] = key
self._values[index] = value
self._key_to_index[key] = index
def __len__(self):
return len(self._key_to_index)
def __iter__(self):
return iter(self._keys)
def random_key(self):
"""Return a randomly chosen key."""
if not self:
raise KeyError("Empty collection")
index = random.randrange(len(self))
return self._keys[index]
def popitem_random(self):
key = self.random_key()
value = self.pop(key)
return key, value
Example usage:
>>> d = RandomChoiceDict()
>>> for x in range(10**6): # populate with some values
... d[x] = x**2
...
>>> d.popitem_random() # remove and return random item
(132545, 17568177025)
>>> 132545 in d
False
>>> d.popitem_random()
(954424, 910925171776)

Related

Python JSON - finding elements on a dynamic path

I have to update a nested JSON object.
If I knew the specifics of which items were to be updated I could do :
json_object['basket']['items']['apple'] = 'new value'
However, my list of elements to target is dynamic.
> basket.items.apple = 'green'
> name = 'my shopping'
> basket.cost = '15.43'
I could do this by looping through elements.
Find 'basket' > then find 'items > then find 'apple' > set value
Find 'name' > set value
However, was hoping that there was a way to just reference directly/dynamicaly.
i.e. from a string 'basket.cost', build the expression :
json_object['basket']['cost']
P.s. it has to cope with lists of dictionaries too !
Any guidance appreciated :)

Once you have the string "basket.cost", you can split it on "." and it's pretty easy to drill down into json_object['basket']['cost'] using a loop. Functionally, there is no difference between doing this and doing it "directly": you are still getting the 'basket' key first, and then getting the 'cost' key from the value of json_object['basket'].
def get_element(d, path):
# This function can take the string "basket.cost", or the list ["basket", "cost"]
if isinstance(path, str):
path = path.split(".")
for p in path:
d = d[p]
return d
def set_element(d, path, value):
path = path.split(".")
dict_to_set = get_element(d, path[:-1])
key_to_set = path[-1]
dict_to_set[key_to_set] = value
set_element(json_object, "basket.items.apple", 100)
Now, this assumes all elements of your path already exist, so let's say you create a dictionary that looks like so:
json_object = {"basket": {"items": dict()}}
set_element(json_object, "basket.items.apple", 100)
set_element(json_object, "basket.cost", 10)
print(json_object)
# Output: {'basket': {'items': {'apple': 100}, 'cost': 10}}
print(get_element(json_object, "basket.cost"))
# Output: 10
If you try to access an element that doesn't already exist, you get a KeyError:
get_element(json_object, "basket.date")
# KeyError: 'date'
This also happens if you try to set a value in an element that doesn't exist:
set_element(json_object, "basket.date.day", 1)
# KeyError: 'date'
If we want to allow your function to create the dictionaries when they don't exist, we can modify the get_element function to account for this situation and add the key:
def get_element(d, path, create_missing=False):
# This function can take the string "basket.cost", or an iterable containing the elements "basket" and "cost"
if isinstance(path, str):
path = path.split(".")
for p in path:
if create_missing and p not in d:
d[p] = dict()
d = d[p]
return d
def set_element(d, path, value, create_missing=True):
path = path.split(".")
dict_to_set = get_element(d, path[:-1], create_missing)
key_to_set = path[-1]
dict_to_set[key_to_set] = value
set_element(json_object, "basket.date.day", 1)
print(json_object)
# Output: {'basket': {'items': {'apple': 100}, 'cost': 10, 'date': {'day': 1}}}

If using third party package is an option, you can try python-box. It comes with lots of options and utilities to load from json, yaml files. The implementation is optimized for speed using Cython.
from box import Box
test_data = {
"basket": {
"products": [
{"name": "apple", "colour": "green"}
],
}
}
a = Box(test_data)
a.basket.cost = 12.3
a.basket.products[0].colour = "pink"
a.basket.products.append({"name": "pineapple", "taste": "sweet"})
print(a.basket.products[1].taste)

You can get exactly what you want by overloading some python magic methods: __getattr__ and __setattr__. I'll show an example of the API to wet the appetite and then the full code:
test_data = {'basket': {'items': [{'name': 'apple', 'colour': 'green'},
{'name': 'pineapple', 'taste': 'sweet',},
],
'cost': 12.3,
},
'name': 'Other'}
o = wrap(test_data) # This wraps with the correct class, depending if it is a dict or a list
print(o.name) # Prints 'Other'
print(o.basket.items) # Prints the list of items
print(o.basket.cost) # Prints 12.3
o.basket.cost = 10.0 # Changes the cost
assert o.basket.cost == 10.0
assert len(o) == 2
assert len(o.basket.items) == 2
o.basket.items.append({'name': 'orange'})
o.basket.items[2].colour = 'yellow' # It works with lists!
assert o.basket.items[2].name == 'orange'
assert o.basket.items[2].colour == 'yellow'
# You can get a part of it and it holds a reference to the original
b = o.basket
b.type = 'groceries'
assert o.basket.type == 'groceries'
# It is also possible to create a separate wrapped part and then join:
employees = wrap({})
employees.Clara.id = 101
employees.Clara.age = 23
employees.Lucia.id = 102
employees.Lucia.age = 29
o.employees = employees
The implementation is based on special wrapper classes, one for dicts, another for lists. They all inherit from a base class. Note that the need to use super().__setattr__ instead of simply self._data is because we will override the __getattr__ and __setattr__ methods to look for the data inside _data. Of course it gives an infinite loop when you try to define _data.
from collections.abc import Mapping, Sequence, MutableSequence
class BaseWrapper:
__slots__ = ('_data')
def __init__(self, data):
super().__setattr__('_data', data)
def __repr__(self):
return f'{self.__class__.__name__}({repr(self._data)})'
The wrapper for dictionaries is the most interesting: it uses __getattr__ to look for a key in the wrapped dictionary. This allows for a very natural API: if o is a wrapped dictionary, o.entry will give the same result as o['entry']. Most of the code should be self-explanatory, there are only two tricks: the first is that __getattr__ checks if the output is a dict or list and wraps it. This allows for chaining of calls like o.basket.cost. The downside is that a new wrapper is created every call. The second trick is when setting an attribute: it checks if what is being set is a wrapped instance and un-wraps it. Thus, wrapped dictionaries can be combined and the underlying dictionary is always "clean".
class MappingWrapper(BaseWrapper):
"""Wraps a dictionary and provides the keys of the dictionary as class members.
Create new keys when they do not exist."""
def __getattr__(self, name):
# Note: these two lines allow automatic creation of attributes, e.g. in an object 'obj'
# that doesn't have an attribute 'car', the following is possible:
# >> o.car.colour = 'blue'
# And all the missing levels will be automatically created
if name not in self._data and not name.startswith('_'):
self._data[name] = {}
return wrap(self._data[name])
def __setattr__(self, name, value):
self._data[name] = unwrap(value)
# Implements standard dictionary access
def __getitem__(self, name):
return wrap(self._data[name])
def __setitem__(self, name, value):
self._data[name] = unwrap(value)
def __delitem__(self, name):
del self._data[name]
def __len__(self):
return len(self._data)
The list wrapper is simpler, no need to mess around with attribute access. The only special care we have to take is to wrap and unwrap the list elements when one is requested/set. Note that, just like with the dictionary wrapper, the same wrap and unwrap functions are used (in __getitem__/__setitem__/insert).
class ListWrapper(BaseWrapper, MutableSequence):
"""Wraps a list. Essentially, provides wrapping of elements of the list."""
def __getitem__(self, idx):
return wrap(self._data[idx])
def __setitem__(self, idx, value):
self._data[idx] = unwrap(value)
def __delitem__(self, idx):
del self._data[idx]
def __len__(self):
return len(self._data)
def insert(self, index, obj):
self._data.insert(index, unwrap(obj))
Finally, the definition of wrap, which just selects the correct wrapper based on the type of the input, and unwrap, which extracts the raw data:
def wrap(obj):
if isinstance(obj, dict):
return MappingWrapper(obj)
if isinstance(obj, list):
return ListWrapper(obj)
return obj
def unwrap(obj):
if isinstance(obj, BaseWrapper):
return obj._data
return obj
The full code can be found in this gist.
An important caveat: to keep the implementation simple, wrapper objects are created at every access. Thus using this method inside large loops may cause performance issues (per my measurements, this method of access is between 12 to 30 times slower).

I'm going to assume that you already know how to handle the value errors that will probably come up with this nested collection accessing, so I won't focus on it in my approach.
I would split this in two parts:
Traversing a nested collection according to a list of keys for each level
Getting a list of keys out of a string
The first one is quite trivial, where as you said simply looping through the keys and getting to the end of those gives you access to the collection element in question. A simple implementation of that could look something like this:
def get_nested(collection, key):
for part in key:
collection = collection[part]
return collection
def set_nested(collection, key, value):
for part in key[:-1]:
collection = collection[part]
collection[key[-1]] = value
Here the key is expected to be some iterable of keys, such as a tuple or list.
Of course that means there is an expectation that your string representing a path along the collection is already parsed. We can get to that next.
This step would also be very trivial, since one could simply expression.split(".") it. However, since you also want to be able to index nested lists along with dicts, it get's a little more complicated.
There is a tradeoff to be made here. One could simply say: "Any time that one of the items in expression.split(".") can be parsed to an int, we will do just that, and assume that it was ment as an index in a list", however naturally that isn't necessarily the case. There is nothing preventing you from using a number in string form as a key in a dict. However if you think this is never going to be the case for you, perhaps the you can just call it like this:
set_nested(
collection,
(int(part) if part.isdigit() else part for part in expression.split(".")),
"target value",
)
(or of course wrap it in another function like this).
However if the consideration of using digit keys in dicts is important for you, there is another solution:
Whenever traversing the nested collection downward, we check if the collection we are currently looking at is a list. Only if it is a list, do we actually try to parse the path part as an int.
This would be the respective set_nested and get_nested functions for that:
def get_nested(collection, key: str):
for part in key.split("."):
if type(collection) == list:
part = int(part)
collection = collection[part]
return collection
def set_nested(collection, key: str, val):
key = key.split(".")
for i, part in enumerate(key):
if type(collection) == list:
part = int(part)
if i == len(key) - 1:
collection[part] = val
else:
collection = collection[part]
I believe that's the simplest solution to your problem, though of course it's important to keep in mind:
There is no error handling in this code, and indexing on dynamic paths is a topic where you are bound to run into errors. Depending on where and how you want to handle those it's going to be easy or very tedious.
There is no checking of setting values in dicts that don't exist yet, or for expanding arrays to a specific size, but since you didn't mention those that as a requirement I'm presuming it's not an issue. It might be for others reading this.

This is tricky and I would discourage it unless necessary as it is an easy thing to design and implmenet badly.
First: it's easy to split on path separator and follow the object tree to the desired key.
But after a while questions will start to appear. E.g.: what separator to split on?
A slash? It can appear in the JSON dictionary key... A dot? Same.
We'll need to either restrict legal / handled paths or implement some kind of escaping mechanism.
How do you handle empty strings?
Another goal: handle lists... Ok. So how do we interpret a path a.0? Is it ['a'][0] or ['a']['0'] ?
It seem that we'll have to complicate the language or drop the requirement.
So, in general -- I'd avoid it. Ultimately here's a quick implementation which
desing choices may or may not satisfy you:
there's basic backslash escaping of path separator
empty string is allowed as a key
lists are not handled due to ambiguity
def deep_set(root: dict, path: str, value):
segments = [*iter_segments(path, '.')]
for k in segments[:-1]:
root = root[k]
root[segments[-1]] = value
def iter_segments(path: str, separator: str = '.'):
segment = ''
path_iter = iter(path)
while True:
c = next(path_iter, '')
if c in ('.', ''):
yield segment
segment = ''
if c == '':
break
continue
elif '\\' == c:
c = next(path_iter, '')
segment += c

how to safely append to a dictionary of dictionaries

I know there's a similar question:
How to append to a dictionary of dictionaries
but the answers aren't working for me. My problem is as follows. If I need to add a new key: value pair to a Python dictionary, then
my_dict[key] = value
is always safe (as long as key is not a mutable type), whether my_dict had been already initialized or not.
However, if I want my_dict to be a dictionary of dictionaries, then
my_dict[keyA][keyB] = value
doesn't work, unless I already initialized my_dict[keyA] as an empty dictionary. So what I'm doing right now is:
class dict_of_dict():
def __init__(self):
self.ddict = {}
def update(self, keyA, keyB, value):
if not(keyA in self.ddict.keys()):
self.ddict[keyA] = {}
self.ddict[keyA][keyB] = value
a = dict_of_dict()
a.update(0, 3, "foobar")
a.ddict
This works, but I feel like it's overkill. Is there a more compact/Pythonic but still readable solution?

Multi-level defaultdict with variable depth and with list and int type

I am trying to create a multi-level dict with variable depth and with list and int type.
Data structure is like below
A
--B1
-----C1=1
-----C2=[1]
--B2=[3]
D
--E
----F
------G=4
In the case of above data structure, the last value can be an int or list.
If the above data structure has the only int then I can be easily achieved by using the below code:
from collections import defaultdict
f = lambda: defaultdict(f)
d = f()
d['A']['B1']['C1'] = 1
But as the last value has both list and int, it becomes a bit problematic for me.
Now we can insert data in a list using two ways.
d['A']['B1']['C2']= [1]
d['A']['B1']['C2'].append([2])
But when I am using only the append method it is causing the error.
Error is:
AttributeError: 'collections.defaultdict' object has no attribute 'append'
so Is there any way to use only the append method for a list?

There's no way you can use your current defaultdict-based structure to make d['A']['B1']['C2'].append(1) work properly if the 'C2' key doesn't already exist, since the data structure can't tell that the unknown key should correspond to a list rather than another layer of dictionary. It doesn't know what method you're going to call on the value it returns, so it can't know it shouldn't return a dictionary (like it did when it first looked up 'A' and 'B').
This isn't an issue for bare integers, since for those you're as assigning directly to a new key (and all the earlier levels are dictionaries). When you're assigning, the data structure isn't creating the value, you are, so you can use any type you want.
Now, if your keys are distinctive in some way, so that given a key like 'C2' you can know for sure that it should correspond to a list, you may have a chance. You can write your own dict subclass, defining a __missing__ method to handle lookups of keys that don't exist yet in your own special way:
def Tree(dict):
def __missing__(self, key):
if key_corresponds_to_list(key): # magic from somewhere
result = self[key] = []
else:
result = self[key] = Tree()
return result
# you might also want a custom __repr__
Here's an example run with a magic key function that makes any even-length key default to a list, while an odd-length key defaults to a dict:
> def key_corresponds_to_list(key):
return len(key) % 2 == 0
> t = Tree()
> t["A"]["B"]["C2"].append(1) # the default value for C2 is a list because it's even length
> t
{'A': {'B': {'C2': [1]}}}
> t["A"]["B"]["C10"]["D"] = 2 # C10's another layer of dict, since it's length is odd
> t
{'A': {'B': {'C10': {'D': 2}, 'C2': [1]}}} # it didn't matter what length D was though
You probably won't actually want to use a global function to control the class like this, I just did that as an example. If you go with this approach, I'd suggest putting the logic directly into the __missing__ method (or maybe passing a function as a parameter, like defaultdict does with its factory function).

Unique constant reference

Let's take as an example the following code :
ALL = "everything"
my_dict = {"random":"values"}
def get_values(keys):
if keys is None:
return {}
if keys is ALL:
return my_dict
if not hasattr(keys, '__iter__')
keys = [keys]
return {key: my_dict[key] for key in keys}
The function get_values returns a dict with the given key, or keys if the parameter is an iterable, an empty dictionary if the parameter is None or the whole dictionary if the parameter is the constant ALL.
The problem with this happens when you would want to return a key called "everything". Python might use the same reference for ALL and the parameter (since they're both the same immutable), which would make the keys is ALL expression True. The function will therefore return the whole dict, so not the intended behavior.
It would be possible to assign ALL to an instance object of a class defined specifically for that purpose, or to use the type method to generate an object inline, which would make ALL a unique reference. Both solutions seem a little overkill though.
I could also use a flag in the function declaration (i.e. : def get_values(keys, all=False)), but then I can always derive the value of a parameter from the other (if all is True, then keys is None, if keys is not None, then All is not False), so it seems overly verbose.
What is your opinion on the previously mentioned techniques, and do you see other possible ways of fixing this ?

Don't use a value that could be (without extreme effort) a valid key as the sentinel.
ALL = object()
However, it seems much simpler to define the function to take a (possibly empty) sequence of keys.
def get_values(keys=None):
if keys is None:
keys = []
rv = {}
for key in keys:
# Keep in mind, this is a reference to
# an object in my_dict, not a copy. Also,
# you may want to handle keys not found in my_dict:
# ignore them, or set rv[key] to None?
rv[key] = my_dict[key]
return rv
d1 = get_all_values() # Empty dict
d2 = get_all_values([]) # Explicitly empty dict
d3 = get_all_values(["foo", "bar"]) # (Sub)set of values
d4 = get_all_values(my_dict) # A copy of my_dict
In the last case, we take advantage of the fact that get_all_values can take any iterable, and an iterator over a dict iterates over its keys.

Python: update a list of tuples... fastest method

This question is in relation to another question asked here:
Sorting 1M records
I have since figured out the problem I was having with sorting. I was sorting items from a dictionary into a list every time I updated the data. I have since realized that a lot of the power of Python's sort resides in the fact that it sorts data more quickly that is already partially sorted.
So, here is the question. Suppose I have the following as a sample set:
self.sorted_records = [(1, 1234567890), (20, 1245678903),
(40, 1256789034), (70, 1278903456)]
t[1] of each tuple in the list is a unique id. Now I want to update this list with the follwoing:
updated_records = {1245678903:45, 1278903456:76}
What is the fastest way for me to do so ending up with
self.sorted_records = [(1, 1234567890), (45, 1245678903),
(40, 1256789034), (76, 1278903456)]
Currently I am doing something like this:
updated_keys = updated_records.keys()
for i, record in enumerate(self.sorted_data):
if record[1] in updated_keys:
updated_keys.remove(record[1])
self.sorted_data[i] = (updated_records[record[1]], record[1])
But I am sure there is a faster, more elegant solution out there.
Any help?
* edit
It turns out I used bad exaples for the ids since they end up in sorted order when I do my update. I am actually interested in t[0] being in sorted order. After I do the update I was intending on resorting with the updated data, but it looks like bisect might be the ticket to insert in sorted order.
end edit *

You're scanning through all n records. You could instead do a binary search, which would be O(log(n)) instead of O(n). You can use the bisect module to do this.

Since apparently you don't care about the ending value of self.sorted_records actually being sorted (you have values in order 1, 45, 20, 76 -- that's NOT sorted!-), AND you only appear to care about IDs in updated_records that are also in self.sorted_data, a listcomp (with side effects if you want to change the updated_record on the fly) would serve you well, i.e.:
self.sorted_data = [(updated_records.pop(recid, value), recid)
for (value, recid) in self.sorted_data]
the .pop call removes from updated_records the keys (and corresponding values) that are ending up in the new self.sorted_data (and the "previous value for that recid", value, is supplied as the 2nd argument to pop to ensure no change where a recid is NOT in updated_record); this leaves in updated_record the "new" stuff so you can e.g append it to self.sorted_data before re-sorting, i.e I suspect you want to continue with something like
self.sorted_data.extend(value, recid
for recid, value in updated_records.iteritems())
self.sorted_data.sort()
though this part DOES go beyond the question you're actually asking (and I'm giving it only because I've seen your previous questions;-).

You'd probably be best served by some form of tree here (preserving sorted order while allowing O(log n) replacements.) There is no builtin balanaced tree type, but you can find many third party examples. Alternatively, you could either:
Use a binary search to find the node. The bisect module will do this, but it compares based on the normal python comparison order, whereas you seem to be sorted based on the second element of each tuple. You could reverse this, or just write your own binary search (or simply take the code from bisect_left and modify it)
Use both a dict and a list. The list contains the sorted keys only. You can wrap the dict class easily enough to ensure this is kept in sync. This allows you fast dict updating while maintaining sort order of the keys. This should prevent your problem of losing sort performance due to constant conversion between dict/list.
Here's a quick implementation of such a thing:
import bisect
class SortedDict(dict):
"""Dictionary which is iterable in sorted order.
O(n) sorted iteration
O(1) lookup
O(log n) replacement ( but O(n) insertion or new items)
"""
def __init__(self, *args, **kwargs):
dict.__init__(self, *args, **kwargs)
self._keys = sorted(dict.iterkeys(self))
def __setitem__(self, key, val):
if key not in self:
# New key - need to add to list of keys.
pos = bisect.bisect_left(self._keys, key)
self._keys.insert(pos, key)
dict.__setitem__(self, key, val)
def __delitem__(self, key):
if key in self:
pos = bisect.bisect_left(self._keys, key)
del self._keys[pos]
dict.__delitem__(self, key)
def __iter__(self):
for k in self._keys: yield k
iterkeys = __iter__
def iteritems(self):
for k in self._keys: yield (k, self[k])
def itervalues(self):
for k in self._keys: yield self[k]
def update(self, other):
dict.update(self, other)
self._keys = sorted(dict.iterkeys(self)) # Rebuild (faster if lots of changes made - may be slower if only minor changes to large dict)
def keys(self): return list(self.iterkeys())
def values(self): return list(self.itervalues())
def items(self): return list(self.iteritems())
def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, ', '.join("%s=%r" % (k, self[k]) for k in self))

Since you want to replace by dictionary key, but have the array sorted by dictionary value, you definitely need a linear search for the key. In that sense, your algorithm is the best you can hope for.
If you would preserve the old dictionary value, then you could use a binary search for the value, and then locate the key in the proximity of where the binary search lead you.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to get a random value in a very large python dictionary - python

Related

Python JSON - finding elements on a dynamic path

how to safely append to a dictionary of dictionaries

Multi-level defaultdict with variable depth and with list and int type

Unique constant reference

Python: update a list of tuples... fastest method

Categories

Resources