So I have a list of dicts containing letters and their frequencies.
letter_freq = [
{'a': 10, 'b': 7},
{'d': 15, 'g': 8},
{'a': 12, 'q': 2}
]
I want to find all possible combinations of these dictionaries, as well as the total of their values:
perms = {
'ada': 37, 'adq': 27, 'aga': 30, 'agq': 20, 'bda': 34, 'bdq': 24, 'bga': 27, 'bgq': 17
}
I've looked at itertools.product(), but I don't see how to apply that to this specific use case. My intuition is that the easiest way to implement this is to make a recursive function, but I'm struggling to see how to add the values and the strings for the keys and make it all work.
Also, this list and the dicts can be of any length. Is there a simple way to do this that I haven't found yet? Thank you!
Solutions and Benchmark:
Yes, itertools.product works:
from itertools import product
# Map every concatenation of one key per dict to the sum of the matching values.
perms = {
    ''.join(keys): sum(vals)
    # Each `prod` is a tuple holding one (key, value) pair from every dict.
    for prod in product(*map(dict.items, letter_freq))
    # zip(*prod) transposes those pairs into (keys, vals); iterating over a
    # one-element list binds both names inside the comprehension.
    for keys, vals in [zip(*prod)]
}
Alternatively, build the products for the keys and the values separately, so we don't have to separate them:
# Two products advance in lockstep: one over the dicts' keys (iterating a dict
# yields its keys) and one over their values, so zip pairs each key tuple with
# the value tuple of the same combination.
perms = {
    ''.join(keys): sum(vals)
    for keys, vals in zip(product(*letter_freq),
                          product(*map(dict.values, letter_freq)))
}
Or fully separate their constructions (my favorite one):
# Lazily build the joined key strings and the summed values as two separate
# iterators, then pair them up into the final dict.
keys = map(''.join, product(*letter_freq))
vals = map(sum, product(*map(dict.values, letter_freq)))
perms = dict(zip(keys, vals))
Benchmark would be interesting, I suspect my last one will be fastest of these and also faster than Samwise's.
Yet another, inspired by a glance at constantstranger's (but much faster than theirs in some initial benchmark):
# Start from the empty combination and extend every partial (string, total)
# pair by each item of the next dict, one dict at a time.
items = [('', 0)]
for d in letter_freq:
    items = [(k0+k, v0+v)
             for k, v in d.items()
             for k0, v0 in items]
perms = dict(items)
Benchmark:
With your example list of dicts:
6.6 μs perms1
4.5 μs perms2
4.1 μs perms3
4.0 μs perms4
11.0 μs perms_Samwise
12.7 μs perms_constantstranger
With a list of seven dicts with four items each:
15.5 ms perms1
7.6 ms perms2
5.5 ms perms3
4.8 ms perms4
27.2 ms perms_Samwise
42.2 ms perms_constantstranger
Code (Try it online!):
def perms1(letter_freq):
    """Return {joined keys: summed values} for one item picked from each dict.

    Uses a single product over the dicts' (key, value) items and separates
    keys from values afterwards.
    """
    return {
        ''.join(keys): sum(vals)
        # Each `prod` is a tuple of (key, value) pairs, one per dict.
        for prod in product(*map(dict.items, letter_freq))
        # Transpose the pairs into (keys, vals); the one-element list is only
        # there so the comprehension can bind both names.
        for keys, vals in [zip(*prod)]
    }
def perms2(letter_freq):
    """Return {joined keys: summed values} for one item picked from each dict.

    Runs one product over the keys and one over the values and zips them, so
    no per-combination transposition is needed.
    """
    return {
        ''.join(keys): sum(vals)
        for keys, vals in zip(product(*letter_freq),
                              product(*map(dict.values, letter_freq)))
    }
def perms3(letter_freq):
    """Return {joined keys: summed values} for one item picked from each dict.

    Builds the key strings and the value sums as two independent lazy
    iterators and pairs them with zip.
    """
    keys = map(''.join, product(*letter_freq))
    vals = map(sum, product(*map(dict.values, letter_freq)))
    return dict(zip(keys, vals))
def perms4(letter_freq):
    """Return {joined keys: summed values} for one item picked from each dict.

    Grows a list of (string, total) pairs dict by dict instead of using
    itertools.product.
    """
    # The empty combination: empty string with a total of 0.
    items = [('', 0)]
    for d in letter_freq:
        # Extend every partial combination with every item of this dict.
        items = [(k0+k, v0+v)
                 for k, v in d.items()
                 for k0, v0 in items]
    return dict(items)
def perms_Samwise(letter_freq):
    """Return {joined keys: summed values} for one item picked from each dict.

    Each `p` is a tuple of (key, value) pairs from itertools.product; the keys
    are joined and the values summed with two generator expressions.
    """
    return {''.join(k for k, _ in p): sum(v for _, v in p) for p in itertools.product(*(d.items() for d in letter_freq))}
def perms_constantstranger(letter_freq):
    """Return {joined keys: summed values} built by a side-effecting comprehension.

    Every row extends all entries that were on the stack when the row
    started, so the stack also collects combinations that skip rows; those
    are filtered out at the end by string length.
    """
    # Seed entry: empty string with a total of 0.
    stack = [['', 0]]
    # The walrus snapshots the stack length once per row (always truthy, since
    # the stack is never empty) so only pre-existing entries are extended.
    # The comprehension's own result list is discarded.
    [stack.append((stack[i][0] + k, stack[i][1] + v)) for row in letter_freq if (lenStack := len(stack)) for k, v in row.items() for i in range(lenStack)]
    # Keep only strings as long as the number of dicts; this identifies the
    # complete combinations only when every key is exactly one character.
    return dict(row for row in stack if len(row[0]) == len(letter_freq))
# All implementations under test; the first one also provides the reference result.
funcs = perms1, perms2, perms3, perms4, perms_Samwise, perms_constantstranger
# Benchmark input: seven dicts with four items each.
letter_freq = [
    {'a': 10, 'b': 7, 'c': 5, 'd': 2},
    {'d': 15, 'g': 8, 'j': 6, 'm': 3},
    {'a': 12, 'q': 2, 'x': 1, 'z': 4},
    {'a': 10, 'b': 7, 'c': 5, 'd': 2},
    {'d': 15, 'g': 8, 'j': 6, 'm': 3},
    {'a': 12, 'q': 2, 'x': 1, 'z': 4},
    {'a': 10, 'b': 7, 'c': 5, 'd': 2},
]
from timeit import repeat
import itertools
from itertools import product
# Correctness check: every implementation must reproduce the first one's result.
expect = funcs[0](letter_freq)
for func in funcs:
    result = func(letter_freq)
    assert result == expect
# Three benchmark rounds; each round prints one line per implementation.
for _ in range(3):
    for func in funcs:
        # Best time among timeit.repeat's runs, each run being a single call.
        t = min(repeat(lambda: func(letter_freq), number=1))
        print('%5.1f ms ' % (t * 1e3), func.__name__)
    # Blank line between rounds.
    print()
itertools.product is indeed what you want.
>>> letter_freq = [
... {'a': 10, 'b': 7},
... {'d': 15, 'g': 8},
... {'a': 12, 'q': 2}
... ]
>>> import itertools
>>> {''.join(k for k, _ in p): sum(v for _, v in p) for p in itertools.product(*(d.items() for d in letter_freq))}
{'ada': 37, 'adq': 27, 'aga': 30, 'agq': 20, 'bda': 34, 'bdq': 24, 'bga': 27, 'bgq': 17}
If for any reason you wanted to roll your own permutations using comprehensions instead of product() and map(), you could do it this way:
letter_freq = [
    {'a': 10, 'b': 7},
    {'d': 15, 'g': 8},
    {'a': 12, 'q': 2}
]
# Seed entry: empty string with a total of 0.
stack = [['', 0]]
# Comprehension used purely for its side effect of appending to `stack`.
# The walrus captures the stack length once per row (it is always truthy), so
# each row only extends the entries that existed before that row started.
[stack.append((stack[i][0] + k, stack[i][1] + v)) for row in letter_freq if (lenStack := len(stack)) for k, v in row.items() for i in range(lenStack)]
# The stack also holds partial combinations that skipped rows; keep only the
# strings as long as the number of dicts (valid for single-character keys).
perms = dict(row for row in stack if len(row[0]) == len(letter_freq))
print(perms)
Output:
{'ada': 37, 'bda': 34, 'aga': 30, 'bga': 27, 'adq': 27, 'bdq': 24, 'agq': 20, 'bgq': 17}
Related
I am new to Python and am working with dictionaries.
I am getting 10 dictionaries like this
d1 = {'a': 13, 'b':4.53, 'c':3243, 'd':0.2323}
d2 = {'a': 12, 'b':4.3, 'c':373, 'd':0.263}
…
dn = {'a': 16, 'b':9.53, 'c':76843, 'd':13}
What I want to return is a nested dictionary containing the min and max of the values for each key, like this:
d_out = {'a': {'min': 12,'max': 16},
'b': {'min': 4.3,'max': 9.53},
'c': {'min': 373,'max': 76843},
'd': {'min': 0.2323,'max': 13},
}
Assuming the following input:
ds = [{'a': 13, 'b':4.53, 'c':3243, 'd':0.2323},
{'a': 12, 'b':4.3, 'c':373, 'd':0.263},
{'a': 16, 'b':9.53, 'c':76843, 'd':13},]
## or
# ds = [d1, d2, d3, ...]
You can build the result with dict() and a bit of zip/map help:
# Pair each key of the first dict with the min/max of the values found at the
# same position in every dict. zip(*(d.values() ...)) transposes the per-dict
# value sequences, so this relies on all dicts in `ds` listing their keys in
# the same order as ds[0].
dict(zip(ds[0],
         map(lambda x: {'min': min(x), 'max': max(x)},
             zip(*(d.values()
                   for d in ds)))))
output:
{'a': {'min': 12, 'max': 16},
'b': {'min': 4.3, 'max': 9.53},
'c': {'min': 373, 'max': 76843},
'd': {'min': 0.2323, 'max': 13}}
Comparison of this approach and the nice Counter trick from @Dani. Timing on 30k elements:
# dictionary comprehension
5.09 ms ± 202 µs per loop
# Counter trick
112 ms ± 2.65 ms per loop
One approach is to use collections.Counter and take advantage of the fact that intersection computes the min, and union computes the max:
from collections import Counter
from operator import and_, or_
from functools import reduce
# toy data
dict1 = {'a': 13, 'b': 4.53, 'c': 3243, 'd': 0.2323}
dict2 = {'a': 12, 'b': 4.3, 'c': 373, 'd': 0.263}
dict3 = {'a': 16, 'b': 9.53, 'c': 76843, 'd': 13}
# convert to Counter (passing the mapping directly also supports non-string keys)
d = [Counter(di) for di in [dict1, dict2, dict3]]
# find intersection, i.e. the per-key min
# NOTE: Counter's & and | discard results that are zero or negative, so this
# trick is only suitable for strictly positive values.
mi = reduce(and_, d)
# find union, i.e. the per-key max
ma = reduce(or_, d)
# build result dictionary with the "min"/"max" keys the question asks for
res = {key: {"min": value, "max": ma[key]} for key, value in mi.items()}
print(res)
# Output:
# {'a': {'min': 12, 'max': 16}, 'b': {'min': 4.3, 'max': 9.53}, 'c': {'min': 373, 'max': 76843}, 'd': {'min': 0.2323, 'max': 13}}
dict1 = {'a': 10, 'b': 8, 'c':5}
dict2 = {'d': 6, 'c': 4, 'a':20}
Given two dictionaries, I'd like the following output:
output = {'a':30, 'b':8, 'c':9, 'd':6}
This is what I have so far; I'm not quite sure what to do next.
I'm looking for a solution that is efficient in time/space complexity.
def merge_dict(dict1, dict2):
    """Return a new dict holding dict1's entries overlaid with dict2's.

    For a key present in both inputs the value from dict2 wins; values are
    not added together.
    """
    merged_dictionaries = dict(dict1)
    merged_dictionaries.update(dict2)
    return merged_dictionaries


dict1 = {'a': 10, 'b': 8, 'c': 5}
dict2 = {'d': 6, 'c': 4, 'a': 20}
merge_dictionaries = merge_dict(dict1, dict2)
# sorted() over a dict iterates its keys, so this is a sorted list of keys.
sorted_dictionary = sorted(merge_dictionaries)
If the values are numeric, you can use counters:
from collections import Counter
def merge_dicts(*dicts):
    """Return a dict mapping every key to the sum of its values across dicts.

    Args:
        *dicts: Any number of mappings with numeric values.

    Returns:
        A plain dict; a key missing from an input contributes 0.
    """
    total = Counter()
    for mapping in dicts:
        # Counter.update adds counts in place and, unlike `+` (which
        # sum() would use), keeps keys whose running total is zero or
        # negative instead of silently dropping them.
        total.update(mapping)
    return dict(total)
dict1 = merge_dicts(dict1, dict2)
dict1
# {'a': 30, 'b': 8, 'c': 9, 'd': 6}
This might be a bit excessive for only two dictionaries, so another option is:
# Add each of dict2's values onto dict1 in place, treating missing keys as 0.
for k, v in dict2.items():
    dict1[k] = dict1.get(k, 0) + v
dict1
# {'a': 30, 'b': 8, 'c': 9, 'd': 6}
Which updates dict1 in-place.
Finally, if you really need the result sorted (python3.7+), use
result = {k : dict1[k] for k in sorted(dict1)}
You can use a dict comprehension that iterates over a sorted union of the keys of the two dicts, and outputs values that are sums of the respective values of two dicts by the given keys, defaulting to 0:
{k: dict1.get(k, 0) + dict2.get(k, 0) for k in sorted(dict1.keys() | dict2.keys())}
This returns:
{'a': 30, 'b': 8, 'c': 9, 'd': 6}
result = dict(Counter(dict1) + Counter(dict2))
result = {k: result[k] for k in sorted(result)}
First merge the dicts by turning them into Counters and adding them, convert the result back into a dict, then sort the dict by key.
You can use collections.Counter to add two dictionaries:
from collections import Counter
def merged_dic():
    """Print the key-wise sum of two hard-coded dictionaries."""
    dict1 = {'a': 10, 'b': 8, 'c': 5}
    dict2 = {'d': 6, 'c': 4, 'a': 20}
    # Adding two Counters sums the values of matching keys.
    print(dict(Counter(dict1) + Counter(dict2)))


merged_dic()
Output:- {'a': 30, 'b': 8, 'c': 9, 'd': 6}
Assume there are two Python lists with the same structure, like this:
var1 = [{'a':1,'b':2},{'c':2,'d':5,'h':4},{'c':2,'d':5,'e':4}]
var2 = [{'a':3,'b':2},{'c':1,'d':5,'h':4},{'c':5,'d':5,'e':4}]
In my case, I need to combine both of those lists so that I get this value:
result = [{'a':4,'b':4},{'c':3,'d':10,'h':8},{'c':7,'d':10,'e':8}]
How can i do that?
zip-based one-liner comprehension:
result = [{k: d1[k]+d2[k] for k in d1} for d1, d2 in zip(var1, var2)]
This assumes that two dicts at the same index always have identical key sets.
Use list comprehensions to put the code in one line,
result = [{key : d1.get(key, 0)+d2.get(key, 0)
for key in set(d1.keys()) | set(d2.keys())} # union two sets
for d1, d2 in zip(var1, var2)]
print(result)
[{'a': 4, 'b': 4}, {'h': 8, 'c': 3, 'd': 10}, {'c': 7, 'e': 8, 'd': 10}]
This code takes into consideration the case that two dictionaries may not have the same keys.
var1 = [{'a':1,'b':2},{'c':2,'d':5,'h':4},{'c':2,'d':5,'e':4}]
var2 = [{'a':3,'b':2},{'c':1,'d':5,'h':4},{'c':5,'d':5,'e':4}]
# Sum the dicts pairwise, position by position. Each pair is assumed to share
# the same keys; a key missing from the second dict raises KeyError.
res = []
for dic1, dic2 in zip(var1, var2):
    # Use dic1.iteritems() in Python 2.
    dic = {key: val + dic2[key] for key, val in dic1.items()}
    res.append(dic)
>>>print(res)
[{'a': 4, 'b': 4}, {'c': 3, 'd': 10, 'h': 8}, {'c': 7, 'd': 10, 'e': 8}]
var1 = [{'a': 1, 'b': 2}, {'c': 2, 'd': 5, 'h': 4}, {'c': 2, 'd': 5, 'e': 4}]
var2 = [{'a': 3, 'b': 2}, {'c': 1, 'd': 5, 'h': 4}, {'c': 5, 'd': 5, 'e': 4}]
# For each position, add the value from var2's dict onto every entry of
# var1's dict at that position.
ret = [
    {name: amount + partner[name] for name, amount in current.items()}
    for current, partner in zip(var1, var2)
]
print(ret)
For the sake of completeness, another zip-based one-liner that will work even if the dicts are uneven in the both lists:
result = [{k: d1.get(k, 0) + d2.get(k, 0) for k in set(d1) | set(d2)} for d1, d2 in zip(var1, var2)]
Would something like this help?
var1 = [{'a':1,'b':2},{'c':2,'d':5,'h':4},{'c':2,'d':5,'e':4}]
var2 = [{'a':3,'b':2},{'c':1,'d':5,'h':4},{'c':5,'d':5,'e':4}]
# Pair up the dicts that sit at the same index in both lists.
combined_var = zip(var1, var2)
list_new_ds = []
for i, j in combined_var:
    # Fresh accumulator for every pair of dicts.
    new_d = {}
    # `i and j` merely evaluated to `j`, so only one dict's keys were ever
    # iterated; iterate one dict explicitly (both are assumed to share keys).
    for key in i:
        new_d[key] = i[key] + j[key]
    list_new_ds.append(new_d)
list_new_ds = [{'a': 4, 'b': 4}, {'h': 8, 'c': 3, 'd': 10}, {'c': 7, 'e': 8, 'd': 10}]
To explain, the zip function merges the lists as a list of tuples. I then unpack the tuples and iterate through the keys in each dictionary and add the values for the same keys together using a new dictionary to store them. I then append the value to a list, and then re-initialise the temporary dictionary to empty before looking at the next tuple in the zipped list.
The order is different due to dictionary behaviour I believe.
I am a novice, so would appreciate any critiques of my answer!
I am looking for the most elegant way to get this:
{'i_1': {'a': 33, 't': 4}, 'i_2': {'a': 9, 't': 0}}
From this:
{'i_1': {'a': 33, 'b': 55, 't': 4}, 'i_2': {'a': 9, 'b': 11, 't': 0}}
Each inner dict can have a lot of a, b, ..., z keys.
for now I have this::
In [3]: {k:dict(a=d[k]['a'], t=d[k]['t']) for k in d.keys()}
Out[3]: {'i_1': {'a': 33, 't': 4}, 'i_2': {'a': 9, 't': 0}}
but it's not very elegant
You can make your code a little bit more readable by using items instead of keys:
{k: dict(a=v['a'], t=v['t']) for k, v in d.items()}
Here you go. This function takes a dict in the format you specified and a list of keys that have to be removed from the inner dictionaries:
def remove_inner_keys(data: dict, inner_keys_to_remove: list) -> dict:
    """Return a copy of a two-level dict without the given inner keys.

    Args:
        data: Mapping of outer keys to inner dicts.
        inner_keys_to_remove: Inner keys to leave out of every inner dict.

    Returns:
        A new dict with new inner dicts; ``data`` itself is not modified.
    """
    return {
        outer_key: {
            inner_key: inner[inner_key]
            for inner_key in inner
            if inner_key not in inner_keys_to_remove
        }
        for outer_key, inner in data.items()
    }
Testing:
data = { 'i_1': { 'a': 33, 'b': 55, 't': 4 }, 'i_2': { 'a': 9, 'b': 11, 't': 0 } }
print(str(remove_inner_keys(data, ["b"])))
output:
{'i_2': {'a': 9, 't': 0}, 'i_1': {'a': 33, 't': 4}}
import copy
def foo(d):
    """Remove every 'b' key from ``d`` and from all nested dicts, in place.

    Each visited key and its value are printed before any removal.

    Args:
        d: The dict to prune; it is mutated and nothing is returned.
    """
    # Iterate over a snapshot of the keys, because popping from a dict while
    # iterating it raises RuntimeError. A shallow key list is enough; a deep
    # copy of the whole structure at every recursion level is not needed.
    for key in list(d):
        print(key, d[key])
        if isinstance(d[key], dict):
            foo(d[key])
        if key == 'b':
            d.pop(key)
If I have:
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
How can I get only the maximum value for each key, like this:
{'a': 11,'c': 10,'b': 7}
Use collections.Counter() objects instead, or convert your dictionaries:
from collections import Counter
# Fold every dict into `result` with Counter's in-place union, which keeps
# the larger of the two counts for each key.
# NOTE(review): Counter set operations are documented to discard counts of
# zero or less; confirm the inputs are strictly positive.
result = Counter()
for d in dicts:
    result |= Counter(d)
or even:
from collections import Counter
from functools import reduce  # reduce is not a builtin in Python 3
from operator import or_
# Fold all dicts together with Counter union (|), which keeps the larger count
# per key; the empty Counter() initial value makes an empty `dicts` safe.
result = reduce(or_, map(Counter, dicts), Counter())
Counter objects support finding the maximum per key natively through the | operation; & gives you the minimum.
Demo:
>>> result = Counter()
>>> for d in dicts:
... result |= Counter(d)
...
>>> result
Counter({'a': 11, 'c': 10, 'b': 7})
or using the reduce() version:
>>> reduce(or_, map(Counter, dicts), Counter())
Counter({'a': 11, 'c': 10, 'b': 7})
>>> dicts = [{'a': 4,'b': 7,'c': 9},
... {'a': 2,'b': 1,'c': 10},
... {'a': 11,'b': 3,'c': 2}]
>>> {letter: max(d[letter] for d in dicts) for letter in dicts[0]}
{'a': 11, 'c': 10, 'b': 7}
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
def get_max(dicts):
    """Return a dict mapping each key to the largest value seen for it.

    Keys do not have to appear in every dict; a key's running maximum starts
    at negative infinity.
    """
    best = {}
    for mapping in dicts:
        for key, value in mapping.items():
            best[key] = max(best.get(key, float('-inf')), value)
    return best
>>> get_max(dicts)
{'a': 11, 'c': 10, 'b': 7}
Something like this should work:
dicts = [{'a': 4, 'b': 7, 'c': 9},
         {'a': 2, 'b': 1, 'c': 10},
         {'a': 11, 'b': 3, 'c': 2}]
# First gather every value seen for each key, in order of appearance...
values_by_key = {}
for entry in dicts:
    for name, number in entry.items():
        values_by_key.setdefault(name, []).append(number)
# ...then reduce each key's list of values to its maximum.
max_keys = {name: max(numbers) for name, numbers in values_by_key.items()}