Combine two dictionaries, concatenate string values? - python

Related: Is there any pythonic way to combine two dicts (adding values for keys that appear in both)?
I'd like to merge two string:string dictionaries, and concatenate the values. The above post recommends using collections.Counter, but it doesn't handle string concatenation.
>>> from collections import Counter
>>> a = Counter({'foo':'bar', 'baz':'bazbaz'})
>>> b = Counter({'foo':'baz'})
>>> a + b
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/collections.py", line 569, in __add__
TypeError: cannot concatenate 'str' and 'int' objects
(My guess is Counter tries to set b['baz'] to 0.)
I'd like to get a result of {'foo':'barbaz', 'baz':'bazbaz'}. Concatenation order doesn't matter to me. What is a clean, Pythonic way to do this?

Dict-comprehension:
>>> d = {'foo': 'bar', 'baz': 'bazbaz'}
>>> d1 = {'foo': 'baz'}
>>> keys = d.viewkeys() | d1.viewkeys()
>>> {k : d.get(k, '') + d1.get(k, '') for k in keys}
{'foo': 'barbaz', 'baz': 'bazbaz'}
For Python 2.6 and earlier:
>>> dict((k, d.get(k, '') + d1.get(k, '')) for k in keys)
{'foo': 'barbaz', 'baz': 'bazbaz'}
This will work for any number of dicts:
def func(*dicts):
    """Merge any number of str->str dicts, concatenating values per key.

    Every key present in at least one input appears in the result. Its value
    is the concatenation, in argument order, of the values found for that key
    (inputs lacking the key contribute an empty string).
    """
    all_keys = set()
    for mapping in dicts:
        all_keys.update(mapping)

    merged = {}
    for key in all_keys:
        pieces = [mapping.get(key, '') for mapping in dicts]
        merged[key] = "".join(pieces)
    return merged
...
>>> d = {'foo': 'bar', 'baz': 'bazbaz'}
>>> d1 = {'foo': 'baz','spam': 'eggs'}
>>> d2 = {'foo': 'foofoo', 'spam': 'bar'}
>>> func(d, d1, d2)
{'foo': 'barbazfoofoo', 'baz': 'bazbaz', 'spam': 'eggsbar'}

Can write a generic helper, such as:
a = {'foo':'bar', 'baz':'bazbaz'}
b = {'foo':'baz'}
def concatd(*dicts):
    """Concatenate the string values of several dicts, key by key.

    Every key that occurs in any of the inputs appears in the result; its
    value is the concatenation, in argument order, of the values stored under
    that key (a dict lacking the key contributes an empty string).

    Args:
        *dicts: zero or more dicts mapping keys to strings.

    Returns:
        A new dict. Called with no arguments it returns ``{}``.
    """
    # Gather keys from every input, not only the first one, so entries that
    # exist only in later dicts are not silently dropped. A dict serves as the
    # key collection so first-seen order is kept (insertion-ordered dicts,
    # Python 3.7+).
    keys = {}
    for d in dicts:
        for k in d:
            keys.setdefault(k, None)
    return {k: ''.join(d.get(k, '') for d in dicts) for k in keys}
# Demo of concatd. print() is called as a function so the lines are valid on
# Python 3 (a single parenthesised argument also prints the same on Python 2).
print(concatd(a, b))
# {'foo': 'barbaz', 'baz': 'bazbaz'}
c = {'foo': '**not more foo!**'}
print(concatd(a, b, c))
# {'foo': 'barbaz**not more foo!**', 'baz': 'bazbaz'}

One can use defaultdict to achieve this:
from collections import defaultdict
# Merge a and b into new_dict, concatenating the values of shared keys.
# defaultdict(str) yields '' for a key seen for the first time, so the
# concatenation needs no membership test.
a = {'foo': 'bar', 'baz': 'bazbaz'}
b = {'foo': 'baz'}
new_dict = defaultdict(str)
for source in (a, b):
    for key in source:
        new_dict[key] = new_dict[key] + source[key]
print(new_dict)
# defaultdict(<class 'str'>, {'foo': 'barbaz', 'baz': 'bazbaz'})
print(dict(new_dict))
# {'foo': 'barbaz', 'baz': 'bazbaz'}
If there are many dicts to join, we could use itertools.chain.from_iterable:
from collections import defaultdict
from itertools import chain
# Same merge for an arbitrary list of dicts: all (key, value) pairs are
# streamed through one chained iterator, in list order.
a = {'foo': 'bar', 'baz': 'bazbaz'}
b = {'foo': 'baz'}
c = {'baz': '123'}
dicts = [a, b, c]
new_dict = defaultdict(str)
all_pairs = chain.from_iterable(mapping.items() for mapping in dicts)
for pair in all_pairs:
    key, value = pair
    new_dict[key] = new_dict[key] + value
print(dict(new_dict))
# {'foo': 'barbaz', 'baz': 'bazbaz123'}

Related

Group similar dict entries as a tuple of keys

I would like to group similar entries of a dataset.
ds = {1: 'foo',
2: 'bar',
3: 'foo',
4: 'bar',
5: 'foo'}
>>>tupelize_dict(ds)
{
(1,3,5): 'foo',
(2,4): 'bar'
}
I wrote this function, but I am sure there is a much simpler way, isn't there?
def tupelize_dict(data):
    """Group the keys of ``data`` that map to equal values, in place.

    Keys whose values compare equal (``==``) are replaced by a single entry
    keyed by the sorted tuple of those keys. A key whose value is unique is
    kept unchanged. A key that is already a tuple is flattened into the group
    it joins. Values only need to support ``==``; they are never hashed.

    Args:
        data: dict to regroup; it is modified in place.

    Returns:
        The same ``data`` object, regrouped.

    Raises:
        TypeError: if the keys of one group cannot be sorted together.
    """
    from itertools import chain

    # One pass over the items instead of re-enumerating every key pair after
    # each merge. Buckets are searched by equality rather than by hash because
    # values may be unhashable (e.g. nested dicts).
    buckets = []
    for key, value in data.items():
        for known_value, keys in buckets:
            if known_value == value:
                keys.append(key)
                break
        else:
            buckets.append((value, [key]))

    regrouped = {}
    for value, keys in buckets:
        if len(keys) == 1:
            regrouped[keys[0]] = value
        else:
            flat = chain.from_iterable(
                k if isinstance(k, tuple) else (k,) for k in keys
            )
            regrouped[tuple(sorted(flat))] = value

    # Replace the contents only after everything has been computed, so a
    # failure above leaves the caller's dict untouched.
    data.clear()
    data.update(regrouped)
    return data
EDIT
I am interested in the general case where the content of the dataset can be any type of object that allows ds[i] == ds[j]:
ds = {1: {'a': {'b':'c'}},
2: 'bar',
3: {'a': {'b':'c'}},
4: 'bar',
5: {'a': {'b':'c'}}}
Something like this should do the trick (note that it uses the values as dictionary keys, so they must be hashable):
>>> from collections import defaultdict
>>> ds = {1: 'foo',
... 2: 'bar',
... 3: 'foo',
... 4: 'bar',
... 5: 'foo'}
>>>
>>> d = defaultdict(list)
>>> for k, v in ds.items():
... d[v].append(k)
...
>>> res = {tuple(v): k for k, v in d.items()}
>>> res
{(1, 3, 5): 'foo', (2, 4): 'bar'}
Alternatively, you could do something like this:
def tupelize_dict(ds):
    """Invert ``ds`` so that each distinct value is keyed by a tuple of its keys.

    Values are used as dictionary keys while grouping, so they must be
    hashable. Every result key is a tuple, even for a value that occurs once.
    """
    keys_by_value = {}
    for key, value in ds.items():
        if value not in keys_by_value:
            keys_by_value[value] = []
        keys_by_value[value].append(key)

    inverted = {}
    for value, keys in keys_by_value.items():
        inverted[tuple(keys)] = value
    return inverted
ds = {1: 'foo',
2: 'bar',
3: 'foo',
4: 'bar',
5: 'foo'}
print(tupelize_dict(ds))
Following the answer of acushner, it is possible to make it work if I can compute a hash of the content of dataset's elements.
import pickle
from collections import defaultdict
def tupelize_dict(ds):
    """Group the keys of ``ds`` whose values have the same pickle signature.

    Each value is serialised with ``pickle.dumps`` and the resulting bytes are
    used as a hashable stand-in for the (possibly unhashable) value.

    Args:
        ds: dict whose values are all picklable.

    Returns:
        A new dict mapping a tuple of original keys to the value they share.
        Every result key is a tuple, even for a value that occurs once.

    Note:
        Values are grouped by byte-identical pickles. Equal values are not
        guaranteed to pickle identically (e.g. dicts built in a different key
        order), so such values may end up in separate groups.
    """
    value_by_sig = {}
    keys_by_sig = defaultdict(list)
    for k, v in ds.items():
        # Fingerprint this value only. The original called a bare `dumps` (an
        # undefined name) on the whole dataset, which would have given every
        # key the same signature.
        sig = pickle.dumps(v)
        value_by_sig[sig] = v
        keys_by_sig[sig].append(k)
    return {tuple(keys): value_by_sig[sig] for sig, keys in keys_by_sig.items()}
This solution is MUCH faster than my original proposition.
To test it, I generated a set of big random nested dictionaries and ran cProfile on both implementations:
original: 204.9 seconds
new: 6.4 seconds
EDIT:
I realized that dumps does not work with some dictionaries, because the key order can vary internally for obscure reasons (see this question).
A workaround would be to order all the dicts:
import copy
import collections
def faithfulrepr(od):
    """Return a string signature of ``od`` that ignores mapping key order.

    Mappings are rendered with their items sorted by key, and the elements of
    lists are rendered recursively, so two structures that differ only in the
    key order of their (nested) mappings produce the same string. Anything
    else is rendered with plain ``repr``.

    Args:
        od: the object to describe; it is not modified.

    Returns:
        A ``str`` usable as a hashable stand-in for ``od``.

    Raises:
        TypeError: if the keys of a mapping cannot be sorted together.
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc. Fall back to the old location for Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    if isinstance(od, Mapping):
        res = collections.OrderedDict()
        for k, v in sorted(od.items()):
            res[k] = faithfulrepr(v)
        return repr(res)
    if isinstance(od, list):
        # Build a new list rather than overwriting elements in place, so the
        # caller's data is left untouched and no deep copy is needed (the
        # original `od.deepcopy(od)` raised AttributeError).
        return repr([faithfulrepr(v) for v in od])
    return repr(od)
def tupelize_dict(ds):
    """Group the keys of ``ds`` whose values share a ``faithfulrepr`` signature.

    Returns a new dict. A group of several keys is keyed by the sorted tuple
    of those keys; a key that is alone in its group is kept as it is. The
    value stored for a group is the last one seen with that signature.
    """
    representative = {}
    grouped_keys = collections.defaultdict(list)
    for key, value in ds.items():
        sig = faithfulrepr(value)
        representative[sig] = value
        grouped_keys[sig].append(key)

    result = {}
    for sig, keys in grouped_keys.items():
        if len(keys) > 1:
            new_key = tuple(sorted(keys))
        else:
            new_key = keys[0]
        result[new_key] = representative[sig]
    return result

Split on whitespaces in keys of dict, take value

I have a dictionary which has this form:
myDict = {'foo': bar, 'foobar baz': qux}
Now, I want to split each key on whitespace, turn every resulting word into its own key, and give each of them the original value (duplicated):
myDictRev1 = {'foo': bar, 'foobar': qux, 'baz': qux}
You can use dictionary comprehensions like this:
>>> myDict = {'foo': 'bar', 'foobar baz': 'qux'}
>>> {k:v for k, v in myDict.items() for k in k.split()}
{'baz': 'qux', 'foo': 'bar', 'foobar': 'qux'}

Adding key:value pair in a dictionary within a dictionary

How do I add key: value pairs to a dictionary within a dictionary in Python?
I need to take an input of a dictionary and sort the results by the type of the key:
# Asker's attempt at bucketing the entries of d by key type (Python 2 print
# statements). NOTE(review): d is not defined in this snippet; presumably it
# is the input dictionary.
new_d = {'int':{}, 'float':{}, 'str':{}}
temp = {}
for key in d:
    temp[key] = d[key]
    print temp
    if type(key) == str:
        # Binds the 'str' slot to the very same dict object as temp (no copy),
        # replacing the sub-dict that was there instead of adding to it...
        new_d['str'] = temp
        # ...so clearing temp here also empties new_d['str'].
        temp.clear()
    elif type(key) == int:
        # Only prints; nothing is ever stored in new_d['int'].
        print 'int'
        temp.clear()
    elif type(key) == float:
        # Only prints; nothing is ever stored in new_d['float'].
        print 'float'
        temp.clear()
This is what I have and nothing is writing to the new_d dictionary.
Output should look like this
>>> new_d = type_subdicts({1: 'hi', 3.0: '5', 'hi': 5, 'hello': 10})
>>> new_d[int]
{1: 'hi'}
>>> new_d[float]
{3.0: '5'}
>>> new_d[str] == {'hi': 5, 'hello': 10}
True
"""
You don't need a temporary dictionary for that. You can use the types directly as keys, too.
# Bucket every entry of d under the exact type of its key. The three buckets
# are created up front, so a key of any other type raises KeyError.
d = {1: 'a', 'c': [5], 1.1: 3}
result = dict((key_type, {}) for key_type in (int, float, str))
for key, value in d.items():
    bucket = result[type(key)]
    bucket[key] = value
Result:
>>> result
{<class 'float'>: {1.1: 3}, <class 'str'>: {'c': [5]}, <class 'int'>: {1: 'a'}}
>>> result[float]
{1.1: 3}
If you want, you can use collections.defaultdict to automatically add keys of the necessary type if they don't yet exist, instead of hard-coding them:
import collections
# Same bucketing, but defaultdict(dict) creates the bucket for a key type the
# first time that type is encountered, so no types are hard-coded.
d = {1: 'a', 'c': [5], 1.1: 3}
result = collections.defaultdict(dict)
for key, value in d.items():
    bucket = result[type(key)]
    bucket[key] = value
Result:
>>> result
defaultdict(<class 'dict'>, {<class 'float'>: {1.1: 3}, <class 'str'>: {'c': [5]}, <class 'int'>: {1: 'a'}})
>>> result[float]
{1.1: 3}

recursive access to dictionary and modification

I have the following dictionary:
my_dict = {'key1': {'key2': {'foo': 'bar'} } }
and I would like to append an entry to key1->key2->key3 with value 'blah' yielding:
my_dict = {'key1': {'key2': {'foo': 'bar', 'key3': 'blah'} } }
I am looking for a generic solution that is independent of the number of keys, i.e. key1->key2->key3->key4->key5 should work as well, even though keys from key3 on downwards do not exist. So that I get:
my_dict = {'key1': {'key2': {'foo': 'bar', 'key3': {'key4': {'key5': 'blah'} } } } }
Thanks in advance.
You can use the reduce() function to traverse a series of nested dictionaries:
def get_nested(d, path):
    """Return the value reached by following ``path`` through nested dicts.

    Args:
        d: the outermost dict.
        path: iterable of keys, outermost first. An empty path returns ``d``.

    Returns:
        The object stored at the end of the path.

    Raises:
        KeyError: if a key along the path does not exist.
        TypeError: if an intermediate value is not a dict.
    """
    # reduce() is only a builtin on Python 2; functools.reduce exists on both
    # Python 2.6+ and Python 3.
    from functools import reduce

    return reduce(dict.__getitem__, path, d)
Demo:
>>> def get_nested(d, path):
... return reduce(dict.__getitem__, path, d)
...
>>> my_dict = {'key1': {'key2': {'foo': 'bar', 'key3': {'key4': {'key5': 'blah'}}}}}
>>> get_nested(my_dict, ('key1', 'key2', 'key3', 'key4', 'key5'))
'blah'
This version throws an exception when a key doesn't exist:
>>> get_nested(my_dict, ('key1', 'nonesuch'))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 2, in get_nested
KeyError: 'nonesuch'
but you could replace dict.__getitem__ with lambda d, k: d.setdefault(k, {}) to have it create empty dictionaries instead:
def get_nested_default(d, path):
    """Follow ``path`` through nested dicts, creating missing levels.

    Every key along the path that does not exist yet is inserted with an empty
    dict as its value, so ``d`` may be modified.

    Args:
        d: the outermost dict.
        path: iterable of keys, outermost first. An empty path returns ``d``.

    Returns:
        The object stored at the end of the path (a new empty dict if the last
        key was missing).
    """
    # reduce() is only a builtin on Python 2; functools.reduce exists on both
    # Python 2.6+ and Python 3.
    from functools import reduce

    return reduce(lambda node, key: node.setdefault(key, {}), path, d)
Demo:
>>> def get_nested_default(d, path):
... return reduce(lambda d, k: d.setdefault(k, {}), path, d)
...
>>> get_nested_default(my_dict, ('key1', 'nonesuch'))
{}
>>> my_dict
{'key1': {'key2': {'key3': {'key4': {'key5': 'blah'}}, 'foo': 'bar'}, 'nonesuch': {}}}
To set a value at a given path, traverse over all keys but the last one, then use the final key in a regular dictionary assignment:
def set_nested(d, path, value):
    """Store ``value`` at the nested location ``path`` inside ``d``.

    Missing intermediate levels are created as empty dicts; the last key of
    the path is then assigned ``value`` (overwriting any existing entry).

    Args:
        d: the outermost dict; modified in place.
        path: non-empty sequence of keys, outermost first.
        value: the object to store.

    Raises:
        IndexError: if ``path`` is empty.
    """
    node = d
    # Walk every key but the last, creating empty dicts for missing levels.
    # The traversal is done inline so this function does not rely on a
    # reduce() builtin, which Python 3 does not have.
    for key in path[:-1]:
        node = node.setdefault(key, {})
    node[path[-1]] = value
This uses the get_nested_default() function to add empty dictionaries as needed:
>>> def set_nested(d, path, value):
... get_nested_default(d, path[:-1])[path[-1]] = value
...
>>> my_dict = {'key1': {'key2': {'foo': 'bar'}}}
>>> set_nested(my_dict, ('key1', 'key2', 'key3', 'key4', 'key5'), 'blah')
>>> my_dict
{'key1': {'key2': {'key3': {'key4': {'key5': 'blah'}}, 'foo': 'bar'}}}
An alternative to Martijn Pieters's excellent answer would be to use a nested defaultdict, rather than a regular dictionary:
from collections import defaultdict
def nested():
    """Nested dictionary factory: a defaultdict whose missing keys yield
    further dictionaries of the same kind, to any depth."""
    return defaultdict(nested)


my_dict = nested()
You can set values by using regular nested dictionary access semantics, and empty dictionaries will be created to fill the middle levels as necessary:
my_dict["key1"]["key2"]["key3"] = "blah"
This of course requires that the number of keys be known in advance when you write the code to set the value. If you want to be able to handle a variable-length list of keys, rather than a fixed number, you'll need functions to do the getting and setting for you, like in Martijn's answer.

Sort a list of Python dictionaries depending on a ordered criteria

Hi, I want to order a list of dictionaries according to an ordered list of criteria, in the most Pythonic way. For example:
[{'foo': FOO1}, {'foo': FOO2}, {'foo': FOO10}]
The criteria is variable, for example, I want to order first by [FOO2, FOO1, FOO8, FOO10], the result would be:
[{'foo': FOO2}, {'foo': FOO1}, {'foo': FOO10}]
Then, the situation changes and now we have another criteria [FOO2, FOO10, FOO1], the result would be:
[{'foo': FOO2}, {'foo': FOO10}, {'foo': FOO1}]
Note: The criteria will always have the symbols related to key 'foo'.
Any ideas?
to_sort = [{'foo': FOO1}, {'foo': FOO2}, {'foo': FOO10}]
to_sort.sort(key=lambda x: x....)
EDIT: I figured it out:
>>> to_sort = [{'foo': FOO1}, {'foo': FOO2}, {'foo': FOO10}]
>>> criteria = [FOO10, FOO2, FOO1]
>>> to_sort.sort(key=lambda x: criteria.index(x['foo']))
>>> to_sort
[{'foo': FOO10}, {'foo': FOO2}, {'foo': FOO1}]
Kind regards
You want list.index().

Categories