merge a list of dicts by a certain key - python

I have a list which is composed of dict of same structure ,
sample = [{'a':1, 'b':2, 'c':3}, {'a':1, 'b':2, 'c':4}, {'a':2, 'b':2, 'c':5}, {'a':2, 'b':3, 'c':5}]
I want to combine them by key a, the out should be
[{'a': 1, 'd': [{'b':2, 'c':3}, {'b':2, 'c':4}]}, {'a': 2, 'd': [{'b':2, 'c':5}, {'b': 3, 'c':5}]}]

You can use itertools.groupby:
>>> from itertools import groupby
>>> result = []
>>> for key, group in groupby(sorted(sample, key=lambda x:x['a']), key=lambda x:x.pop('a')):
result.append({'a':key, 'd':[*group]})
>>> result
[{'a': 1, 'd': [{'b': 2, 'c': 3}, {'b': 2, 'c': 4}]},
{'a': 2, 'd': [{'b': 2, 'c': 5}, {'b': 3, 'c': 5}]}]
NOTE: You don't need sorted if it is guaranteed that the list of dicts come sorted by the value of key a.

Combine by key:
dict_list = [{'a':1, 'b':2, 'c':3}, {'a':1, 'b':2, 'c':4}, {'a':2, 'b':2, 'c':5}, {'a':2, 'b':3, 'c':5}]
new_dict = {}
for d in dict_list:
a = d.pop('a', None)
if new_dict.get(a):
new_dict[a].append(d)
else:
new_dict[a] = [d]
Convert to list:
final_list = [{'a': key, 'd': value} for key, value in new_dict.items()]
print(final_list)
[{'a': 1, 'd': [{'c': 3, 'b': 2}, {'c': 4, 'b': 2}]}, {'a': 2, 'd': [{'c': 5, 'b': 2}, {'c': 5, 'b': 3}]}]

sample = [{'a':1, 'b':2, 'c':3}, {'a':1, 'b':2, 'c':4}, {'a':2, 'b':2, 'c':5}, {'a':2, 'b':3, 'c':5}]
tmp = {}
for v in sample:
tmp.setdefault(v['a'], []).append(v)
del v['a']
out = [{'a': k, 'd': v} for k, v in tmp.items()]
from pprint import pprint
pprint(out)
Prints:
[{'a': 1, 'd': [{'b': 2, 'c': 3}, {'b': 2, 'c': 4}]},
{'a': 2, 'd': [{'b': 2, 'c': 5}, {'b': 3, 'c': 5}]}]

This may be a bit twisty code unfortunately, but it works:
from itertools import groupby
sample = [{'a':1, 'b':2, 'c':3},
{'a':1, 'b':2, 'c':4},
{'a':2, 'b':2, 'c':5},
{'a':2, 'b':3, 'c':5}]
main_key = "a"
print(
[{main_key:k,
"d": [{kk: vv for kk, vv in dct.items() if kk != main_key}
for dct in v]}
for k, v in groupby(sample, lambda d:d[main_key])]
)
gives:
[{'a': 1, 'd': [{'b': 2, 'c': 3}, {'b': 2, 'c': 4}]},
{'a': 2, 'd': [{'b': 2, 'c': 5}, {'b': 3, 'c': 5}]}]
(output slightly pretty-printed for readability)

An alternative solution using Pandas for your query.
import pandas as pd
sample = [{'a':1, 'b':2, 'c':3}, {'a':1, 'b':2, 'c':4}, {'a':2, 'b':2, 'c':5}, {'a':2, 'b':3, 'c':5}]
df=pd.DataFrame(sample)
This would create a DataFrame df using the above sample list. The next step would be to iterate through the GroupBy Object and create the output as required.
final_list=[]
for i, temp_df in df.groupby('a'):
temp_list=[]
for j in temp_df.index:
temp_list.append({'b':temp_df.loc[:,'b'][j],'c':temp_df.loc[:,'c'][j]})
final_list.append({'a':temp_df.loc[:,'a'][j],'d':temp_list})

Related

Cartesian product of two dict in two lists in Python

Here is my code.
>>> a = [{'a': 1}, {'b': 2}]
>>> b = [{'c': 3}, {'d': 4}]
I want to show:
[{'a':1, 'c':3}, {'b':2, 'c':3}, {'a':1, 'd':4}, {'b':2, 'd':4}]
Is there a way I can do it only with list/dict comprehension?
A one line, no import solution can consist of a lambda function:
f = lambda d, c:[c] if not d else [i for k in d[0] for i in f(d[1:], {**c, **k})]
a = [{'a': 1}, {'b': 2}]
b = [{'c': 3}, {'d': 4}]
print(f([a, b], {}))
Output:
[{'a': 1, 'c': 3}, {'a': 1, 'd': 4}, {'b': 2, 'c': 3}, {'b': 2, 'd': 4}]
However, a much cleaner solution can include itertools.product:
from itertools import product
result = [{**j, **k} for j, k in product(a, b)]
Output:
[{'a': 1, 'c': 3}, {'a': 1, 'd': 4}, {'b': 2, 'c': 3}, {'b': 2, 'd': 4}]
You can try this.
a = [{'a': 1}, {'b': 2}]
b = [{'c': 3}, {'d': 4}]
d = [ {**i, **j} for i in a for j in b ]
print(d)

How to convert list of dictionaries to dictionaries

mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
i want it as
myDict ={'a': 1, 'b': 2,'c': 3, 'd': 4,'e': 5, 'f': 6}
You can make use of ChainMap.
from collections import ChainMap
myDict = dict(ChainMap(*mylist ))
This will take each dictionary and iterate through its key value pairs in for (k,v) in elem.items() part and assign them to a new dictionary.
mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
new_dict = {k:v for elem in mylist for (k,v) in elem.items()}
print new_dict
This will replace the duplicated keys.
I would create a new dictionary, iterate over the dictionaries in mylist, then iterate over the key/value pairs in that dictionary. From there, you can add each key/value pair to myDict.
mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
myDict = {}
for Dict in mylist:
for key in Dict:
myDict[key] = Dict[key]
print(myDict)

Split python dictionary to result in all combinations of values

my_dict = {'a':[1,2], 'b':[3], 'c':{'d':[4,5], 'e':[6,7]}}
I need to derive all the combinations out of it as below.
{'a':1, 'b':3, 'c':{'d':4, 'e':6}}
{'a':1, 'b':3, 'c':{'d':4, 'e':7}}
{'a':1, 'b':3, 'c':{'d':5, 'e':6}}
{'a':1, 'b':3, 'c':{'d':5, 'e':7}}
{'a':2, 'b':3, 'c':{'d':4, 'e':6}}
and so on. There could be any level of nesting here
Please let me know how to achieve this
Something that I tried is pasted below but definitely was reaching nowhere
def gen_combinations(data):
my_list =[]
if isinstance(data, dict):
for k, v in data.iteritems():
if isinstance(v, dict):
gen_combinations(v)
elif isinstance(v, list):
for i in range(len(v)):
temp_dict = data.copy()
temp_dict[k] = v[i]
print temp_dict
my_dict = {'a':[1,2], 'b':[3], 'c':{'d':[4,5], 'e':[6,7]}}
gen_combinations(my_dict)
Which resulted in
{'a': 1, 'c': {'e': [6, 7], 'd': [4, 5]}, 'b': [3]}
{'a': 2, 'c': {'e': [6, 7], 'd': [4, 5]}, 'b': [3]}
{'e': 6, 'd': [4, 5]}
{'e': 7, 'd': [4, 5]}
{'e': [6, 7], 'd': 4}
{'e': [6, 7], 'd': 5}
{'a': [1, 2], 'c': {'e': [6, 7], 'd': [4, 5]}, 'b': 3}
from itertools import product
my_dict = {'a':[1,2], 'b':[3], 'c':{'d':[4,5], 'e':[6,7]}}
def process(d):
to_product = [] # [[('a', 1), ('a', 2)], [('b', 3),], ...]
for k, v in d.items():
if isinstance(v, list):
to_product.append([(k, i) for i in v])
elif isinstance(v, dict):
to_product.append([(k, i) for i in process(v)])
else:
to_product.append([(k, v)])
return [dict(l) for l in product(*to_product)]
for i in process(my_dict):
print(i)
Output:
{'a': 1, 'b': 3, 'c': {'e': 6, 'd': 4}}
{'a': 2, 'b': 3, 'c': {'e': 6, 'd': 4}}
{'a': 1, 'b': 3, 'c': {'e': 6, 'd': 5}}
{'a': 2, 'b': 3, 'c': {'e': 6, 'd': 5}}
{'a': 1, 'b': 3, 'c': {'e': 7, 'd': 4}}
{'a': 2, 'b': 3, 'c': {'e': 7, 'd': 4}}
{'a': 1, 'b': 3, 'c': {'e': 7, 'd': 5}}
{'a': 2, 'b': 3, 'c': {'e': 7, 'd': 5}}
Upd:
Code that works as asked here:
from itertools import product
my_dict = {'a':[1,2], 'e':[7], 'f':{'x':[{'a':[3,5]}, {'a':[4]}] } }
def process(d):
to_product = [] # [[('a', 1), ('a', 2)], [('b', 3),], ...]
for k, v in d.items():
if isinstance(v, list) and all(isinstance(i, dict) for i in v):
# specific case, when list of dicts process differently...
c = product(*list(process(i) for i in v))
to_product.append([(k, list(l)) for l in c])
elif isinstance(v, list):
to_product.append([(k, i) for i in v])
elif isinstance(v, dict):
to_product.append([(k, i) for i in process(v)])
else:
to_product.append([(k, v)])
return [dict(l) for l in product(*to_product)]
for i in process(my_dict):
print(i)
Output:
{'f': {'x': [{'a': 3}, {'a': 4}]}, 'a': 1, 'e': 7}
{'f': {'x': [{'a': 3}, {'a': 4}]}, 'a': 2, 'e': 7}
{'f': {'x': [{'a': 5}, {'a': 4}]}, 'a': 1, 'e': 7}
{'f': {'x': [{'a': 5}, {'a': 4}]}, 'a': 2, 'e': 7}
Solve it with two steps.
First replace each dict with a list of dicts generated by gen_combinations, called recursively.
Second, make the inner join between all keys. Each key has a flat list now.

How to set a value by key for a dictionary in python using the map function

I know that I can set a key-value pair by using
dict[key] = value
but I have a very long list of dicts of the type
dict = [{a:1, b:2, c:3, d:4},
{a:2, b:3, c:4, d:5},
{a:5, b:7, c:3, d:9}]
and I'd like to do something along the lines of
dict = map(lambda x: x['d'] <- x['d'] -1, dict)
how would I go about this? (This is a very simplified example so I'm not really trying to just subtract a number from all items by a particular key)
expected output would be in this case and not the general case I'm looking for
[{a:1, b:2, c:3, d:3},
{a:2, b:3, c:4, d:4},
{a:5, b:7, c:3, d:8}]
EDIT: 2
I believe the following does not work - so any similar solution would be helpful:
dict = map(lambda x: x.update(d, x[d] - 1), dict)
dicts = [{'a':1, 'b':2, 'c':3, 'd':4},
{'a':2, 'b':3, 'c':4, 'd':5},
{'a':5, 'b':7, 'c':3, 'd':9}]
for d in dicts:
d['d'] -= 1
Output:
In [94]: dicts
Out[94]:
[{'d': 3, 'b': 2, 'c': 3, 'a': 1},
{'d': 4, 'b': 3, 'c': 4, 'a': 2},
{'d': 8, 'b': 7, 'c': 3, 'a': 5}]
how about this: as exactly you said
>>> dicts = [{'a':1, 'b':2, 'c':3, 'd':4},
{'a':2, 'b':3, 'c':4, 'd':5},
{'a':5, 'b':7, 'c':3, 'd':9}]
>>> map(lambda x:x.update([('d',x['d']-1)]),dicts)
[None, None, None]
>>> dicts
[{'a': 1, 'c': 3, 'b': 2, 'd': 3}, {'a': 2, 'c': 4, 'b': 3, 'd': 4}, {'a': 5, 'c': 3, 'b': 7, 'd': 8}]
update will update the dictionary with (key,value) pair. Returns None
map is a way of transforming an iterable to a list by performing the same operation on every item from the iterable. I don't think that's what you want to do here, and it has confused you.
On the face of it (although you haven't mentioned what the real operation is that you want to perform) a simple for is all that is necessary:
dict_list = [
{'a': 1, 'b': 2, 'c': 3, 'd': 4},
{'a': 2, 'b': 3, 'c': 4, 'd': 5},
{'a': 5, 'b': 7, 'c': 3, 'd': 9},
]
for d in dict_list:
d['d'] -= 1
print(d)
output
{'a': 1, 'b': 2, 'c': 3, 'd': 3}
{'a': 2, 'b': 3, 'c': 4, 'd': 4}
{'a': 5, 'b': 7, 'c': 3, 'd': 8}
Using dict.__setitem__ and temporary list (or any other collection typer) trick:
>>> dicts = [{'a':1, 'b':2, 'c':3, 'd':4},
... {'a':2, 'b':3, 'c':4, 'd':5},
... {'a':5, 'b':7, 'c':3, 'd':9}]
>>> map(lambda d: [d.__setitem__('d', d['d'] - 1), d][1], dicts)
[{'a': 1, 'c': 3, 'b': 2, 'd': 3},
{'a': 2, 'c': 4, 'b': 3, 'd': 4},
{'a': 5, 'c': 3, 'b': 7, 'd': 8}]
Using simple for loop is moe recommended way. Especially there's a side effect in the function.
BTW, don't use dict as a variable name. It will shadows builtin function/type dict.
How about this:
my_dict = {k: f(v) for k, v in my_dict.iteritems()}
where f is whatever function you want.

Get max keys of a list of dictionaries

If I have:
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
How can I get the maximum keys only, like this:
{'a': 11,'c': 10,'b': 7}
Use collection.Counter() objects instead, or convert your dictionaries:
from collections import Counter
result = Counter()
for d in dicts:
result |= Counter(d)
or even:
from collections import Counter
from operator import or_
result = reduce(or_, map(Counter, dicts), Counter())
Counter objects support finding the maximum per key natively through the | operation; & gives you the minimum.
Demo:
>>> result = Counter()
>>> for d in dicts:
... result |= Counter(d)
...
>>> result
Counter({'a': 11, 'c': 10, 'b': 7})
or using the reduce() version:
>>> reduce(or_, map(Counter, dicts), Counter())
Counter({'a': 11, 'c': 10, 'b': 7})
>>> dicts = [{'a': 4,'b': 7,'c': 9},
... {'a': 2,'b': 1,'c': 10},
... {'a': 11,'b': 3,'c': 2}]
>>> {letter: max(d[letter] for d in dicts) for letter in dicts[0]}
{'a': 11, 'c': 10, 'b': 7}
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
def get_max(dicts):
res = {}
for d in dicts:
for k in d:
res[k] = max(res.get(k, float('-inf')), d[k])
return res
>>> get_max(dicts)
{'a': 11, 'c': 10, 'b': 7}
Something like this should work:
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
max_keys= {}
for d in dicts:
for k, v in d.items():
max_keys.setdefault(k, []).append(v)
for k in max_keys:
max_keys[k] = max(max_keys[k])

Categories