Make new list of dictionaries from existing list of dictionaries? - python

I have the following list of dictionaries:
# Sample input: several records may share the same "id".
dict1 = [
    {"id": 1, "name": "tamara", "age": 23},
    {"id": 1, "name": "mia", "age": 14},
    {"id": 1, "name": "teo", "age": 33},
    {"id": 2, "name": "maya", "age": 30},
]
I would like to create a new list of dictionaries from the existing one: if the same "id" (here "id": 1) appears several times in dict1, it should not be repeated in the new list; instead, the matching records should be nested as a list of dicts inside a single dict:
# Desired result: records sharing an "id" are nested under a single entry.
dict2 = [
    {"id": 1, "newkey": [
        {"name": "tamara", "age": 23},
        {"name": "mia", "age": 14},
        {"name": "teo", "age": 33},
    ]},
    {"id": 2, "name": "maya", "age": 30},
]
This is what I want to achieve. Any suggestions on how to do it?

You can use itertools.groupby:
import itertools

# Sample records; several of them share the same 'id'.
dict1 = [
    {"id": 1, "name": "tamara", "age": 23},
    {"id": 1, "name": "mia", "age": 14},
    {"id": 1, "name": "teo", "age": 33},
    {"id": 2, "name": "maya", "age": 30},
]

# groupby only merges adjacent items, so order the records by 'id' first.
ordered = sorted(dict1, key=lambda rec: rec['id'])

# new_d holds [id, [records with that id]] pairs.
new_d = [
    [group_id, list(members)]
    for group_id, members in itertools.groupby(ordered, key=lambda rec: rec['id'])
]

# Rebuild every group as {'id': ..., 'new_key': [...]}, dropping 'id' from
# the nested records.
dict2 = [
    {
        'id': group_id,
        'new_key': [
            {field: value for field, value in member.items() if field != 'id'}
            for member in members
        ],
    }
    for group_id, members in new_d
]
Output:
[{'new_key': [{'age': 23, 'name': 'tamara'}, {'age': 14, 'name': 'mia'}, {'age': 33, 'name': 'teo'}], 'id': 1}, {'new_key': [{'age': 30, 'name': 'maya'}], 'id': 2}]

Use itertools.groupby
from operator import itemgetter
from itertools import groupby

dict1 = [
    {"id": 1, "name": "tamara", "age": 23},
    {"id": 1, "name": "mia", "age": 14},
    {"id": 1, "name": "teo", "age": 33},
    {"id": 2, "name": "maya", "age": 30},
]

get_id = itemgetter('id')

# groupby only merges *adjacent* records, so sort by 'id' first; otherwise an
# id that reappears later in the list would start a second group.
dict2 = [
    {
        'id': group_id,
        # One nested dict per original record (minus its 'id'), rather than
        # one single-key dict per field.
        'new_key': [
            {k: v for k, v in record.items() if k != 'id'}
            for record in records
        ],
    }
    for group_id, records in groupby(sorted(dict1, key=get_id), key=get_id)
]
print(dict2)
# [{'id': 1, 'new_key': [{'name': 'tamara', 'age': 23}, {'name': 'mia', 'age': 14}, {'name': 'teo', 'age': 33}]}, {'id': 2, 'new_key': [{'name': 'maya', 'age': 30}]}]

Related

Need help in extracting value counts from a list of dictionaries using python

I am looking to calculate the % of values against keys from a List of dictionaries and their corresponding counts along with values and their corresponding count using Python.
Used below code to extract keys and their count. Need help in expanding it to extract values and their corresponding count.
The Data looks like this:
people = [
{"name": "Tom", "age": 10, "city": "NewYork"},
{"name": "Mark", "age": 5, "country": "Japan"},
{"name": "Pam", "age": 7, "city": "London"},
{"name": "Tom", "hight": 163, "city": "California"},
{"name": "Lena", "weight": 45, "country": "Italy"},
{"name": "Ben", "age": 17, "city": "Colombo"},
{"name": "Lena", "gender": "Female", "country": "Italy"},
{"name": "Ben", "gender": "Male", "city": "Colombo"},
]
def getKeyCount(lst):
    """Count in how many dicts of ``lst`` each key appears.

    Args:
        lst: iterable of dicts.

    Returns:
        A plain dict mapping every key to the number of dicts containing it,
        ordered by first appearance.
    """
    from collections import Counter

    return dict(Counter(key for record in lst for key in record))
def getValCount(lst):
    """Count how often each value occurs across all dicts of ``lst``.

    Values are counted regardless of the key they are stored under, and must
    be hashable.

    Args:
        lst: iterable of dicts.

    Returns:
        A plain dict mapping every value to its number of occurrences,
        ordered by first appearance.
    """
    from collections import Counter

    return dict(Counter(value for record in lst for value in record.values()))
getKeyCount(people)
# {'name': 8, 'age': 4, 'city': 5, 'country': 3,
# 'hight': 1, 'weight': 1, 'gender': 2}
getValCount(people)
# {'Tom': 2, 'NewYork': 1, 'Mark': 1, 'Japan': 1, 'Pam': 1,
# 'London': 1, 'California': 1, etc.}
I want output like this:
Name: 10
'Tom': 2, 'Mark': 3, 'Pam': 1,'Lena': 3, 'Ben': 2
City:4
'London': 1, 'California': 1, 'NewYork': 2
I am new to this, can someone help me?
Try:
from collections import Counter, defaultdict

people = [
    {"name": "Tom", "age": 10, "city": "NewYork"},
    {"name": "Mark", "age": 5, "country": "Japan"},
    {"name": "Pam", "age": 7, "city": "London"},
    {"name": "Tom", "hight": 163, "city": "California"},
    {"name": "Lena", "weight": 45, "country": "Italy"},
    {"name": "Ben", "age": 17, "city": "Colombo"},
    {"name": "Lena", "gender": "Female", "country": "Italy"},
    {"name": "Ben", "gender": "Male", "city": "Colombo"},
]

# cnt maps each key to a Counter of the values seen under that key.
cnt = defaultdict(Counter)
for p in people:
    if not isinstance(p, dict):  # <-- make sure the items are dicts
        continue
    for k, v in p.items():
        cnt[k][v] += 1

for k, v in cnt.items():
    # First line: the key and the number of records that contain it.
    print(k, sum(v.values()))
    # Second line: every value seen under that key with its count.
    for kk, vv in v.items():
        print("{}: {}".format(kk, vv), end=" ")
    print("\n")
Prints:
name 8
Tom: 2 Mark: 1 Pam: 1 Lena: 2 Ben: 2
age 4
10: 1 5: 1 7: 1 17: 1
city 5
NewYork: 1 London: 1 California: 1 Colombo: 2
country 3
Japan: 1 Italy: 2
hight 1
163: 1
weight 1
45: 1
gender 2
Female: 1 Male: 1
UPDATE: Added check that values are of type dict
You can try the list.count method on every value to get the count of each value:
people = [{'name': "Tom", 'age': 10, "city" : "NewYork"},
{'name': "Mark", 'age': 5, "country" : "Japan"},
{'name': "Pam", 'age': 7, "city" : "London"},
{'name': "Tom", 'hight': 163, "city" : "California"},
{'name': "Lena", 'weight': 45, "country" : "Italy"},
{'name': "Ben", 'age': 17, "city" : "Colombo"},
{'name': "Lena", 'gender': "Female", "country" : "Italy"},
{'name': "Ben", 'gender': "Male", "city" : "Colombo"}]
def getKeyCount(lst):
    """Collect, for every key, the list of values stored under it.

    Args:
        lst: iterable of dicts.

    Returns:
        A dict mapping each key to the list of its values, in the order the
        dicts were visited (duplicates are kept).
    """
    out = {}
    for d in lst:
        for k, value in d.items():
            # Append in place instead of rebuilding the list on every hit.
            out.setdefault(k, []).append(value)
    return out
d = getKeyCount(people)
def display(d, key):
    """Print the number of values stored under ``key`` and a per-value count.

    Args:
        d: dict mapping each key to the list of its values.
        key: the key to report on; its capitalized form is used as the label.
    """
    vals = d[key]
    # Label with the requested key rather than a fixed "Name:".
    print(f"{key.capitalize()}:", len(vals))
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is the same on every run (a set's iteration order is not).
    print(', '.join(f"{val}: {vals.count(val)}" for val in dict.fromkeys(vals)))
display(d, 'name')
display(d, 'city')
Output:
Name: 8
Mark: 1, Ben: 2, Tom: 2, Pam: 1, Lena: 2
City: 5
London: 1, NewYork: 1, Colombo: 2, California: 1

Merge dict in a Python list

Assume I have this:
[
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
I want to transform this list into :
[
{"name": "bob", "total": 4},
{"name": "alice", "total": 9},
{"name": "eve", "total": 2}
]
For now, for each item of the first list, I walk through the whole second list to find out whether the key already exists.
How can I achieve this with a lower complexity?
from collections import defaultdict

a = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# sum the totals per name in a single pass (one dict lookup per record)
freq = defaultdict(int)
for d in a:
    freq[d['name']] += d['total']

# build the result list; names keep their first-seen order
a = [{'name': key, 'total': val} for key, val in freq.items()]
print(a)
If you only have two pieces of information (name and total), I would suggest changing your schema a bit. Instead of a list of dictionaries, use a single dictionary where the keys are names and the values are totals:
>>> values = [
... {"name": "bob", "total": 1},
... {"name": "alice", "total": 5},
... {"name": "eve", "total": 2},
... {"name": "bob", "total": 3},
... {"name": "alice", "total": 2},
... {"name": "alice", "total": 2},
... ]
>>> from collections import defaultdict
>>> totals_by_name = defaultdict(int)
>>> for value in values:
... totals_by_name[value["name"]] += value["total"]
...
>>> totals_by_name
defaultdict(<class 'int'>, {'bob': 4, 'alice': 9, 'eve': 2})
This can work even if you have more pieces of data that you want to look up by name (replace the integer value with a nested dictionary that stores the total as well as other data).
You can use groupby from the itertools module:
from itertools import groupby
from operator import itemgetter

# Key functions: itemgetter(foo) behaves roughly like lambda x: x[foo].
get_name = itemgetter('name')
get_total = itemgetter('total')

lst = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# groupby only merges neighbouring items, hence the sort by name beforehand.
by_name = sorted(lst, key=get_name)
grouped = groupby(by_name, key=get_name)

# One output record per name, carrying the sum of that group's totals.
new_list = [
    dict(name=label, total=sum(map(get_total, rows)))
    for label, rows in grouped
]
groupby will produce a new sequence that collects the dicts from the original list into subsequences, based on a common value of the 'name' attribute. Iterating over that lets you extract all the total values to sum up for use in a new list of dict values.
Let's say,
your_data = [
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
You can simply use pandas to receive the desired output.
import pandas as pd

df = pd.DataFrame(your_data)
# Select the column to aggregate explicitly: the first positional parameter of
# GroupBy.sum() is `numeric_only`, not a column name.
df = df.groupby(by='name', as_index=False)['total'].sum()
# One dict per row: [{'name': ..., 'total': ...}, ...]
result = df.to_dict(orient='records')
OUTPUT: [{'name': 'alice', 'total': 9}, {'name': 'bob', 'total': 4}, {'name': 'eve', 'total': 2}]

How to merge dicts in list of dicts with similar keys

I have a list of dicts like this:
list_of_dicts = [
{"id": 1, "color_positive": "green"},
{"id": 1, "color_negative": "red"},
{"id": 2, "color_positive": "blue"},
{"id": 2, "color_negative": "yellow"},
]
And I want to make:
[
{"id": 1, "color_positive": "green", "color_negative": "red"},
{"id": 2, "color_positive": "blue", "color_negative": "yellow"},
]
Are there any ways?
You can use defaultdict for this.
from collections import defaultdict

# Merge all records that share an "id" into a single dict per id.
result = defaultdict(dict)
for item in list_of_dicts:
    # Pass the mapping itself: `update(**item)` would raise TypeError as soon
    # as a record contains a non-string key.
    result[item["id"]].update(item)
# Drop the grouping keys; ids keep their first-seen order.
result = list(result.values())
print(result)
Output:
[{'id': 1, 'color_positive': 'green', 'color_negative': 'red'}, {'id': 2, 'color_positive': 'blue', 'color_negative': 'yellow'}]

Correlating to list of dictionaries in Python

I have two lists containing dictionaries:
# Records to enrich.
List1 = [
    {"Value": "Value1", "Start": 7.11, "End": 8},
    {"Value": "Value2", "Start": 16.45, "End": 20},
]
# Matching results, in the same order as List1.
List2 = [
    {"From": 7.11, "To": 8, "Result": 0},
    {"From": 16.45, "To": 20, "Result": 1},
]
I need to produce a list by correlating these lists. So result will be
# Desired outcome: every List1 record extended with the matching "Result".
Result = [
    {"Value": "Value1", "Start": 7.11, "End": 8, "Result": 0},
    {"Value": "Value2", "Start": 16.45, "End": 20, "Result": 1},
]
This almost seems like a simple table join in SQL.
How would I do it in Python?
Thanks!
You can use a nested dictionary comprehension:
List1 = [
    {"Value": "Value1", "Start": 7.11, "End": 8},
    {"Value": "Value2", "Start": 16.45, "End": 20},
]
List2 = [
    {"From": 7.11, "To": 8, "Result": 0},
    {"From": 16.45, "To": 20, "Result": 1},
]

# Pair the records positionally and copy each List1 entry, adding the
# 'Result' of its List2 counterpart.
new_list = []
for left, right in zip(List1, List2):
    merged = dict(left)
    merged['Result'] = right['Result']
    new_list.append(merged)
Output:
[{'Value': 'Value1', 'Start': 7.11, 'End': 8, 'Result': 0}, {'Value': 'Value2', 'Start': 16.45, 'End': 20, 'Result': 1}]
Since dictionary unpacking (**) inside a dict literal is only available in Python 3.5+, you can use dict.items in Python 2:
new_list = [dict(a.items()+[('Result', b['Result'])]) for a, b in zip(List1, List2)]
Output:
[{'Start': 7.11, 'End': 8, 'Result': 0, 'Value': 'Value1'}, {'Start': 16.45, 'End': 20, 'Result': 1, 'Value': 'Value2'}]

n-depth tree: set parent value based on children values

In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What could be an approach to recursively set each parent value based on the result of an operation performed on its children's values (e.g. sum)?
I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the traversal order is guaranteed: nodes are pushed onto a stack level by level, so by the time a node that has children is popped, all of its descendants have already been calculated.
The solution:
def reverseTraversal(obj):
    """Set every parent's 'value' to the sum of its children's values.

    The tree is walked breadth-first and the visit order is recorded on a
    stack. Popping that stack handles nodes deepest level first, so every
    child has its final value before its parent is computed.

    Args:
        obj: root node, a dict with a 'value' and an optional 'children'
            list of nodes of the same shape, or None.

    Returns:
        The same ``obj``, updated in place (None when ``obj`` is None).
    """
    from collections import deque

    def parentOperation(node):
        # The aggregation applied to a parent: sum of its children's values.
        return sum(child['value'] for child in node['children'])

    if obj is None:
        return None

    queue = deque([obj])  # deque gives O(1) pops from the left
    stack = []
    while queue:
        node = queue.popleft()
        stack.append(node)
        queue.extend(node.get('children') or ())

    while stack:
        node = stack.pop()
        if node.get('children'):
            node['value'] = parentOperation(node)

    # Return the tree so callers can write `obj = reverseTraversal(obj)`.
    return obj
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
    """Return a copy of the nested dict ``d`` with missing values filled in.

    Every value that is None is replaced by the next item of ``targets``,
    consumed in the order the keys are visited. Lists and nested dicts are
    processed recursively; ``d`` itself is not modified.

    Args:
        d: the (possibly nested) node dict; list values must contain dicts.
        targets: an *iterator* yielding the replacement values.

    Returns:
        A new dict with the same structure as ``d``.

    Raises:
        StopIteration: if ``targets`` runs out before every None is filled.
    """
    result = {}
    for key, value in d.items():
        if isinstance(value, list):
            result[key] = [update(item, targets) for item in value]
        elif isinstance(value, dict):
            result[key] = update(value, targets)
        elif value is None:
            # Only None marks a missing value; legitimate falsy values such
            # as 0 or "" are kept as they are.
            result[key] = next(targets)
        else:
            result[key] = value
    return result
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

Categories