Assume I have this:
[
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
I want to transform this list into :
[
{"name": "bob", "total": 4},
{"name": "alice", "total": 9},
{"name": "eve", "total": 2}
]
Currently, for each element of the first list, I walk through the whole second list to check whether the key already exists.
How can I achieve this with a lower complexity?
from collections import defaultdict
# Input rows: one dict per (name, total) observation; names may repeat.
a = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# Accumulate the total per name in a single pass over the rows.
# defaultdict(int) starts every unseen name at 0.
freq = defaultdict(int)
for d in a:
    freq[d['name']] += d['total']

# Rebuild the list-of-dicts shape. Dicts keep insertion order, so names come
# out in first-seen order. As in the original snippet, ``a`` is rebound to
# the aggregated result.
a = [{'name': key, 'total': val} for key, val in freq.items()]
print(a)
If you only have two pieces of information (name and total), I would suggest changing your schema a bit. Instead of a list of dictionaries, use a single dictionary where the keys are names and the values are totals:
>>> values = [
... {"name": "bob", "total": 1},
... {"name": "alice", "total": 5},
... {"name": "eve", "total": 2},
... {"name": "bob", "total": 3},
... {"name": "alice", "total": 2},
... {"name": "alice", "total": 2},
... ]
>>> from collections import defaultdict
>>> totals_by_name = defaultdict(int)
>>> for value in values:
... totals_by_name[value["name"]] += value["total"]
...
>>> totals_by_name
defaultdict(<class 'int'>, {'bob': 4, 'alice': 9, 'eve': 2})
This can work even if you have more pieces of data that you want to look up by name (replace the integer value with a nested dictionary that stores the total as well as other data).
You can use groupby from the itertools module:
from itertools import groupby
from operator import itemgetter
# Key functions: itemgetter(field) behaves roughly like lambda row: row[field]
get_name = itemgetter('name')
get_total = itemgetter('total')

lst = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# groupby only merges *adjacent* rows with equal keys, so the rows are
# ordered by name before grouping.
rows_by_name = sorted(lst, key=get_name)
grouped = groupby(rows_by_name, get_name)

# One output dict per distinct name, carrying the sum of that group's totals.
new_list = []
for name, rows in grouped:
    new_list.append({'name': name, 'total': sum(map(get_total, rows))})
groupby will produce a new sequence that collects the dicts from the original list into subsequences, based on a common value of the 'name' attribute. Iterating over that lets you extract all the total values to sum up for use in a new list of dict values.
Let's say,
your_data = [
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
You can simply use pandas to receive the desired output.
import pandas as pd
# Build a frame with one row per input dict.
df = pd.DataFrame(your_data)
# Select the column to aggregate explicitly. The first positional parameter
# of GroupBy.sum() is ``numeric_only``, not a column name, so ``.sum('total')``
# does not pick the column. as_index=False keeps 'name' as a regular column.
df = df.groupby(by='name', as_index=False)['total'].sum()
# One dict per group, e.g. {'name': 'alice', 'total': 9}.
result = df.to_dict(orient='records')
OUTPUT: [{'name': 'alice', 'total': 9}, {'name': 'bob', 'total': 4}, {'name': 'eve', 'total': 2}]
Related
I am looking to calculate the percentage of values against keys in a list of dictionaries, together with the count of each key and the count of each distinct value under that key, using Python.
I used the code below to extract the keys and their counts. I need help extending it to extract the values and their corresponding counts.
The Data looks like this:
people = [
{"name": "Tom", "age": 10, "city": "NewYork"},
{"name": "Mark", "age": 5, "country": "Japan"},
{"name": "Pam", "age": 7, "city": "London"},
{"name": "Tom", "hight": 163, "city": "California"},
{"name": "Lena", "weight": 45, "country": "Italy"},
{"name": "Ben", "age": 17, "city": "Colombo"},
{"name": "Lena", "gender": "Female", "country": "Italy"},
{"name": "Ben", "gender": "Male", "city": "Colombo"},
]
def getKeyCount(lst):
    """Count how many dicts in ``lst`` contain each key.

    Args:
        lst: Iterable of dicts.

    Returns:
        A plain dict mapping each key to the number of dicts it appears in,
        with keys in first-seen order. Empty input gives ``{}``.
    """
    from collections import Counter

    # Iterating a dict yields its keys; Counter tallies them across all dicts.
    return dict(Counter(k for d in lst for k in d))
def getValCount(lst):
    """Count how often each value occurs across all dicts in ``lst``.

    Values are tallied regardless of which key they were stored under, and
    must be hashable.

    Args:
        lst: Iterable of dicts.

    Returns:
        A plain dict mapping each distinct value to its number of
        occurrences, in first-seen order. Empty input gives ``{}``.
    """
    from collections import Counter

    return dict(Counter(v for d in lst for v in d.values()))
getKeyCount(people)
# {'name': 8, 'age': 4, 'city': 5, 'country': 3,
# 'hight': 1, 'weight': 1, 'gender': 2}
getValCount(people)
# {'Tom': 2, 'NewYork': 1, 'Mark': 1, 'Japan': 1, 'Pam': 1,
# 'London': 1, 'California': 1, etc.}
I want output like this:
Name: 10
'Tom': 2, 'Mark': 3, 'Pam': 1,'Lena': 3, 'Ben': 2
City:4
'London': 1, 'California': 1, 'NewYork': 2
I am new to this, can someone help me?
Try:
from collections import Counter, defaultdict
people = [
    {"name": "Tom", "age": 10, "city": "NewYork"},
    {"name": "Mark", "age": 5, "country": "Japan"},
    {"name": "Pam", "age": 7, "city": "London"},
    {"name": "Tom", "hight": 163, "city": "California"},
    {"name": "Lena", "weight": 45, "country": "Italy"},
    {"name": "Ben", "age": 17, "city": "Colombo"},
    {"name": "Lena", "gender": "Female", "country": "Italy"},
    {"name": "Ben", "gender": "Male", "city": "Colombo"},
]

# One Counter per key: cnt[key][value] is how often ``value`` occurs under
# ``key`` across all records.
cnt = defaultdict(Counter)
for p in people:
    if not isinstance(p, dict):  # <-- make sure the items are dicts
        continue
    for k, v in p.items():
        cnt[k][v] += 1

for k, v in cnt.items():
    # Header line: the key and how many records carry it.
    print(k, sum(v.values()))
    # Breakdown: every distinct value with its count, all on one line.
    for kk, vv in v.items():
        print("{}: {}".format(kk, vv), end=" ")
    # Ends the breakdown line and leaves a blank line between keys.
    print("\n")
Prints:
name 8
Tom: 2 Mark: 1 Pam: 1 Lena: 2 Ben: 2
age 4
10: 1 5: 1 7: 1 17: 1
city 5
NewYork: 1 London: 1 California: 1 Colombo: 2
country 3
Japan: 1 Italy: 2
hight 1
163: 1
weight 1
45: 1
gender 2
Female: 1 Male: 1
UPDATE: Added check that values are of type dict
You can try the list.count method on every value to get the count of each value:
people = [{'name': "Tom", 'age': 10, "city" : "NewYork"},
{'name': "Mark", 'age': 5, "country" : "Japan"},
{'name': "Pam", 'age': 7, "city" : "London"},
{'name': "Tom", 'hight': 163, "city" : "California"},
{'name': "Lena", 'weight': 45, "country" : "Italy"},
{'name': "Ben", 'age': 17, "city" : "Colombo"},
{'name': "Lena", 'gender': "Female", "country" : "Italy"},
{'name': "Ben", 'gender': "Male", "city" : "Colombo"}]
def getKeyCount(lst):
    """Group the values found in ``lst`` by the key they are stored under.

    Args:
        lst: Iterable of dicts.

    Returns:
        A dict mapping each key to the list of values seen for it, in the
        order the dicts were visited (duplicates are kept).
    """
    out = {}
    for d in lst:
        for k, v in d.items():
            # Append in place; ``out.get(k, []) + [v]`` would copy the whole
            # list on every addition.
            out.setdefault(k, []).append(v)
    return out
d = getKeyCount(people)
def display(d, key):
    """Print the number of values stored under ``key`` and a per-value count.

    Two lines are printed: a header ``"<Key>: <number of values>"`` using the
    capitalised key, then ``"<value>: <count>"`` pairs joined by ``", "`` in
    first-seen order.

    Args:
        d: Mapping from key to a list of hashable values.
        key: The key to report on.

    Raises:
        KeyError: If ``key`` is not present in ``d``.
    """
    from collections import Counter

    vals = d[key]
    # Label the header with the requested key instead of a fixed "Name:".
    print(f"{str(key).capitalize()}:", len(vals))
    # One pass over the values; Counter keeps first-seen order, so the
    # output is deterministic (unlike iterating a set).
    counts = Counter(vals)
    print(', '.join(f"{val}: {n}" for val, n in counts.items()))
display(d, 'name')
display(d, 'city')
Output:
Name: 8
Mark: 1, Ben: 2, Tom: 2, Pam: 1, Lena: 2
City: 5
London: 1, NewYork: 1, Colombo: 2, California: 1
I have this list of dictionaries:
"ingredients": [
{
"unit_of_measurement": {"name": "Pound (Lb)", "id": 13},
"quantity": "1/2",
"ingredient": {"name": "Balsamic Vinegar", "id": 12},
},
{
"unit_of_measurement": {"name": "Pound (Lb)", "id": 13},
"quantity": "1/2",
"ingredient": {"name": "Balsamic Vinegar", "id": 12},
},
{
"unit_of_measurement": {"name": "Tablespoon", "id": 15},
"ingredient": {"name": "Basil Leaves", "id": 14},
"quantity": "3",
},
]
I want to be able to find the duplicates of ingredients (by either name or id). If there are duplicates and have the same unit_of_measurement, combine them into one dictionary and add the quantity accordingly. So the above data should return:
[
{
"unit_of_measurement": {"name": "Pound (Lb)", "id": 13},
"quantity": "1",
"ingredient": {"name": "Balsamic Vinegar", "id": 12},
},
{
"unit_of_measurement": {"name": "Tablespoon", "id": 15},
"ingredient": {"name": "Basil Leaves", "id": 14},
"quantity": "3",
},
]
How do I go about it?
Assuming you have a dictionary represented like this:
data = {
"ingredients": [
{
"unit_of_measurement": {"name": "Pound (Lb)", "id": 13},
"quantity": "1/2",
"ingredient": {"name": "Balsamic Vinegar", "id": 12},
},
{
"unit_of_measurement": {"name": "Pound (Lb)", "id": 13},
"quantity": "1/2",
"ingredient": {"name": "Balsamic Vinegar", "id": 12},
},
{
"unit_of_measurement": {"name": "Tablespoon", "id": 15},
"ingredient": {"name": "Basil Leaves", "id": 14},
"quantity": "3",
},
]
}
What you could do is use a collections.defaultdict of lists to group the ingredients by a (name, id) grouping key:
from collections import defaultdict
# Group the entries that refer to the same ingredient. The key is built from
# the ingredient's sorted (field, value) pairs, so it is hashable and does
# not depend on the order of the fields inside each "ingredient" dict.
ingredient_groups = defaultdict(list)
for ingredient in data["ingredients"]:
    key = tuple(sorted(ingredient["ingredient"].items()))
    ingredient_groups[key].append(ingredient)
Then you could go through the grouped values of this defaultdict and calculate the sum of the fractional quantities using fractions.Fraction. For unit_of_measurement and ingredient, we can simply take the values from the first entry of each group.
from fractions import Fraction
# Collapse every group into a single entry whose quantity is the exact
# (fractional) sum of the group's quantities, rendered back as a string.
result = []
for group in ingredient_groups.values():
    first = group[0]  # unit and ingredient are taken from the first entry
    total = sum(Fraction(entry["quantity"]) for entry in group)
    result.append(
        {
            "unit_of_measurement": first["unit_of_measurement"],
            "quantity": str(total),
            "ingredient": first["ingredient"],
        }
    )
Which will then give you this result:
[{'ingredient': {'id': 12, 'name': 'Balsamic Vinegar'},
'quantity': '1',
'unit_of_measurement': {'id': 13, 'name': 'Pound (Lb)'}},
{'ingredient': {'id': 14, 'name': 'Basil Leaves'},
'quantity': '3',
'unit_of_measurement': {'id': 15, 'name': 'Tablespoon'}}]
You'll probably need to amend the above to account for the same ingredient appearing with different units of measurement, but this should get you started.
I am aware that this is a copy of this; however, there was no answer that I could see. The asker's problem could be fixed by using lists, but I do not believe that mine can. I am using nested dictionaries for an amateur game I'm making. Does anyone know how to re-order the elements back into ascending numerical order after I delete an element?
potions = {
1: {"name": "Potion of Bruh", "amount": 5},
2: {"name": "Potion of Epic", "amount": 10},
3: {"name": "Potion of Boi", "amount": 15},
4: {"name": "Potion of Matt", "amount": 12},
5: {"name": "Potion of Garfield", "amount": 3}
}
for i in range(1, len(potions) + 1):
if "Potion of Boi" == potions[i]["name"]:
del potions[i]
print(potions)
You are using a dictionary keyed by integers and should be using a list instead. You'll need to change how you remove elements but then the reordering will work properly:
# A list keeps the potions densely ordered: removing an element shifts the
# later ones down, so there are no gaps to renumber afterwards.
potions = [
    {"name": "Potion of Bruh", "amount": 5},
    {"name": "Potion of Epic", "amount": 10},
    {"name": "Potion of Boi", "amount": 15},
    {"name": "Potion of Matt", "amount": 12},
    {"name": "Potion of Garfield", "amount": 3},
]

# Index of the first potion with the wanted name, or None when there is no
# match. The search stops at the first hit.
idx_to_remove = next(
    (idx for idx, potion in enumerate(potions) if potion["name"] == "Potion of Boi"),
    None,
)

# Compare against None explicitly: index 0 is a valid match but is falsy.
if idx_to_remove is not None:
    potions.pop(idx_to_remove)

print(potions)
Use the sorted function:
potions = {
    9: {"name": "Potion of Bruh", "amount": 5},
    1: {"name": "Potion of Bruh", "amount": 5},
    2: {"name": "Potion of Epic", "amount": 10},
    3: {"name": "Potion of Boi", "amount": 15},
    4: {"name": "Potion of Matt", "amount": 12},
    5: {"name": "Potion of Garfield", "amount": 3},
}

# popitem() removes the most recently inserted entry (key 5 here).
potions.popitem()

# sorted() orders the (key, value) pairs by key, since the keys are unique
# and compared first. dict() then preserves that order.
sorted_d = dict(sorted(potions.items()))
print('Dictionary in ascending order by key : ', sorted_d)
I have the following list of dictionaries:
# Input records; several of them may share the same "id".
dict1 = [{"id": 1, "name": "tamara", "age": 23},
         {"id": 1, "name": "mia", "age": 14},
         {"id": 1, "name": "teo", "age": 33},
         {"id": 2, "name": "maya", "age": 30}]
I would like to create a new list of dictionaries from the existing one: if the same "id" (e.g. "id": 1) appears several times in dict1, it should not be repeated in the new list; instead, the matching entries should be collected as a list of dicts nested under a new key:
# Desired shape: records sharing an "id" are nested under one new key.
dict2 = [{"id": 1, "newkey": [{"name": "tamara", "age": 23},
                              {"name": "mia", "age": 14},
                              {"name": "teo", "age": 33}]},
         {"id": 2, "name": "maya", "age": 30}]
This is what I want to achieve any suggestion how?
You can use itertools.groupby:
import itertools
dict1 = [
    {"id": 1, "name": "tamara", "age": 23},
    {"id": 1, "name": "mia", "age": 14},
    {"id": 1, "name": "teo", "age": 33},
    {"id": 2, "name": "maya", "age": 30},
]


def _row_id(row):
    # Grouping key: the record's id.
    return row['id']


# Sort by id first (groupby only merges adjacent equal keys), then
# materialise each group as an [id, records] pair.
new_d = []
for group_id, members in itertools.groupby(sorted(dict1, key=_row_id), key=_row_id):
    new_d.append([group_id, list(members)])

# One output dict per id; the nested records drop their now-redundant 'id'.
dict2 = []
for group_id, members in new_d:
    stripped = [
        {field: val for field, val in member.items() if field != 'id'}
        for member in members
    ]
    dict2.append({'id': group_id, 'new_key': stripped})
Output:
[{'new_key': [{'age': 23, 'name': 'tamara'}, {'age': 14, 'name': 'mia'}, {'age': 33, 'name': 'teo'}], 'id': 1}, {'new_key': [{'age': 30, 'name': 'maya'}], 'id': 2}]
Use itertools.groupby
>>> from operator import itemgetter
>>> from itertools import groupby
>>> dict1 = [{"id": 1, "name": "tamara", "age":23}, {"id": 1, "name": "mia", "age":14}, {"id": 1, "name": "teo", "age":33}, {"id": 2, "name": "maya", "age":30}]
>>> [{'id': k, 'new_key':[{k2:v2} for d in list(v) for k2,v2 in d.items() if k2!='id']} for k,v in groupby(dict1, itemgetter('id'))]
# [{'new_key': [{'age': 23}, {'name': 'tamara'}, {'age': 14}, {'name': 'mia'}, {'age': 33}, {'name': 'teo'}], 'id': 1}, {'new_key': [{'age': 30}, {'name': 'maya'}], 'id': 2}]
In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What would be a good approach to recursively set each parent's value based on the result of an operation performed on its children's values (e.g. a sum)?
I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the level-by-level iteration order is guaranteed, so once we get to a node which has children, all of its sub-level children have already been calculated.
The solution:
def reverseTraversal(obj):
    """Fill in every parent's ``value`` with the sum of its children's values.

    The tree is a nested dict: a node may carry a ``'children'`` list of
    child nodes, and every leaf must already hold a numeric ``'value'``.
    Nodes are updated in place.

    Args:
        obj: Root node of the tree, or ``None``.

    Returns:
        The same ``obj`` (mutated in place), so that
        ``obj = reverseTraversal(obj)`` keeps the tree; ``None`` when ``obj``
        is ``None``.
    """
    from collections import deque

    def parentOperation(node):
        # Aggregate applied to a parent: the sum of its direct children's values.
        return sum(child['value'] for child in node['children'])

    if obj is None:
        return None

    # Pass 1: breadth-first walk, recording the nodes in level order.
    queue = deque([obj])
    stack = []
    while queue:
        node = queue.popleft()  # O(1), unlike list.pop(0)
        stack.append(node)
        queue.extend(node.get('children') or [])

    # Pass 2: replay the level order backwards. Children always sit on a
    # deeper level than their parent, so each child is final before its
    # parent reads it.
    while stack:
        node = stack.pop()
        if node.get('children'):
            node['value'] = parentOperation(node)

    return obj
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
    """Return a copy of nested dict ``d`` with ``None`` values filled from ``targets``.

    The structure is walked depth-first in key order; each ``None`` found
    consumes the next item of ``targets``. Only ``None`` counts as a
    placeholder, so legitimate falsy values such as ``0`` or ``""`` are kept.

    Args:
        d: Dict whose values may be scalars, dicts, or lists of these.
        targets: An *iterator* (e.g. ``iter([80, 30])``) yielding the
            replacement values in traversal order.

    Returns:
        A new dict with the same shape as ``d``; ``d`` itself is not modified.

    Raises:
        StopIteration: If ``targets`` is exhausted while placeholders remain.
    """
    def fill(value):
        # Containers are rebuilt recursively; scalars pass through unless
        # they are the None placeholder.
        if isinstance(value, list):
            return [fill(item) for item in value]
        if isinstance(value, dict):
            return update(value, targets)
        if value is None:
            return next(targets)
        return value

    return {key: fill(value) for key, value in d.items()}
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}