Sum values grouped by key in list of dict - python

I have a list of dicts and now I am trying to find the total jobs for each remote identifier.
In this case I am expecting for the id 64 -> 11 jobs and 68 -> 0 jobs
[{
'jobs': {
'count': 4
},
'remote_identifier': {
'id': '64'
}
}, {
'jobs': {
'count': 0
},
'remote_identifier': {
'id': '68'
}
}, {
'jobs': {
'count': 7
},
'remote_identifier': {
'id': '64'
}
}]
I already tried something like this, but I don't know how to adapt it to my needs, since that only counts the number of occurrences.
from collections import Counter
# Counts how many entries carry each id (occurrences only, not summed job counts).
# Call form of print works on both Python 2 and Python 3.
print(Counter(item['remote_identifier']['id'] for item in items))

Pretty straightforward with a defaultdict. (data is your original list.)
>>> from collections import defaultdict
>>> d = defaultdict(int)
>>>
>>> for d_inner in data:
... id_ = d_inner['remote_identifier']['id']
... d[int(id_)] += d_inner['jobs']['count']
...
>>> d
defaultdict(<type 'int'>, {64: 11, 68: 0})

You can use a defaultdict to add up the counts:
from collections import defaultdict
# Sample input: each record pairs a job count with a remote identifier.
jobs = [
    {
        'jobs': {'count': 4},
        'remote_identifier': {'id': '64'},
    },
    {
        'jobs': {'count': 0},
        'remote_identifier': {'id': '68'},
    },
    {
        'jobs': {'count': 7},
        'remote_identifier': {'id': '64'},
    },
]

# defaultdict(int) starts every unseen id at 0, so the totals can be
# accumulated without a membership check.
counts = defaultdict(int)
for job in jobs:
    counts[job['remote_identifier']['id']] += job['jobs']['count']

print(counts)
Output:
defaultdict(<class 'int'>, {'64': 11, '68': 0})

The simplest way is by using the itertools module, which provides the function groupby.
import itertools as it
def get_id(entry):
    """Return the remote identifier id stored in ``entry``.

    Args:
        entry: Mapping with a nested ``entry['remote_identifier']['id']``.

    Returns:
        The value found at ``entry['remote_identifier']['id']``.

    Raises:
        KeyError: If either nested key is missing from a dict ``entry``.
    """
    return entry['remote_identifier']['id']
# groupby only merges *adjacent* entries with equal keys, so iterate over a
# copy sorted by the same key; sorted() leaves the original list order intact.
for key, group in it.groupby(sorted(data, key=get_id), get_id):
    print(key, sum(entry['jobs']['count'] for entry in group))
Note that groupby assumes that the data is already sorted by the key you are using to group the elements in the data.

This should do the trick:
# Accumulate the total job count per remote identifier in a plain dict.
result = {}
for i in items:
    ri = i['remote_identifier']['id']
    # dict.get supplies 0 for ids seen for the first time.
    result[ri] = result.get(ri, 0) + i['jobs']['count']
result
#{'68': 0, '64': 11}

Another solution is as follows:
# Sample input; named ``records`` rather than ``input`` so the builtin
# input() is not shadowed.
records = [
    {
        'jobs': {'count': 4},
        'remote_identifier': {'id': '64'},
    },
    {
        'jobs': {'count': 0},
        'remote_identifier': {'id': '68'},
    },
    {
        'jobs': {'count': 7},
        'remote_identifier': {'id': '64'},
    },
]

# Map each remote identifier id to the sum of its job counts.
res = dict()
for item in records:
    key = item['remote_identifier']['id']
    # Start from 0 for ids not seen yet, then add this record's count.
    res[key] = res.get(key, 0) + item['jobs']['count']

print(res)
output:
{'68': 0, '64': 11}

Related

Sort nested list of dict by its dict value

I have the following structure:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'c': 'third',
'price': 3
},
{
'b': 'second',
'price': 2
},
{
'a': 'first',
'price': 1
}
]
},
2: {
'list': [
{
'f': 'sixth',
'price': 6
},
{
'e': 'fifth',
'price': 5
},
{
'd': 'fourth',
'price': 4
}
]
}
}
}
}
}
I need to order each list by price, ascending. The result should be:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'a': 'first',
'price': 1
},
{
'b': 'second',
'price': 2
},
{
'c': 'third',
'price': 3
},
]
},
2: {
'list': [
{
'd': 'fourth',
'price': 4
},
{
'e': 'fifth',
'price': 5
},
{
'f': 'sixth',
'price': 6
}
]
}
}
}
}
}
None of the questions I've found fits my needs because of this particular structure.
Is there a way to order it without having to access each previous keys? Because on my project I have cases with more nested keys before the list, so I need a dynamic solution for sorting it.
I mean, I don't know the exact path to the list, only the list key.
Make a function to recursively traverse your dict looking for lists, and sort each one based on your criteria:
def find_and_sort_lists(d, sort_key='price'):
    """Recursively sort, in place, every list found among the values of ``d``.

    Nested dict values are descended into at any depth. Dicts that sit
    inside a list are used only as sort elements and are not descended into.

    Args:
        d: Dict to walk.
        sort_key: Key whose value orders the elements of each list.
            Defaults to ``'price'``.

    Returns:
        None. Lists are reordered in place with ``list.sort``.

    Raises:
        KeyError: If a dict element of a sorted list lacks ``sort_key``.
    """
    for value in d.values():
        if isinstance(value, list):
            value.sort(key=lambda nested_d: nested_d[sort_key])
        elif isinstance(value, dict):
            find_and_sort_lists(value, sort_key)
If it's a requirement to sort only lists whose key is actually 'list', you can use the following:
def find_and_sort_lists(d, list_key='list', sort_key='price'):
    """Recursively sort, in place, the lists stored under ``list_key``.

    Only list values whose dict key equals ``list_key`` are sorted; lists
    under any other key are left untouched. Nested dict values are
    descended into at any depth.

    Args:
        d: Dict to walk.
        list_key: Dict key that marks a list to be sorted. Defaults to
            ``'list'``.
        sort_key: Key whose value orders the elements of each matching
            list. Defaults to ``'price'``.

    Returns:
        None. Matching lists are reordered in place with ``list.sort``.

    Raises:
        KeyError: If a dict element of a sorted list lacks ``sort_key``.
    """
    for key, value in d.items():
        if key == list_key and isinstance(value, list):
            value.sort(key=lambda nested_d: nested_d[sort_key])
        elif isinstance(value, dict):
            find_and_sort_lists(value, list_key, sort_key)

How to extract group count from dictionary?

I need to get the count of each group of items that share the same 'id' and 'name'.
Input:
myd = {
"Items": [
{
"id": 1,
"name": "ABC",
"value": 666
},
{
"id": 1,
"name": "ABC",
"value": 89
},
{
"id": 2,
"name": "DEF",
"value": 111
},
{
"id": 3,
"name": "GHI",
"value": 111
}
]
}
Expected output:
The count of {'id':1, 'name': 'ABC' } is 2
The count of {'id':2, 'name': 'DEF' } is 1
The count of {'id':3, 'name': 'GHI' } is 1
The total length can be obtained with len(myd), and for a single key with len(myd['id']).
How can I get the count for each combination of id and name?
You can use collections.OrderedDict and set both 'id' and 'name' as tuple keys. In this way, the OrderedDict automatically groups the dictionaries with same 'id' and 'name' values in order:
from collections import OrderedDict

myd = {'Items': [
    {'id': 1, 'name': 'ABC', 'value': 666},
    {'id': 1, 'name': 'ABC', 'value': 89},
    {'id': 2, 'name': 'DEF', 'value': 111},
    {'id': 3, 'name': 'GHI', 'value': 111},
]}


def count_groups(items):
    """Count how many items share each ``(id, name)`` pair.

    Occurrences are counted directly. Collecting the ``value`` fields in a
    set and taking its length would undercount whenever two items of the
    same group carry the same value.

    Args:
        items: Iterable of mappings that each have ``'id'`` and ``'name'``.

    Returns:
        An OrderedDict mapping ``(id, name)`` to the number of items in
        that group, in order of first appearance.
    """
    counts = OrderedDict()
    for item in items:
        key = (item['id'], item['name'])
        counts[key] = counts.get(key, 0) + 1
    return counts


od = count_groups(myd['Items'])
for ks, v in od.items():
    print("The count of {{'id': {}, 'name': {}}} is {}".format(ks[0], ks[1], v))
Output:
The count of {'id': 1, 'name': ABC} is 2
The count of {'id': 2, 'name': DEF} is 1
The count of {'id': 3, 'name': GHI} is 1
This is a good candidate for groupby and itemgetter usage:
from itertools import groupby
from operator import itemgetter
myd = {'Items': [
    {'id': 1, 'name': 'ABC', 'value': 666},
    {'id': 1, 'name': 'ABC', 'value': 89},
    {'id': 2, 'name': 'DEF', 'value': 111},
    {'id': 3, 'name': 'GHI', 'value': 111},
]}

# itemgetter with two keys returns an (id, name) tuple, which serves as both
# the sort key and the grouping key.
grouper = itemgetter('id', 'name')

# groupby only merges adjacent items with equal keys, hence the sort by the
# same key first.
for i, v in groupby(sorted(myd['Items'], key=grouper), key=grouper):
    print(f"the count for {dict(id=i[0], name=i[1])} is {len(list(v))}")

Python recursive aggregation

I am working with a nested data structure which needs to be flattened. The values need to be aggregated so totals are produced across each level of the nested data. I'm trying to do this recursively but it's not clear how best to achieve this?
The following is an example of the data I'm working with.
def get_result():
    """Return the sample nested structure used to illustrate the aggregation.

    Returns:
        A dict nested up to four levels deep whose innermost values are the
        integer 1; the top-level key ``"a2"`` maps to an empty dict.
    """
    return {
        "a1": {
            "b1": {
                "c1": {
                    "d1": 1,
                    "d2": 1,
                },
                "c2": {
                    "d3": 1,
                },
            },
            "b2": {
                "c3": {
                    "d4": 1,
                },
            },
        },
        "a2": {},
    }
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
},
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]
You can use recursion
from collections import defaultdict
def agg(data):
    """Aggregate the leaf values of a nested dict at every level.

    Args:
        data: Dict whose values are either nested dicts or addable leaf
            values (integers in the example).

    Returns:
        A ``(result, agg_sum)`` tuple. ``result`` is a ``defaultdict(int)``
        mapping each key found at any depth to the total of the leaves
        beneath it; ``agg_sum`` is the total of all leaves in ``data``.
        Keys whose subtree totals to a falsy value (e.g. an empty dict)
        are omitted from ``result``.
    """
    result = defaultdict(int)
    agg_sum = 0
    for k, v in data.items():
        if isinstance(v, dict):
            d, sub = agg(v)
            # Skip subtrees that contribute nothing so they do not show up
            # as zero-valued entries.
            if sub:
                result.update(d)
                result[k] += sub
                agg_sum += sub
        else:
            result[k] += v
            agg_sum += v
    return result, agg_sum
You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
    """Return ``d`` unchanged if it is not a dict, else the sum of its leaves.

    An empty dict aggregates to 0.
    """
    if not isinstance(d, dict):
        return d
    return sum(map(get_aggr, d.values()))


def aggr_keys(d):
    """Yield a ``{'key': ..., 'total': ...}`` dict for every key in ``d``.

    Keys are visited in pre-order: each key is yielded before the keys of
    its nested dict. ``total`` is the aggregate computed by ``get_aggr``.

    Args:
        d: Nested dict whose leaves can be summed.

    Yields:
        One dict per key at every depth.
    """
    for a, b in d.items():
        yield {'key': a, 'total': get_aggr(b)}
        if isinstance(b, dict):
            yield from aggr_keys(b)
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]

Create dictionary from difference of two dictionaries

Suppose I have a dictionary,
dictA = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3
}
}
and if dictA is updated (say to dictB)
dictB = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3,
'orange': 4
}
}
Now, how would I get a dictionary of only the newly added items (preserving the structure), something like,
difference(dictB, dictA) = {'fruit': {'orange': 4}}
by this way, I would avoid storing redundant items each time and instead have a smaller dictionary showing only newly added items
This kind of dictionary manipulation has a lot of practical uses, but is unfortunately hard to do.
Any help would be much appreciated and Thanks in advance
Use DictDiffer:
from dictdiffer import diff, patch, swap, revert
dictA = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3
}
}
dictB = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3,
'orange': 4
}
}
# diff() yields its changes lazily, so materialise them once; otherwise
# printing list(result) would exhaust the iterator and patch() below would
# receive no changes and return dictA unmodified.
result = list(diff(dictA, dictB))
# [('add', 'fruit', [('orange', 4)])]
print(f'Diffrence :\n{result}')
patched = patch(result, dictA)
# {'flower': {'jasmine': 10, 'roses': {'red': 1, 'white': 2}}, 'fruit': {'apple': 3, 'orange': 4}}
print(f'Apply diffrence :\n{patched}')

n-depth tree: set parent value based on children values

In a n-depth dict where values are set in the deepest level of a hierarchy:
{
"name": "root",
"value": None, # expected value to be 80
"children": [
{
"name": "a",
"value": None, # expected value to be 30
"children": [
{ "name": "a.1", "value": 10 },
{ "name": "a.2", "value": 20 }
]
},
{
"name": "b",
"value": None, # expected value to be 50
"children": [
{ "name": "b.1", "value": 25 },
{
"name": "b.2",
"value": None, # expected value to be 25
"children": [
{"name": "b.2.1", "value": 5},
{"name": "b.2.2", "value": 5},
{"name": "b.2.3", "value": 5},
{"name": "b.2.4", "value": 5},
{"name": "b.2.5", "value": 5}
]
}
]
}
]
}
What could be the approach to recursively set each parent value based on the result of an operation performed on its children's values (i.e. sum)?
I finally managed to do it using the iterative level order traversal pattern (BFS), I was missing just a couple of details.
This approach works because the depth iteration order is guaranteed, so once we get to a node which has children, all its sub-level children are already calculated.
The solution:
def reverseTraversal(obj):
    """Fill in every parent's ``'value'`` with the sum of its children's values.

    The tree is walked level by level to record the nodes in breadth-first
    order; the recorded nodes are then processed in reverse, so every child
    is finalised before its parent is computed.

    Args:
        obj: Root node, a dict with a ``'value'`` and optionally a
            ``'children'`` list of nodes of the same shape, or ``None``.

    Returns:
        The same ``obj``, updated in place, so that
        ``obj = reverseTraversal(obj)`` keeps the tree. Returns ``None``
        when ``obj`` is ``None``.
    """
    from collections import deque

    def parentOperation(node):
        # Operation applied to a parent: sum of the direct children's values.
        return sum(child['value'] for child in node['children'])

    if obj is None:
        return None

    # deque gives O(1) pops from the left; list.pop(0) is O(n).
    queue = deque([obj])
    stack = []
    while queue:
        temp = queue.popleft()
        stack.append(temp)
        if 'children' in temp and len(temp['children']) > 0:
            queue.extend(temp['children'])

    # Popping the breadth-first order from the end visits deeper levels first.
    while stack:
        node = stack.pop()
        if 'children' in node and len(node['children']) > 0:
            node['value'] = parentOperation(node)

    return obj
# obj is the original dict
obj = reverseTraversal(obj)
print(obj)
Results in:
{
"name": "root",
"value": 80,
"children": [
{
"name": "a",
"value": 30,
"children": [
{"name": "a.1","value": 10},
{"name": "a.2","value": 20}
]
},
{
"name": "b",
"value": 50,
"children": [
{"name": "b.1","value": 25},
{
"name": "b.2",
"value": 25,
"children": [
{"name": "b.2.1","value": 5},
{"name": "b.2.2","value": 5},
{"name": "b.2.3","value": 5},
{"name": "b.2.4","value": 5},
{"name": "b.2.5","value": 5}
]
}
]
}
]
}
Given your datastructure and a list of values to update, you can use next in recursion:
def update(d, targets):
    """Return a copy of ``d`` with each ``None`` value replaced from ``targets``.

    Values are visited in dict iteration order, recursing into nested dicts
    and into the elements of lists as they are encountered. Each ``None``
    consumes the next item of ``targets``. Only ``None`` counts as missing,
    so legitimate falsy values such as ``0`` or ``''`` are kept.

    Args:
        d: Dict to copy. List values must contain dicts, because every
            list element is passed back into ``update``.
        targets: Iterator supplying the replacement values in visiting order.

    Returns:
        A new dict with the same keys. Nested dicts and lists are rebuilt,
        other values are reused as-is.

    Raises:
        StopIteration: If ``targets`` runs out before every ``None`` is filled.
    """
    result = {}
    for a, b in d.items():
        if isinstance(b, list):
            result[a] = [update(i, targets) for i in b]
        elif isinstance(b, dict):
            result[a] = update(b, targets)
        elif b is None:
            result[a] = next(targets)
        else:
            result[a] = b
    return result
targets = [80, 30, 50, 25]
results = update(nlist, iter(targets))
Output:
{'children': [{'children': [{'name': 'a.1', 'value': 10},
{'name': 'a.2', 'value': 20}],
'name': 'a',
'value': 30},
{'children': [{'name': 'b.1', 'value': 25},
{'children': [{'name': 'b.2.1', 'value': 5},
{'name': 'b.2.2', 'value': 5},
{'name': 'b.2.3', 'value': 5},
{'name': 'b.2.4', 'value': 5},
{'name': 'b.2.5', 'value': 5}],
'name': 'b.2',
'value': 25}],
'name': 'b',
'value': 50}],
'name': 'root',
'value': 80}

Categories