I have a list of dicts with the same structure
[{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
What I want is duplicate keys of "Program Name" that have the same value (ex: "Bulldozer" appearing 2x) to be renamed as "Bulldozer (1)", "Bulldozer (2)" and so on.
An efficient way is to use a defaultdict to count each "Program Name"; the time complexity
is O(n):
from collections import defaultdict
# Sample records; repeated "Program Name" values get a numeric suffix below.
l = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1},
     {"Program Name": "Bulldozer", "Level": 4}, {"Program Name": "Bulldozer", "Level": 4},
     {"Program Name": "Robot", "Level": 1}]

# tmp maps each original program name to how many times it has been seen so far.
tmp = defaultdict(int)
for i in l:
    # Capture the untouched name before renaming: it is the counting key.
    # Re-deriving the key from the renamed value with split()[0] only works
    # for single-word names ("Potion of Boi" would be counted as "Potion").
    name = i["Program Name"]
    if tmp[name]:
        # Second and later occurrences become "<name> (k)", where k is the
        # number of earlier occurrences.
        i["Program Name"] = f'{name} ({tmp[name]})'
    tmp[name] += 1
print(l)
Result:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}, {'Program Name': 'Bulldozer (2)', 'Level': 4}, {'Program Name': 'Robot (1)', 'Level': 1}]
Hope this helps:
input = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
def update_input(input):
    """Suffix repeated 'Program Name' values with ' (k)' and return the list.

    The first record carrying a given name is left untouched; the k-th repeat
    (k = 1, 2, ...) is renamed to '<name> (k)'. The dicts in ``input`` are
    modified in place and the same list object is returned.
    """
    repeats = {}
    for record in input:
        name = record['Program Name']
        if name not in repeats:
            # First sighting: remember the name, leave the record alone.
            repeats[name] = 0
            continue
        repeats[name] += 1
        record['Program Name'] = name + ' ({ID})'.format(ID=repeats[name])
    return input
output = update_input(input)
yields:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}]
You can try this too:
data = [
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3}
]
Approach: 01
import pandas as pd
# Load the records into a frame so repeats can be numbered per name.
c = pd.DataFrame(data)
# Running counter within each "Program Name" group, shifted by 1.
c['group_code'] = c.groupby(['Program Name']).cumcount() + 1
# Pair every name with its counter and render it as "<name> (<counter>)".
c['Program Name'] = [
    "{0} ({1})".format(name, code)
    for name, code in zip(c['Program Name'], c['group_code'])
]
# Drop the helper column again when converting back to a list of dicts.
output = c[['Program Name', 'Level']].to_dict(orient='records')
print(output)
Approach: 02
# temp maps each original program name to the number of times it has occurred.
temp = {}
for item in data:
    name = item['Program Name']
    # Bump the running count (a name seen for the first time starts at 1).
    temp[name] = temp.get(name, 0) + 1
    # Every record is suffixed, including the first occurrence.
    item['Program Name'] = name + ' (' + str(temp[name]) + ')'
print(data)
output:
[
{"Program Name": "Bulldozer (1)", "Level": 3},
{"Program Name": "Robot (1)", "Level": 1},
{"Program Name": "Bulldozer (2)", "Level": 4},
{"Program Name": "Rozer (1)", "Level": 3},
{"Program Name": "Robot (2)", "Level": 1},
{"Program Name": "Rozer (2)", "Level": 3},
{"Program Name": "Bulldozer (3)", "Level": 3},
{"Program Name": "Robot (3)", "Level": 1},
{"Program Name": "Bulldozer (4)", "Level": 4},
{"Program Name": "Rozer (3)", "Level": 3},
{"Program Name": "Robot (4)", "Level": 1},
{"Program Name": "Rozer (4)", "Level": 3}
]
I would recommend using pandas (approach 01) if you have a huge amount of data.
Thanks to #jizhihaoSAMA I have managed to find a solution to my problem with a small edit
machines = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]

# tmp maps each original program name to how many times it has been seen so far.
tmp = defaultdict(int)
for i in machines:
    # Use the stored name itself as the counting key. str.strip(chars) removes
    # any of the given *characters* from both ends rather than a suffix, so
    # stripping ' (0)' would turn a name such as "Model 0" into "Model".
    name = i["Program Name"]
    if tmp[name]:
        # Repeats become "<name> (k)", k being the number of earlier occurrences.
        i["Program Name"] = f'{name} ({tmp[name]})'
    tmp[name] += 1
This avoids problems with spaces in the program name or with other complex names.
Related
I am working with a nested data structure which needs to be flattened. The values need to be aggregated so totals are produced across each level of the nested data. I'm trying to do this recursively but it's not clear how best to achieve this?
The following is an example of the data I'm working with.
def get_result():
    """Return a fresh copy of the sample nested mapping used in the example.

    Leaves are integers; every other level is a dict keyed by name. "a2" is an
    empty branch.
    """
    b1 = {
        "c1": {"d1": 1, "d2": 1},
        "c2": {"d3": 1},
    }
    b2 = {
        "c3": {"d4": 1},
    }
    return {
        "a1": {"b1": b1, "b2": b2},
        "a2": {},
    }
The data I'd like to produce would be as follows:
[
{
"key": "a1",
"total": 4
},
{
"key": "b1",
"total": 3
},
{
"key": "c1",
"total": 2
},
{
"key": "d1",
"total": 1
},
{
"key": "d2",
"total": 1
},
{
"key": "c2",
"total": 1
},
{
"key": "d3",
"total": 1
},
{
"key": "b2",
"total": 1
},
{
"key": "c3",
"total": 1
},
{
"key": "d4",
"total": 1
}
]
You can use recursion
from collections import defaultdict
def agg(data):
    """Flatten a nested mapping into per-key totals.

    Returns a ``(totals, grand_total)`` pair: ``totals`` is a defaultdict
    mapping every key (at any depth) to the sum of the leaf values beneath it,
    and ``grand_total`` is the sum of all leaves in ``data``. Nested dicts whose
    total is falsy (e.g. empty dicts) contribute nothing and are not listed.
    """
    totals = defaultdict(int)
    grand_total = 0
    for key, value in data.items():
        if not isinstance(value, dict):
            # Leaf: counts for its own key and for the running total.
            totals[key] += value
            grand_total += value
            continue
        nested_totals, nested_sum = agg(value)
        if not nested_sum:
            continue
        # Merge the descendants first, then record this branch's own total.
        totals.update(nested_totals)
        totals[key] += nested_sum
        grand_total += nested_sum
    return totals, grand_total
You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}
def get_aggr(d):
    """Return ``d`` itself for a non-dict value, else the sum of all nested leaves."""
    if isinstance(d, dict):
        return sum(get_aggr(child) for child in d.values())
    return d
def aggr_keys(d):
    """Yield ``{'key': ..., 'total': ...}`` for every key of ``d``, depth-first.

    Each key is emitted before the keys nested beneath it; ``total`` is
    whatever ``get_aggr`` returns for that key's value.
    """
    for key, value in d.items():
        yield {'key': key, 'total': get_aggr(value)}
        if isinstance(value, dict):
            # Descend only into nested mappings; leaves have nothing below them.
            yield from aggr_keys(value)
print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]
Assume I have this:
[
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
I want to transform this list into :
[
{"name": "bob", "total": 4},
{"name": "alice", "total": 9},
{"name": "eve", "total": 2}
]
For now, I walk through the whole second list to find if the key exist for each loop of the first list.
How can I achieve this with a lower complexity?
from collections import defaultdict
a = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# Accumulate the total per name in a single pass over the records.
freq = defaultdict(int)
for d in a:
    freq[d['name']] += d['total']

# Rebuild the list with one record per name, in first-seen order.
a = [{'name': key, 'total': val} for key, val in freq.items()]
print(a)
If you only have two pieces of information (name and total), I would suggest changing your schema a bit. Instead of a list of dictionaries, use a single dictionary where the keys are names and the values are totals:
>>> values = [
... {"name": "bob", "total": 1},
... {"name": "alice", "total": 5},
... {"name": "eve", "total": 2},
... {"name": "bob", "total": 3},
... {"name": "alice", "total": 2},
... {"name": "alice", "total": 2},
... ]
>>> from collections import defaultdict
>>> totals_by_name = defaultdict(int)
>>> for value in values:
... totals_by_name[value["name"]] += value["total"]
...
>>> totals_by_name
defaultdict(<class 'int'>, {'bob': 4, 'alice': 9, 'eve': 2})
This can work even if you have more pieces of data that you want to look up by name (replace the integer value with a nested dictionary that stores the total as well as other data).
You can use groupby from the itertools module:
from itertools import groupby
from operator import itemgetter
# itemgetter(foo) behaves roughly like lambda x: x[foo]
get_name = itemgetter('name')
get_total = itemgetter('total')

lst = [
    {"name": "bob", "total": 1},
    {"name": "alice", "total": 5},
    {"name": "eve", "total": 2},
    {"name": "bob", "total": 3},
    {"name": "alice", "total": 2},
    {"name": "alice", "total": 2},
]

# groupby only merges adjacent items, hence the sort on the same key first.
grouped = groupby(sorted(lst, key=get_name), get_name)
new_list = []
for k, v in grouped:
    # v iterates the records sharing the name k; add up their totals.
    new_list.append({'name': k, 'total': sum(map(get_total, v))})
groupby will produce a new sequence that collects the dicts from the original list into subsequences, based on a common value of the 'name' attribute. Iterating over that lets you extract all the total values to sum up for use in a new list of dict values.
Let's say,
your_data = [
{"name": "bob", "total": 1},
{"name": "alice", "total": 5},
{"name": "eve", "total": 2},
{"name": "bob", "total": 3},
{"name": "alice", "total": 2},
{"name": "alice", "total": 2},
]
You can simply use pandas to receive the desired output.
import pandas as pd
df = pd.DataFrame(your_data)
# Select the column to aggregate explicitly. GroupBy.sum() takes no column
# argument; a positional 'total' would be bound to its numeric_only parameter.
df = df.groupby(by='name', as_index=False)['total'].sum()
# One {'name': ..., 'total': ...} dict per distinct name.
result = df.to_dict(orient='records')
OUTPUT: [{'name': 'alice', 'total': 9}, {'name': 'bob', 'total': 4}, {'name': 'eve', 'total': 2}]
I have some data, more or less like this:
[
{"tag": "A", "level":0},
{"tag": "B", "level":1},
{"tag": "D", "level":2},
{"tag": "F", "level":3},
{"tag": "G", "level":4},
{"tag": "E", "level":2},
{"tag": "H", "level":3},
{"tag": "I", "level":3},
{"tag": "C", "level":1},
{"tag": "J", "level":2},
]
I want to turn it into a multilevel dict based on depth level (key "level"):
{
"A": {"level": 0, "children": {
"B": {"level": 1, "children": {
"D": {"level": 2, "children": {
"F": {"level": 3, "children": {
"G": {"level": 4, "children": {}}}}}},
"E": {"level": 2, "children": {
"H": {"level": 3, "children": {}},
"I": {"level": 3, "children": {}}}}}},
"C": {"level": 1, "children": {
"J": {"level": 2, "children": {}}}}}}
}
All I can come up with right now is this little piece of code... which obviously breaks after few items:
def list2multilevel(list):
    """Nest a flat, depth-first list of ``{"tag", "level"}`` items by level.

    Returns a dict mapping each top-level tag to a dict of its children, which
    are nested the same way (``{tag: {child_tag: {...}}}``). Each item is
    attached to the closest preceding item with a smaller level. An empty
    input yields ``{}``. The input list is not modified.

    Raises KeyError if an item lacks the "tag" or "level" key.
    """
    tree = {}
    # Stack of (level, children_dict) for the branch currently being filled;
    # the last entry is the deepest open ancestor.
    path = []
    for item in list:
        tag, level = item["tag"], item["level"]
        # Close every branch that cannot be an ancestor of this item.
        while path and path[-1][0] >= level:
            path.pop()
        children = {}
        parent = path[-1][1] if path else tree
        parent[tag] = children
        path.append((level, children))
    return tree
Originally sat down to it on Friday and it was supposed to be just a small exercise....
data = [
    {"tag": "A", "level": 0},
    {"tag": "B", "level": 1},
    {"tag": "D", "level": 2},
    {"tag": "F", "level": 3},
    {"tag": "G", "level": 4},
    {"tag": "E", "level": 2},
    {"tag": "H", "level": 3},
    {"tag": "I", "level": 3},
    {"tag": "C", "level": 1},
    {"tag": "J", "level": 2},
]

# Dummy node sitting one level above the real top-level items.
root = {'level': -1, 'children': {}}
# parents[n] is the node most recently created at level n.
parents = {-1: root}

for datum in data:
    level = datum['level']
    node = {'level': level, 'children': {}}
    # Remember the node as the latest one at its level, then hang it under
    # the latest node one level up.
    parents[level] = node
    parents[level - 1]['children'][datum['tag']] = node

result = root['children']
print(result)
output:
{'A': {'level': 0, 'children': {'B': {'level': 1, 'children': {'D': {'level': 2, 'children': {'F': {'level': 3, 'children': {'G': {'level': 4, 'children': {}}}}}}, 'E': {'level': 2, 'children': {'H': {'level': 3, 'children': {}}, 'I': {'level': 3, 'children': {}}}}}}, 'C': {'level': 1, 'children': {'J': {'level': 2, 'children': {}}}}}}}
restriction:
level >= 0
No level may be greater than the maximum level that has appeared so far plus 1.
explanation:
parents is a dictionary to remember last element for each level.
root is a starting point(dummy element).
logic:
Start with -1 level which indicates the root.
Make an item and register it into parent's children.
Update same item to parents dictionary.
Repeat.
Extract root['children'].
Other solution using recursion (same restrictions as with Boseong Choi's answer):
data = [
{"tag": "A", "level": 0},
{"tag": "B", "level": 1},
{"tag": "D", "level": 2},
{"tag": "F", "level": 3},
{"tag": "G", "level": 4},
{"tag": "E", "level": 2},
{"tag": "H", "level": 3},
{"tag": "I", "level": 3},
{"tag": "C", "level": 1},
{"tag": "J", "level": 2},
]
def make_node(dic):
    """Split an input item into ``(tag, node)``.

    ``node`` is a new dict holding every entry of ``dic`` except "tag", plus an
    empty "children" dict. ``dic`` itself is left untouched. Raises KeyError
    when "tag" is missing.
    """
    node = {key: value for key, value in dic.items() if key != "tag"}
    node["children"] = {}
    return dic["tag"], node
def add_child(parent, child, tag):
    """Attach ``child`` under the right ancestor inside ``parent``'s subtree.

    ``child`` is stored under ``tag`` in the "children" dict of the most
    recently added node whose level is exactly one less than the child's.
    Returns True when a place was found, False otherwise.

    Raises AssertionError if ``child`` is not deeper than ``parent``.
    """
    assert child["level"] > parent["level"]
    if child["level"] == parent["level"] + 1:
        parent["children"][tag] = child
        return True
    # Items arrive in depth-first order, so a new node belongs to the branch
    # added last. Search newest-first; oldest-first would hang it under an
    # earlier sibling branch (e.g. J under B instead of C).
    for node in reversed(list(parent["children"].values())):
        if add_child(node, child, tag):
            return True
    return False
def parse(lst):
    """Build the nested tree for a depth-first list of ``{"tag", "level"}`` items.

    The first item must have level 0 and becomes the root. Returns
    ``{root_tag: root_node}``, where every node carries its "level" and a
    "children" dict of nested nodes keyed by tag. Items for which
    ``add_child`` finds no parent are left out of the tree.

    Raises AssertionError if the first item is not at level 0, and IndexError
    if ``lst`` is empty.
    """
    assert lst[0]["level"] == 0
    root_tag, root = make_node(lst[0])
    for item in lst[1:]:
        tag, node = make_node(item)
        add_child(root, node, tag)
    # Hand the finished tree back to the caller; without this the function
    # returns None and the result of all the work above is lost.
    return {root_tag: root}
print(parse(data))
You can use recursion:
from itertools import groupby as gb
data = [{'tag': 'A', 'level': 0}, {'tag': 'B', 'level': 1}, {'tag': 'D', 'level': 2}, {'tag': 'F', 'level': 3}, {'tag': 'G', 'level': 4}, {'tag': 'E', 'level': 2}, {'tag': 'H', 'level': 3}, {'tag': 'I', 'level': 3}, {'tag': 'C', 'level': 1}, {'tag': 'J', 'level': 2}]
def to_tree(d, s=0):
    """Turn a depth-first list of ``{'tag', 'level'}`` items into a nested dict.

    Args:
        d: items in depth-first order.
        s: the level treated as the top level of ``d``.

    Returns:
        A dict mapping each level-``s`` tag to
        ``{'level': s, 'children': <same structure for level s + 1>}``.
        Items that appear before the first level-``s`` item are ignored.
    """
    # Pair every level-s item with the run of items that follows it up to the
    # next level-s item. Unlike alternating groupby runs, this copes with
    # consecutive siblings and with a final node that has no children.
    groups = []
    for item in d:
        if item['level'] == s:
            groups.append((item, []))
        elif groups:
            groups[-1][1].append(item)
    return {
        item['tag']: {'level': s, 'children': to_tree(descendants, s + 1)}
        for item, descendants in groups
    }
import json
print(json.dumps(to_tree(data), indent=4))
Output:
{
"A": {
"level": 0,
"children": {
"B": {
"level": 1,
"children": {
"D": {
"level": 2,
"children": {
"F": {
"level": 3,
"children": {
"G": {
"level": 4,
"children": {}
}
}
}
}
},
"E": {
"level": 2,
"children": {
"H": {
"level": 3,
"children": {}
},
"I": {
"level": 3,
"children": {}
}
}
}
}
},
"C": {
"level": 1,
"children": {
"J": {
"level": 2,
"children": {}
}
}
}
}
}
}
I am aware that this is a copy of this; however, there was no answer that I could see. The asker's problem could be fixed by lists, but I do not believe that mine can. I am using nested lists for an amateur game I'm making. Does anyone know how to re-order the elements back into ascending numerical order after I delete an element?
potions = {
    1: {"name": "Potion of Bruh", "amount": 5},
    2: {"name": "Potion of Epic", "amount": 10},
    3: {"name": "Potion of Boi", "amount": 15},
    4: {"name": "Potion of Matt", "amount": 12},
    5: {"name": "Potion of Garfield", "amount": 3},
}

# Visit keys 1..N (N is fixed when the loop starts) and drop the matching
# entry; the remaining keys keep their original numbers.
for i in range(1, len(potions) + 1):
    entry = potions[i]
    if entry["name"] == "Potion of Boi":
        del potions[i]
print(potions)
You are using a dictionary keyed by integers and should be using a list instead. You'll need to change how you remove elements but then the reordering will work properly:
potions = [
    {"name": "Potion of Bruh", "amount": 5},
    {"name": "Potion of Epic", "amount": 10},
    {"name": "Potion of Boi", "amount": 15},
    {"name": "Potion of Matt", "amount": 12},
    {"name": "Potion of Garfield", "amount": 3},
]

# Position of the first potion with the wanted name, or None when absent.
idx_to_remove = next(
    (idx for idx, potion in enumerate(potions) if potion["name"] == "Potion of Boi"),
    None,
)
if idx_to_remove is not None:
    # Removing from a list shifts the later items down, so positions stay contiguous.
    del potions[idx_to_remove]
print(potions)
use sorted function
potions = {
    9: {"name": "Potion of Bruh", "amount": 5},
    1: {"name": "Potion of Bruh", "amount": 5},
    2: {"name": "Potion of Epic", "amount": 10},
    3: {"name": "Potion of Boi", "amount": 15},
    4: {"name": "Potion of Matt", "amount": 12},
    5: {"name": "Potion of Garfield", "amount": 3}
}
# popitem() removes the entry that was inserted last (key 5 here).
potions.popitem()
# Sort on the integer key only, so the dict values are never compared.
sorted_d = dict(sorted(potions.items(), key=lambda pair: pair[0]))
# The ordering is by key, so the label says so.
print('Dictionary in ascending order by key : ', sorted_d)
I have three lists of dicts, some sample data can look like:
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
Using the shared id and ids on the users I want to create 1 combined list that looks like:
combined_list.append([{"shared_id": "111", "users": [{"id": "444", "opens": 2, "clicks": 1, "bounces": 2, "combined_id": 444111}, {"id": "555", "opens": 3, "clicks": 2, "bounces": 1, "combined_id": 555111}]}])
combined_list.append([{"shared_id": "222", "users": [{"id": "444", "opens": 2, "clicks": 3, "bounces": 1, "combined_id": 444222}, {"id": "555", "opens": 3, "clicks": 3, "bounces": 2, "combined_id": 555222}]}])
I have solved it when I had 2 lists using nested for loops, but the code is super messy and does not feel very pythonized, is there a better way now that I am using 3 lists?
# Pair every list_1 record with the list_2 records sharing its shared_id, then
# pair their users by id and collect the merged per-user counters.
for o in list_1:
    for c in list_2:
        if o['shared_id'] != c['shared_id']:
            continue
        csd = {'users': [], 'shared_id': o['shared_id']}
        for op in o['users']:
            for cp in c['users']:
                if op['id'] != cp['id']:
                    continue
                # combined_id is the user id followed by the shared id.
                csd['users'].append({
                    'opens': op['opens'],
                    'clicks': cp['clicks'],
                    'combined_id': '{}{}'.format(op['id'], csd['shared_id']),
                })
        combined.append(csd)
Here a simple function for that:
from pprint import pprint
def combine_lists(*lists):
    """Merge any number of ``[{"shared_id", "users"}]`` lists into one.

    Records are grouped by "shared_id" and their users by "id". Each merged
    user dict starts with a "combined_id" (user id followed by shared id) and
    is then updated with every user dict seen for that pair, so later lists
    override earlier ones on clashing keys.

    Returns a list of ``{"shared_id": ..., "users": [...]}`` dicts in
    first-seen order.
    """
    # shared_id -> user_id -> merged user dict
    by_shared = {}
    for entries in lists:
        for entry in entries:
            sid = entry["shared_id"]
            users = by_shared.setdefault(sid, {})
            for record in entry["users"]:
                uid = record["id"]
                if uid not in users:
                    users[uid] = {"combined_id": "{}{}".format(uid, sid)}
                users[uid].update(record)

    combined = []
    for sid, users in by_shared.items():
        combined.append({"shared_id": sid, "users": list(users.values())})
    return combined
# Test
list_1 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2 = []
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3 = []
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
pprint(combine_lists(list_1, list_2, list_3))
# [{'shared_id': '111',
# 'users': [{'bounces': 2,
# 'clicks': 1,
# 'combined_id': '444111',
# 'id': '444',
# 'opens': 2},
# {'bounces': 1,
# 'clicks': 2,
# 'combined_id': '555111',
# 'id': '555',
# 'opens': 2}]},
# {'shared_id': '222',
# 'users': [{'bounces': 3,
# 'clicks': 3,
# 'combined_id': '444222',
# 'id': '444',
# 'opens': 2},
# {'bounces': 2,
# 'clicks': 3,
# 'combined_id': '555222',
# 'id': '555',
# 'opens': 3}]}]
You could use itertools.groupby (doc) for grouping elements:
list_1, list_2, list_3 = [], [], []

list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})

from itertools import groupby

# groupby only merges adjacent records, so sort by shared_id first.
records = sorted(list_1 + list_2 + list_3, key=lambda k: k['shared_id'])

# shared_id -> user id -> merged user dict
shared_ids = {}
for v, g in groupby(records, lambda k: k['shared_id']):
    for shared_id in g:
        for user in shared_id['users']:
            merged = shared_ids.setdefault(v, {}).setdefault(user['id'], {})
            merged.update(**user)
            # combined_id is the user id followed by the shared id.
            merged['combined_id'] = '{}{}'.format(user['id'], v)

out = [{'shared_id': k, 'users': list(users.values())} for k, users in shared_ids.items()]

from pprint import pprint
pprint(out)
Prints:
[{'shared_id': '111',
'users': [{'bounces': 2, 'clicks': 1, 'combined_id': '444111', 'id': '444', 'opens': 2},
{'bounces': 1, 'clicks': 2, 'combined_id': '555111', 'id': '555', 'opens': 2}]},
{'shared_id': '222',
'users': [{'bounces': 3, 'clicks': 3, 'combined_id': '444222', 'id': '444', 'opens': 2},
{'bounces': 2, 'clicks': 3, 'combined_id': '555222', 'id': '555', 'opens': 3}]}]
NOTE: If you print shared_ids variable, you get:
{'111': {'444': {'bounces': 2, 'clicks': 1, 'combined_id': '444111', 'id': '444', 'opens': 2},
'555': {'bounces': 1, 'clicks': 2, 'combined_id': '555111', 'id': '555', 'opens': 2}},
'222': {'444': {'bounces': 3, 'clicks': 3, 'combined_id': '444222', 'id': '444', 'opens': 2},
'555': {'bounces': 2, 'clicks': 3, 'combined_id': '555222', 'id': '555', 'opens': 3}}}
Maybe this dictionary will be better to manipulate further.
Object-Oriented Solution
# Container collecting the per-user information of one shared object
class shared(object):
    """Per-user records belonging to a single shared id."""

    def __init__(self, i):
        self.i = i  # the shared id
        self.users = {}  # user id -> dict of that user's fields

    # Record one field of a particular user
    def update_user(self, user_id, key, value):
        """Store ``key: value`` for ``user_id``, creating the user record on first use.

        A new record also receives its "id" and a "combined_id" made of the
        user id followed by the shared id.
        """
        record = self.users.get(user_id)
        if record is None:
            self.users[user_id] = {
                key: value,
                "id": user_id,
                "combined_id": "{0}{1}".format(user_id, self.i),
            }
        else:
            record[key] = value

    # Return the information in the required format
    def get(self):
        """Return ``{'shared_id': ..., 'users': [...]}`` with a fresh list of user records."""
        return {
            'shared_id': self.i,
            'users': list(self.users.values()),
        }
Data
list_1 = []
list_2 = []
list_3 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
Parse the Data
# shared id -> shared object collecting that id's users
info = {}
for l in list_1 + list_2 + list_3:
    sid = l["shared_id"]
    # First record for this shared id: create its container.
    if sid not in info:
        info[sid] = shared(sid)
    holder = info[sid]
    # Walk all the users of the record
    for user_info in l['users']:
        user_id = user_info["id"]
        # Copy every field except the id itself into the container
        for key, value in user_info.items():
            if key == "id":
                continue
            holder.update_user(user_id, key, value)

result = [x.get() for x in info.values()]
print (result)
Output
[{'shared_id': '111', 'users': [{'opens': 2, 'id': '444', 'combined_id': '444111', 'clicks': 1, 'bounces': 2}, {'opens': 2, 'id': '555', 'combined_id': '555111', 'clicks': 2, 'bounces': 1}]},
{'shared_id': '222', 'users': [{'opens': 2, 'id': '444', 'combined_id': '444222', 'clicks': 3, 'bounces': 3}, {'opens': 3, 'id': '555', 'combined_id': '555222', 'clicks': 3, 'bounces': 2}]}]
You could use itertools.groupby() to group the ids together, then merge the dictionaries as needed:
from collections import ChainMap
from itertools import chain, groupby
from operator import itemgetter
by_shared_id = itemgetter('shared_id')
by_user_id = itemgetter('id')

combined_list = []
# groupby only merges adjacent items, so each level is sorted on its key first.
for k, g in groupby(sorted(chain(list_1, list_2, list_3), key=by_shared_id), key=by_shared_id):
    # Flatten the user dicts of every record sharing this shared_id.
    user_dicts = sorted(chain(*map(itemgetter('users'), g)), key=by_user_id)
    users = [
        # ChainMap overlays the user's dicts into a single mapping.
        {'id': k2, 'combined_id': k2 + k, **ChainMap(*g2)}
        for k2, g2 in groupby(user_dicts, key=by_user_id)
    ]
    combined_list.append({'shared_id': k, 'users': users})
print(combined_list)
Output:
[{'shared_id': '111', 'users': [{'id': '444', 'combined_id': '444111', 'bounces': 2, 'clicks': 1, 'opens': 2}, {'id': '555', 'combined_id': '555111', 'bounces': 1, 'clicks': 2, 'opens': 2}]}, {'shared_id': '222', 'users': [{'id': '444', 'combined_id': '444222', 'bounces': 3, 'clicks': 3, 'opens': 2}, {'id': '555', 'combined_id': '555222', 'bounces': 2, 'clicks': 3, 'opens': 3}]}]
# Accumulated result: one {"shared_id", "users"} record per shared id.
global_list = []


def add_item(item):
    """Merge one ``{"shared_id", "users"}`` record into ``global_list``.

    If a record with the same "shared_id" is already stored, each incoming
    user is merged into the stored user(s) with the same "id", or appended
    when no such user exists. Otherwise the record is added as a new entry.

    Only copies are stored. Keeping the caller's own dicts would let later
    merges silently modify the input lists.
    """
    matching = [e_item for e_item in global_list
                if e_item['shared_id'] == item['shared_id']]
    if not matching:
        # Copy the record and its user dicts so the input stays untouched.
        global_list.append({**item, 'users': [dict(user) for user in item['users']]})
        return
    for e_item in matching:
        e_users = e_item['users']
        for user in item['users']:
            same_id = [e_user for e_user in e_users if e_user['id'] == user['id']]
            if same_id:
                for e_user in same_id:
                    e_user.update(user)
            else:
                e_users.append(dict(user))
list_1 = []
list_2 = []
list_3 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
for item in list_1+list_2+list_3:
add_item(item)
print(global_list)