I have three lists of dicts; some sample data looks like this:
list_1, list_2, list_3 = [], [], []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
Using the shared_id and the ids on the users, I want to create one combined list that looks like this:
combined_list.append({"shared_id": "111", "users": [{"id": "444", "opens": 2, "clicks": 1, "bounces": 2, "combined_id": "444111"}, {"id": "555", "opens": 2, "clicks": 2, "bounces": 1, "combined_id": "555111"}]})
combined_list.append({"shared_id": "222", "users": [{"id": "444", "opens": 2, "clicks": 3, "bounces": 3, "combined_id": "444222"}, {"id": "555", "opens": 3, "clicks": 3, "bounces": 2, "combined_id": "555222"}]})
I solved this for two lists using nested for loops, but the code is super messy and does not feel very pythonic. Is there a better way now that I am using three lists?
combined = []
for o in list_1:
    for c in list_2:
        if o['shared_id'] == c['shared_id']:
            csd = {
                'users': [],
                'shared_id': o['shared_id']
            }
            for op in o['users']:
                for cp in c['users']:
                    if op['id'] == cp['id']:
                        cpd = {
                            'opens': op['opens'],
                            'clicks': cp['clicks'],
                            'combined_id': '{}{}'.format(
                                op['id'],
                                csd['shared_id']
                            )
                        }
                        csd['users'].append(cpd)
            combined.append(csd)
Here is a simple function for that:
from pprint import pprint

def combine_lists(*lists):
    result = {}
    # For each list
    for lst in lists:
        # For each dict in the list
        for d1 in lst:
            # Get or make common dict for shared_id
            shared_id = d1["shared_id"]
            shared_dict = result.setdefault(shared_id, {})
            # For each user dict in the dict
            for d2 in d1["users"]:
                # Get or make user dict
                user_id = d2["id"]
                if user_id not in shared_dict:
                    shared_dict[user_id] = {"combined_id": "{}{}".format(user_id, shared_id)}
                # Update information in user dictionary
                shared_dict[user_id].update(d2)
    # Make output as a list
    return [{"shared_id": k, "users": list(v.values())} for k, v in result.items()]
# Test
list_1 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2 = []
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3 = []
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
pprint(combine_lists(list_1, list_2, list_3))
# [{'shared_id': '111',
# 'users': [{'bounces': 2,
# 'clicks': 1,
# 'combined_id': '444111',
# 'id': '444',
# 'opens': 2},
# {'bounces': 1,
# 'clicks': 2,
# 'combined_id': '555111',
# 'id': '555',
# 'opens': 2}]},
# {'shared_id': '222',
# 'users': [{'bounces': 3,
# 'clicks': 3,
# 'combined_id': '444222',
# 'id': '444',
# 'opens': 2},
# {'bounces': 2,
# 'clicks': 3,
# 'combined_id': '555222',
# 'id': '555',
# 'opens': 3}]}]
You could use itertools.groupby for grouping elements:
list_1, list_2, list_3 = [], [], []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
from itertools import groupby

shared_ids = {}
for v, g in groupby(sorted(list_1 + list_2 + list_3, key=lambda k: k['shared_id']), lambda k: k['shared_id']):
    for shared_id in g:
        for user in shared_id['users']:
            shared_ids.setdefault(v, {}).setdefault(user['id'], {}).update(**user)
            shared_ids[v][user['id']]['combined_id'] = '{}{}'.format(user['id'], v)

out = [{'shared_id': k, 'users': [shared_ids[k][kk] for kk in shared_ids[k]]} for k in shared_ids]

from pprint import pprint
pprint(out)
Prints:
[{'shared_id': '111',
'users': [{'bounces': 2, 'clicks': 1, 'combined_id': '444111', 'id': '444', 'opens': 2},
{'bounces': 1, 'clicks': 2, 'combined_id': '555111', 'id': '555', 'opens': 2}]},
{'shared_id': '222',
'users': [{'bounces': 3, 'clicks': 3, 'combined_id': '444222', 'id': '444', 'opens': 2},
{'bounces': 2, 'clicks': 3, 'combined_id': '555222', 'id': '555', 'opens': 3}]}]
NOTE: If you print shared_ids variable, you get:
{'111': {'444': {'bounces': 2, 'clicks': 1, 'combined_id': '444111', 'id': '444', 'opens': 2},
'555': {'bounces': 1, 'clicks': 2, 'combined_id': '555111', 'id': '555', 'opens': 2}},
'222': {'444': {'bounces': 3, 'clicks': 3, 'combined_id': '444222', 'id': '444', 'opens': 2},
'555': {'bounces': 2, 'clicks': 3, 'combined_id': '555222', 'id': '555', 'opens': 3}}}
Maybe this dictionary will be better to manipulate further.
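For example (purely illustrative, using the structure printed above), individual values can be looked up directly by shared id and user id:
# Illustrative lookups into the nested shared_ids dict printed above
print(shared_ids['111']['444']['opens'])        # 2
print(shared_ids['222']['555']['combined_id'])  # '555222'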
Object Oriented Solution
# Data structure to manage shared object information
class shared(object):
    def __init__(self, i):
        self.i = i
        self.users = dict()

    # Add information for a particular user
    def update_user(self, user_id, key, value):
        if user_id in self.users:
            self.users[user_id][key] = value
        else:
            self.users[user_id] = {key: value,
                                   "id": user_id,
                                   "combined_id": "{0}{1}".format(user_id, self.i)}

    # Return the information in the required format
    def get(self):
        return {
            'shared_id': self.i,
            'users': [user for user in self.users.values()]
        }
Data
list_1 = []
list_2 = []
list_3 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
Parse the Data
info = {}
for l in list_1 + list_2 + list_3:
    # Is there already information for this shared_id?
    if l["shared_id"] not in info:
        info[l["shared_id"]] = shared(l["shared_id"])
    # Parse all the users
    for user_info in l['users']:
        user_id = user_info["id"]
        # Parse the user information
        for key, value in user_info.items():
            if key != "id":
                # Update the user information
                info[l["shared_id"]].update_user(user_id, key, value)

result = [x.get() for x in info.values()]
print(result)
Output
[{'shared_id': '111', 'users': [{'opens': 2, 'id': '444', 'combined_id': '444111', 'clicks': 1, 'bounces': 2}, {'opens': 2, 'id': '555', 'combined_id': '555111', 'clicks': 2, 'bounces': 1}]},
{'shared_id': '222', 'users': [{'opens': 2, 'id': '444', 'combined_id': '444222', 'clicks': 3, 'bounces': 3}, {'opens': 3, 'id': '555', 'combined_id': '555222', 'clicks': 3, 'bounces': 2}]}]
You could use itertools.groupby() to group the ids together, then merge the dictionaries as needed:
from collections import ChainMap
from itertools import chain, groupby
from operator import itemgetter
combined_list = []
for k, g in groupby(sorted(chain(list_1, list_2, list_3), key=itemgetter('shared_id')), key=itemgetter('shared_id')):
    users = []
    for k2, g2 in groupby(sorted(chain(*map(itemgetter('users'), g)), key=itemgetter('id')), key=itemgetter('id')):
        users.append({'id': k2, 'combined_id': k2 + k, **ChainMap(*g2)})
    combined_list.append({'shared_id': k, 'users': users})
print(combined_list)
Output:
[{'shared_id': '111', 'users': [{'id': '444', 'combined_id': '444111', 'bounces': 2, 'clicks': 1, 'opens': 2}, {'id': '555', 'combined_id': '555111', 'bounces': 1, 'clicks': 2, 'opens': 2}]}, {'shared_id': '222', 'users': [{'id': '444', 'combined_id': '444222', 'bounces': 3, 'clicks': 3, 'opens': 2}, {'id': '555', 'combined_id': '555222', 'bounces': 2, 'clicks': 3, 'opens': 3}]}]
global_list = []

def add_item(item):
    item_found = False
    for e_item in global_list:
        if e_item['shared_id'] == item['shared_id']:
            item_found = True
            e_users = e_item['users']
            users = item['users']
            for user in users:
                user_found = False
                for e_user in e_users:
                    if e_user['id'] == user['id']:
                        user_found = True
                        e_user.update(user)
                if user_found is False:
                    e_users.append(user)
    if item_found is False:
        global_list.append(item)
list_1 = []
list_2 = []
list_3 = []
list_1.append({"shared_id": "111", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 2}]})
list_1.append({"shared_id": "222", "users": [{"id": "444", "opens": 2}, {"id": "555", "opens": 3}]})
list_2.append({"shared_id": "111", "users": [{"id": "444", "clicks": 1}, {"id": "555", "clicks": 2}]})
list_2.append({"shared_id": "222", "users": [{"id": "444", "clicks": 3}, {"id": "555", "clicks": 3}]})
list_3.append({"shared_id": "111", "users": [{"id": "444", "bounces": 2}, {"id": "555", "bounces": 1}]})
list_3.append({"shared_id": "222", "users": [{"id": "444", "bounces": 3}, {"id": "555", "bounces": 2}]})
for item in list_1 + list_2 + list_3:
    add_item(item)
print(global_list)
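Note that this merges everything in place into the dicts from list_1 and does not add a combined_id; if you need one, a small, illustrative follow-up pass along these lines could add it:
# Illustrative follow-up: add combined_id to each merged user dict
for item in global_list:
    for user in item['users']:
        user['combined_id'] = '{}{}'.format(user['id'], item['shared_id'])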
Related
I have a list of dictionaries:
data = [{"average": 2, "day": "2022-01-01", "name": "joe", "employee_id": 1},
{"average": 3, "day": "2022-01-02", "name": "joe", "employee_id": 1},
{"average": 9, "day": "2022-01-03", "name": "joe", "employee_id": 1},
{"sum": 13, "day": "2022-01-01", "name": "joe", "employee_id": 1},
{"sum": 15, "day": "2022-01-02", "name": "joe", "employee_id": 1},
{"sum": 0, "day": "2022-01-03", "name": "joe", "employee_id": 1},
{"average": 1, "day": "2022-01-01", "name": "bob", "employee_id": 2},
{"average": 3, "day": "2022-01-02", "name": "bob", "employee_id": 2},
{"sum": 9, "day": "2022-01-01", "name": "bob", "employee_id": 2},
{"sum": 8, "day": "2022-01-02", "name": "bob", "employee_id": 2}]
I want my output as:
output = [{"name": "joe", "employee_id": 1, "day": "2022-01-01", "average": 2, "sum": 13},
{"name": "joe", "employee_id": 1, "day": "2022-01-02", "average": 3, "sum": 15},
{"name": "joe", "employee_id": 1, "day": "2022-01-03", "average": 9, "sum": 0},
{"name": "bob", "employee_id": 2, "day": "2022-01-01", "average": 1, "sum": 9},
{"name": "bob", "employee_id": 2, "day": "2022-01-02", "average": 3, "sum": 8}]
The goal is that the output values are put together by day, name, and employee_id.
I've tried:
output = {}
for item in data:
    if item["day"] not in output:
        output[item["day"]] = item
    else:
        output[item["day"]].update(item)

print(list(output.values()))
This works in getting "average", "sum", and "day" together, but because the key is only the day, employees who share the same day overwrite each other, so not all of the employees and their IDs end up in the output.
Any help is appreciated.
Use collections.defaultdict with dict, taking the ('day', 'name') pair of each dictionary as the key:
>>> from collections import defaultdict
>>> defdict = defaultdict(dict)
>>> for mp in data:
...     defdict[mp['day'], mp['name']].update(mp)
...
>>> keys = ('name', 'employee_id', 'day', 'average', 'sum')
>>> [{k: mp[k] for k in keys} for mp in defdict.values()]
[{'name': 'joe', 'employee_id': 1, 'day': '2022-01-01', 'average': 2, 'sum': 13},
{'name': 'joe', 'employee_id': 1, 'day': '2022-01-02', 'average': 3, 'sum': 15},
{'name': 'joe', 'employee_id': 1, 'day': '2022-01-03', 'average': 9, 'sum': 0},
{'name': 'bob', 'employee_id': 2, 'day': '2022-01-01', 'average': 1, 'sum': 9},
{'name': 'bob', 'employee_id': 2, 'day': '2022-01-02', 'average': 3, 'sum': 8}]
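If some (day, name) group were missing an "average" or "sum" entry, the final comprehension would raise a KeyError; a defensive variant (illustrative) could use dict.get instead:
>>> [{k: mp.get(k) for k in keys} for mp in defdict.values()]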
For roughly 1.5 million records, the performance of this solution is still better than pandas (by the time the data has been converted into a DataFrame, the for loop has already finished the whole job):
In [451]: random.seed(0)
...: names = [''.join(random.choices(string.ascii_lowercase, k=random.randrange(3, 7))) for _ in range(10000)]
...: dates = [str(datetime.date(2022, i, j)) for i in range(7, 10) for j in range(1, 31)]
...: keys = ['sum', 'average']
...:
...: data = [{k: random.randrange(10), 'day': date, 'name': name, 'employee_id': i}
...: for i, name in enumerate(names, 1)
...: for date in sorted(random.sample(dates, random.randrange(60, 90)))
...: for k in keys]
...:
In [452]: len(data)
Out[452]: 1492286
In [453]: %%timeit
...: defdict = defaultdict(dict)
...: for mp in data:
...: defdict[mp['day'], mp['name']].update(mp)
...: keys = ('name', 'employee_id', 'day', 'average', 'sum')
...: [{k: mp[k] for k in keys} for mp in defdict.values()]
...:
...:
926 ms ± 6.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [454]: %%timeit
...: df = pd.DataFrame(data)
...: pd.merge(df.loc[df['average'].notna()][[ 'name','day','employee_id','average']],
...: df.loc[df['sum'].notna()][['name','day','employee_id','sum']],
...: how='outer'
...: ).to_dict(orient= 'records')
...:
...:
3.58 s ± 19.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [455]: %timeit pd.DataFrame(data)
1.26 s ± 17.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Here is one way to do it:
# filter using loc where average is not null and where sum is not null, as two separate frames
# merge the two DFs
# finally, output as a dict with orient='records'
import pandas as pd

df = pd.DataFrame(data)
pd.merge(df.loc[df['average'].notna()][['name', 'day', 'employee_id', 'average']],
         df.loc[df['sum'].notna()][['name', 'day', 'employee_id', 'sum']],
         how='outer'
         ).to_dict(orient='records')
[{'name': 'joe',
'day': '2022-01-01',
'employee_id': 1,
'average': 2.0,
'sum': 13.0},
{'name': 'joe',
'day': '2022-01-02',
'employee_id': 1,
'average': 3.0,
'sum': 15.0},
{'name': 'joe',
'day': '2022-01-03',
'employee_id': 1,
'average': 9.0,
'sum': 0.0},
{'name': 'bob',
'day': '2022-01-01',
'employee_id': 2,
'average': 1.0,
'sum': 9.0},
{'name': 'bob',
'day': '2022-01-02',
'employee_id': 2,
'average': 3.0,
'sum': 8.0}]
From the description given, the combination of "day", "name", and "employee_id" acts as a unique key to which the other two fields should be added. Each incoming dictionary has these, and we can use them as a key into a new dictionary, but we need to convert them to something hashable, such as a JSON string produced with sorted keys so that equal keys compare equal.
from json import dumps
data = [{"average": 2, "day": "2022-01-01", "employee_id": 1, "name": "joe"},
{"average": 3, "day": "2022-01-02", "name": "joe", "employee_id": 1},
{"average": 9, "day": "2022-01-03", "name": "joe", "employee_id": 1},
{"sum": 13, "day": "2022-01-01", "name": "joe", "employee_id": 1},
{"sum": 15, "day": "2022-01-02", "name": "joe", "employee_id": 1},
{"sum": 0, "day": "2022-01-03", "name": "joe", "employee_id": 1},
{"average": 1, "day": "2022-01-01", "name": "bob", "employee_id": 2},
{"average": 3, "day": "2022-01-02", "name": "bob", "employee_id": 2},
{"sum": 9, "day": "2022-01-01", "name": "bob", "employee_id": 2},
{"sum": 8, "day": "2022-01-02", "name": "bob", "employee_id": 2}]
flattend_employee_summaries = dict()
for employee_summary in data:
    key = employee_summary.copy()
    if "average" in key:
        del key["average"]
        if dumps(key, sort_keys=True) not in flattend_employee_summaries:
            flattend_employee_summaries[dumps(key, sort_keys=True)] = employee_summary.copy()
        else:
            flattend_employee_summaries[dumps(key, sort_keys=True)]["average"] = employee_summary["average"]
    if "sum" in key:
        del key["sum"]
        if dumps(key, sort_keys=True) not in flattend_employee_summaries:
            flattend_employee_summaries[dumps(key, sort_keys=True)] = employee_summary.copy()
        else:
            flattend_employee_summaries[dumps(key, sort_keys=True)]["sum"] = employee_summary["sum"]

flattend_employee_summaries = [summary for summary in flattend_employee_summaries.values()]
print(f'{flattend_employee_summaries=}')
It has been answered, and I suspect this is the long way of repeating Mechanic Pig's solution which I'd recommend. For all of the solutions, I believe we are assuming there is only one average record per employee/day.
employees = dict()
for data_row in data:
    if data_row['employee_id'] not in employees:
        employees[data_row['employee_id']] = {data_row['day']: {'name': data_row.get('name', 0),
                                                                'average': data_row.get('average', 0),
                                                                'sum': data_row.get('sum', 0)
                                                                }
                                              }
    else:
        data_row_day = data_row['day']
        if data_row['day'] not in employees[data_row['employee_id']]:
            employees[data_row['employee_id']][data_row_day] = {'name': data_row.get('name', 0),
                                                                'average': data_row.get('average', 0),
                                                                'sum': data_row.get('sum', 0)
                                                                }
        else:
            current_sum = employees[data_row['employee_id']][data_row_day].get('sum', 0)
            employees[data_row['employee_id']][data_row_day].update({'sum': current_sum + data_row.get('sum', 0)})

employee_output = list()
for employee_id, employee_dates in employees.items():
    for employee_date, employee_details in employee_dates.items():
        employee_output.append({"name": employee_details['name'],
                                "employee_id": employee_id,
                                "day": employee_date,
                                "average": employee_details['average'],
                                "sum": employee_details['sum'],
                                })
employee_output would then contain:
[{'name': 'joe',
'employee_id': 1,
'day': '2022-01-01',
'average': 2,
'sum': 13},
{'name': 'joe',
'employee_id': 1,
'day': '2022-01-02',
'average': 3,
'sum': 15},
{'name': 'joe',
'employee_id': 1,
'day': '2022-01-03',
'average': 9,
'sum': 0},
{'name': 'bob',
'employee_id': 2,
'day': '2022-01-01',
'average': 1,
'sum': 9},
{'name': 'bob',
'employee_id': 2,
'day': '2022-01-02',
'average': 3,
'sum': 8}]
I am working with a nested data structure which needs to be flattened. The values need to be aggregated so that totals are produced across each level of the nested data. I'm trying to do this recursively, but it's not clear to me how best to achieve it.
The following is an example of the data I'm working with.
def get_result():
    return {
        "a1": {
            "b1": {
                "c1": {
                    "d1": 1,
                    "d2": 1,
                },
                "c2": {
                    "d3": 1,
                }
            },
            "b2": {
                "c3": {
                    "d4": 1
                }
            }
        },
        "a2": {}
    }
The data I'd like to produce would be as follows:
[
    {
        "key": "a1",
        "total": 4
    },
    {
        "key": "b1",
        "total": 3
    },
    {
        "key": "c1",
        "total": 2
    },
    {
        "key": "d1",
        "total": 1
    },
    {
        "key": "d2",
        "total": 1
    },
    {
        "key": "c2",
        "total": 1
    },
    {
        "key": "d3",
        "total": 1
    },
    {
        "key": "b2",
        "total": 1
    },
    {
        "key": "c3",
        "total": 1
    },
    {
        "key": "d4",
        "total": 1
    }
]
You can use recursion
from collections import defaultdict

def agg(data):
    result = defaultdict(int)
    agg_sum = 0
    for k, v in data.items():
        if isinstance(v, dict):
            d, sub = agg(v)
            if sub:
                result.update(d)
                result[k] += sub
                agg_sum += sub
        else:
            result[k] += v
            agg_sum += v
    return result, agg_sum
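As a sketch of how the function might be called and its result reshaped into the requested list of {'key': ..., 'total': ...} dicts (note that empty subtrees such as "a2" never get added to result, so they will not appear):
# Illustrative usage, assuming agg() and get_result() are defined as above
totals, grand_total = agg(get_result())
flattened = [{"key": k, "total": v} for k, v in totals.items()]
print(flattened)     # children appear before their parents, e.g. {'key': 'd1', 'total': 1} ... {'key': 'a1', 'total': 4}
print(grand_total)   # 4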
You can use a recursive generator function for a shorter solution:
d = {'a1': {'b1': {'c1': {'d1': 1, 'd2': 1}, 'c2': {'d3': 1}}, 'b2': {'c3': {'d4': 1}}}, 'a2': {}}

def get_aggr(d):
    return d if not isinstance(d, dict) else sum(map(get_aggr, d.values()))

def aggr_keys(d):
    for a, b in d.items():
        yield {'key': a, 'total': get_aggr(b)}
        yield from (() if not isinstance(b, dict) else aggr_keys(b))

print(list(aggr_keys(d)))
Output:
[{'key': 'a1', 'total': 4},
{'key': 'b1', 'total': 3},
{'key': 'c1', 'total': 2},
{'key': 'd1', 'total': 1},
{'key': 'd2', 'total': 1},
{'key': 'c2', 'total': 1},
{'key': 'd3', 'total': 1},
{'key': 'b2', 'total': 1},
{'key': 'c3', 'total': 1},
{'key': 'd4', 'total': 1},
{'key': 'a2', 'total': 0}]
I have a list of dicts with the same structure
[{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
I want duplicate "Program Name" values (e.g. "Bulldozer" appearing twice) to be renamed to "Bulldozer (1)", "Bulldozer (2)", and so on.
An efficient way is to use defaultdict to count the "Program Name" occurrences; the time complexity is O(n):
from collections import defaultdict
l = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4}, {"Program Name": "Bulldozer", "Level": 4}, {"Program Name": "Robot", "Level": 1}]
tmp = defaultdict(int)
for i in l:
    i["Program Name"] = f'{i["Program Name"]} ({tmp[i["Program Name"]]})' if tmp[i["Program Name"]] else i["Program Name"]
    tmp[i["Program Name"].split()[0]] += 1
print(l)
Result:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}, {'Program Name': 'Bulldozer (2)', 'Level': 4}, {'Program Name': 'Robot (1)', 'Level': 1}]
Hope this helps:
input = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
def update_input(input):
existing_program_names = {}
for i, d in enumerate(input):
current_list_program_name = d['Program Name']
try:
existing_program_names[current_list_program_name] += 1
except KeyError:
# Program name not in storage yet add it
existing_program_names.update({current_list_program_name: 0})
if existing_program_names[current_list_program_name] > 0 :
ID = existing_program_names[current_list_program_name]
input[i]['Program Name'] = current_list_program_name + ' ({ID})'.format(ID=ID)
else:
pass
return input
output = update_input(input)
yields:
[{'Program Name': 'Bulldozer', 'Level': 3}, {'Program Name': 'Robot', 'Level': 1}, {'Program Name': 'Bulldozer (1)', 'Level': 4}]
You can try this too:
data = [
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Bulldozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Bulldozer", "Level": 4},
{"Program Name": "Rozer", "Level": 3},
{"Program Name": "Robot", "Level": 1},
{"Program Name": "Rozer", "Level": 3}
]
Approach: 01
import pandas as pd
c = pd.DataFrame(data)
c['group_code'] = c.groupby(['Program Name']).cumcount() + 1
c['Program Name'] = ["{0} ({1})".format(x, y) for (x, y) in c[[
'Program Name', 'group_code']].values]
output = c[['Program Name', 'Level']].to_dict(orient='records')
print(output)
Approach: 02
temp = {}
for item in data:
    temp.update(
        {
            item['Program Name']: temp[item['Program Name']] + 1 if temp.get(item['Program Name']) else 1
        }
    )
    item['Program Name'] = item['Program Name'] + ' (' + str(temp[item['Program Name']]) + ')'

print(data)
output:
[
{"Program Name": "Bulldozer (1)", "Level": 3},
{"Program Name": "Robot (1)", "Level": 1},
{"Program Name": "Bulldozer (2)", "Level": 4},
{"Program Name": "Rozer (1)", "Level": 3},
{"Program Name": "Robot (2)", "Level": 1},
{"Program Name": "Rozer (2)", "Level": 3},
{"Program Name": "Bulldozer (3)", "Level": 3},
{"Program Name": "Robot (3)", "Level": 1},
{"Program Name": "Bulldozer (4)", "Level": 4},
{"Program Name": "Rozer (3)", "Level": 3},
{"Program Name": "Robot (4)", "Level": 1},
{"Program Name": "Rozer (4)", "Level": 3}
]
I would recommend using pandas (approach 01) if you have a huge amount of data.
Thanks to @jizhihaoSAMA I have managed to find a solution to my problem with a small edit:
machines = [{"Program Name": "Bulldozer", "Level": 3}, {"Program Name": "Robot", "Level": 1}, {"Program Name": "Bulldozer", "Level": 4}]
tmp = defaultdict(int)
for i in machines:
name = i["Program Name"].strip(f' ({tmp[i["Program Name"]]})')
i["Program Name"] = f'{name} ({tmp[i["Program Name"]]})' if tmp[name] else i["Program Name"]
tmp[name] += 1
This avoids problems with spaces in the program name or other more complex names.
I have the following list of dictionaries:
dict1 = [{"id": 1, "name": "tamara", "age":23},
{"id": 1, "name": "mia", "age":14},
{"id": 1, "name": "teo", "age":33},
{"id": 2, "name": "maya", "age":30}}
I would like to create a new list of dictionaries from the existing one, so that if the same "id": 1 appears three times in dict1, it is not repeated; instead the entries are nested under that id:
dict2 = [{"id": 1, newkey: [{"name": "tamara", "age":23},
{"name":"mia", "age":14},
{"name": "teo", "age":33}]},
{"id": 2, "name": "maya", "age":30}}
This is what I want to achieve. Any suggestions on how?
You can use itertools.groupby:
import itertools
dict1 = [{"id": 1, "name": "tamara", "age":23}, {"id": 1, "name": "mia", "age":14}, {"id": 1, "name": "teo", "age":33}, {"id": 2, "name": "maya", "age":30}]
new_d = [[a, list(b)] for a, b in itertools.groupby(sorted(dict1, key=lambda x:x['id']), key=lambda x:x['id'])]
dict2 = [{'id':a, 'new_key':[{c:d for c, d in i.items() if c != 'id'} for i in b]} for a, b in new_d]
Output:
[{'new_key': [{'age': 23, 'name': 'tamara'}, {'age': 14, 'name': 'mia'}, {'age': 33, 'name': 'teo'}], 'id': 1}, {'new_key': [{'age': 30, 'name': 'maya'}], 'id': 2}]
Use itertools.groupby
>>> from operator import itemgetter
>>> from itertools import groupby
>>> dict1 = [{"id": 1, "name": "tamara", "age":23}, {"id": 1, "name": "mia", "age":14}, {"id": 1, "name": "teo", "age":33}, {"id": 2, "name": "maya", "age":30}]
>>> [{'id': k, 'new_key':[{k2:v2} for d in list(v) for k2,v2 in d.items() if k2!='id']} for k,v in groupby(dict1, itemgetter('id'))]
# [{'new_key': [{'age': 23}, {'name': 'tamara'}, {'age': 14}, {'name': 'mia'}, {'age': 33}, {'name': 'teo'}], 'id': 1}, {'new_key': [{'age': 30}, {'name': 'maya'}], 'id': 2}]
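If you want each person's remaining fields kept together in a single dict (closer to the desired dict2) rather than one dict per field, a small, illustrative variation of the same groupby call merges them first:
>>> [{'id': k, 'new_key': [{k2: v2 for k2, v2 in d.items() if k2 != 'id'} for d in v]} for k, v in groupby(dict1, itemgetter('id'))]
# e.g. [{'id': 1, 'new_key': [{'name': 'tamara', 'age': 23}, {'name': 'mia', 'age': 14}, {'name': 'teo', 'age': 33}]}, {'id': 2, 'new_key': [{'name': 'maya', 'age': 30}]}]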
I am trying to generate a DataFrame that produces the JSON below.
Json data:
{
    "name": "flare",
    "children": [
        {
            "name": "K1",
            "children": [
                {"name": "Exact", "size": 4},
                {"name": "synonyms", "size": 14}
            ]
        },
        {
            "name": "K2",
            "children": [
                {"name": "Exact", "size": 10},
                {"name": "synonyms", "size": 20}
            ]
        },
        {
            "name": "K3",
            "children": [
                {"name": "Exact", "size": 0},
                {"name": "synonyms", "size": 5}
            ]
        },
        {
            "name": "K4",
            "children": [
                {"name": "Exact", "size": 13},
                {"name": "synonyms", "size": 15}
            ]
        },
        {
            "name": "K5",
            "children": [
                {"name": "Exact", "size": 0},
                {"name": "synonyms", "size": 0}
            ]
        }
    ]
}
input data:
name Exact synonyms
K1 4 14
K2 10 20
K3 0 5
K4 13 15
K5 0 0
I tried creating a DataFrame with the values from the JSON, but I was not able to get the desired JSON from df.to_json. Please help.
You need to reshape the data with set_index + stack, and then use groupby with apply to build the nested lists of dicts:
import json
df = (df.set_index('name')
        .stack()
        .reset_index(level=1)
        .rename(columns={'level_1': 'name', 0: 'size'})
        .groupby(level=0).apply(lambda x: x.to_dict(orient='records'))
        .reset_index(name='children')
      )
print (df)
name children
0 K1 [{'name': 'Exact', 'size': 4}, {'name': 'synon...
1 K2 [{'name': 'Exact', 'size': 10}, {'name': 'syno...
2 K3 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
3 K4 [{'name': 'Exact', 'size': 13}, {'name': 'syno...
4 K5 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
#convert output to dict
j = { "name": "flare", "children": df.to_dict(orient='records')}
#for nice output - easier check
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(j)
{ 'children': [ { 'children': [ {'name': 'Exact', 'size': 4},
{'name': 'synonyms', 'size': 14}],
'name': 'K1'},
{ 'children': [ {'name': 'Exact', 'size': 10},
{'name': 'synonyms', 'size': 20}],
'name': 'K2'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 5}],
'name': 'K3'},
{ 'children': [ {'name': 'Exact', 'size': 13},
{'name': 'synonyms', 'size': 15}],
'name': 'K4'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 0}],
'name': 'K5'}],
'name': 'flare'}
# convert data to json and write to file
with open('data.json', 'w') as outfile:
    json.dump(j, outfile)