maintain dictionary structure while reducing nested dictionary

maintain dictionary structure while reducing nested dictionary - python

I have a list dd of pairs of nested dictionaries, and I would like to reduce it to a list of dictionaries while maintaining the pair structure:
dd = [
[{'id': 'bla',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1A', 'amount': '2'}]},
{'id': 'bla2',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1B', 'amount': '1'}]}
],
[{'id': 'bla3',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2A', 'amount': '3'}]},
{'id': 'bla4',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2B', 'amount': '4'}]}
]
]
I want to reduce this to a list of paired dictionaries while extracting only some detail. For example, an expected output may look like this:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]
I have run my code:
pair=[]
for all_pairs in dd:
for output_pairs in all_pairs:
for d in output_pairs.get('detail'):
if d['name'] != 'discard':
pair.append(d)
output_pair = {
k: [d.get(k) for d in pair]
for k in set().union(*pair)
}
But it didn't maintain that structure :
{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B', 'KEEP_PAIR_2A', 'KEEP_PAIR_2B'],
'amount': ['2', '1', '3', '4']}
I assume I need some kind of list comprehension to solve this, but where in the for loop should I put it to maintain the structure?

Since you want to combine dictionaries in lists, one option is to use dict.setdefault:
# Build one merged dictionary per pair so the pair structure of `dd` survives.
pair = []
for all_pairs in dd:
    # A fresh accumulator per pair keeps values from different pairs apart.
    dct = {}
    for output_pairs in all_pairs:
        for d in output_pairs.get('detail'):
            if d['name'] != 'discard':
                for k, v in d.items():
                    # The amounts are stored as strings (e.g. '2') while the
                    # desired result lists them as integers, so convert them.
                    dct.setdefault(k, []).append(int(v) if k == 'amount' else v)
    pair.append(dct)
Output:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]

Related

Remove duplicate dict based on field values

Given the following list of dicts, I want to remove duplicates where all fields are identical except for the id field.
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
To produce the following:
new_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"05","name":"larry","age":66}
]
My current code only works for cases where all fields of the dictionary are identical:
#! /usr/bin/python
for x in old_data:
if x not in new_d:
new_data.append(x)

Build a dict with the significant part of the dict as the key, then turn the values back into a list:
>>> old_data = [
... {"id":"01","name":"harry","age":21},
... {"id":"02","name":"barry","age":32},
... {"id":"03","name":"harry","age":44},
... {"id":"04","name":"harry","age":21},
... {"id":"05","name":"larry","age":66}
... ]
>>> sorted({(d["name"], d["age"]): d for d in reversed(old_data)}.values(), key=lambda d: d["id"])
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
If you don't care about which specific ids you keep or how they're sorted, it's simpler:
>>> list({(d["name"], d["age"]): d for d in old_data}.values())
[{'id': '04', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]

Try this; it ignores id in the comparison:
def remove_duplicate(old_data):
    """Return the entries of old_data without name/age duplicates.

    Two entries count as duplicates when both their 'name' and their 'age'
    values are equal; the 'id' field is ignored. The first entry of each
    group is kept and the input order is preserved. The input list itself
    is not modified.
    """
    new_data = []
    for candidate in old_data:
        # Logical `and` (not bitwise `&`) so the age comparison is skipped
        # as soon as the names differ.
        already_kept = any(
            kept['name'] == candidate['name'] and kept['age'] == candidate['age']
            for kept in new_data
        )
        if not already_kept:
            new_data.append(candidate)
    return new_data
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
print(remove_duplicate(old_data))
output:
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]

A straightforward solution is to keep track of the name/age combinations already seen in a list.
old_data = [
    {"id": "01", "name": "harry", "age": 21},
    {"id": "02", "name": "barry", "age": 32},
    {"id": "03", "name": "harry", "age": 44},
    {"id": "04", "name": "harry", "age": 21},
    {"id": "05", "name": "larry", "age": 66},
]

# track_list remembers every [name, age] combination that was already kept.
# A list of lists is used, so membership is tested by equality only.
track_list = []
new_data = []
for obj in old_data:
    key = [obj['name'], obj['age']]
    # Keep only the first entry for each name/age combination.
    if key not in track_list:
        track_list.append(key)
        new_data.append(obj)
print(new_data)
output
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]

Only hardcoding 'id', not the other keys:
# Keep the first dictionary seen for each combination of non-id fields.
tmp = {}
for d in old_data:
    # Every item except ('id', value) identifies a duplicate; a frozenset
    # makes that collection of items usable as a dictionary key.
    k = frozenset(d.items() - {('id', d['id'])})
    # setdefault stores d only when k is new, so later duplicates are dropped.
    tmp.setdefault(k, d)
new_data = list(tmp.values())

How to create an empty list of dictionaries and populate afterwards?

I need to initialize an empty list of dictionaries (LOD) whose entries must have the following keys: "id", "name", "age", "gender". I want to create a loop/nested loop that populates the LOD. For populating it, I have a list of IDs; the values for the rest of the keys are generated using the random function.
The ID list looks like this: id = ['1','2','3']
The result must look something like this.
LOD = [
{
'id': '1',
'name':'122121',
'age':'2131',
'gender':'121'
},
{
'id': '2',
'name':'122121',
'age':'2131',
'gender':'121'
},
{
'id': '3',
'name':'122121',
'age':'2131',
'gender':'121'
},
]

CJDB already does what you want. But if you'd perhaps prefer another approach:
ids = ['1','2','3']
keys = ["name","age", "gender"]
LOD = []
and then populate your list with dictionaries
for i in ids:
    # Start each record with its id, then fill in the remaining keys.
    your_dictionary = {"id": i}
    for key in keys:
        # Placeholder text standing in for the real random-value call.
        your_dictionary[key] = '{}_rnd_function_output'.format(key)
    LOD.append(your_dictionary)
And the output would be
>>> LOD
[{'id': '1',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
{'id': '2',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
{'id': '3',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'}
]
You might also consider using a dictionary of sub-dictionaries: your ids would be the keys of the main dictionary, and the sub-dictionaries would be the values.
# Map each id to its own sub-dictionary of generated values.
LOD = {}
for i in ids:
    LOD[i] = {}
    for key in keys:
        # Placeholder text standing in for the real random-value call.
        LOD[i][key] = '{}_rnd_function_output'.format(key)
And the output
>>> LOD
{'1': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
'2': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
'3': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'}}

You can use a list comprehension (of dictionary literals) for this:
ids = ['1','2','3']
LOD = [
{
'id': i,
'name':'122121',
'age':'2131',
'gender':'121'
} for i in ids
]
Output:
>>> LOD
[{'id': '1', 'name': '122121', 'age': '2131', 'gender': '121'},
{'id': '2', 'name': '122121', 'age': '2131', 'gender': '121'},
{'id': '3', 'name': '122121', 'age': '2131', 'gender': '121'}]
Or, using the random module:
import random
ids = ['1','2','3']
LOD = [
{
'id': i,
'name': str(random.randint(100000, 999999)),
'age': str(random.randint(1000, 9999)),
'gender': str(random.randint(100, 999))
} for i in ids
]
Output:
>>> LOD
[{'id': '1', 'name': '727325', 'age': '5367', 'gender': '238'},
{'id': '2', 'name': '316019', 'age': '8963', 'gender': '702'},
{'id': '3', 'name': '464023', 'age': '4324', 'gender': '155'}]
Note that you should not use id as a variable name, as it shadows Python's built-in id() function.

You can do it by initializing dict objects in list comprehensions
import random  # needed for random.randint below

keys = ['id', 'name', 'age', 'gender']
ids = ['1', '2', '3']
# One dictionary per id: the 'id' key carries the id itself, every other key
# gets a random integer between 1 and 100 (both ends included).
LOD = [
    {key: (i if key == 'id' else random.randint(1, 100)) for key in keys}
    for i in ids
]
print(LOD)
'''
[{'id': '1', 'name': 34, 'age': 10, 'gender': 57},
{'id': '2', 'name': 64, 'age': 13, 'gender': 21},
{'id': '3', 'name': 11, 'age': 17, 'gender': 2}]
'''

Remove duplicates from list of dictionaries within list of dictionaries

I have list:
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
I want to remove duplicates from the list of dictionaries in 'account':
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'}]}]
When using set, I get the following error:
TypeError: unhashable type: 'dict'
Can anybody help me with this problem?

This structure is probably over complicated, but it gets the job done.
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
>>> [{'date': date,
'account': [{'name': name} for name in group]
} for group, date in zip([set(account.get('name')
for account in item.get('account'))
for item in my_list],
[d.get('date') for d in my_list])]
[{'account': [{'name': 'a'}, {'name': 'b'}], 'date': '10.06.2016'},
{'account': [{'name': 'a'}], 'date': '22.06.2016'}]

def deduplicate_account_names(l):
    """Remove repeated account names from every entry of l, in place.

    Each entry is expected to be a dictionary with an 'account' list of
    dictionaries. That list is replaced by one {'name': ...} dictionary per
    distinct name, in order of first appearance. Only the 'name' key is
    carried over; an account without a 'name' key contributes the name None.
    Names must be hashable. Returns None.
    """
    for entry in l:
        # dict.fromkeys drops repeats while keeping first-seen order, unlike
        # a set, whose iteration order is arbitrary.
        unique_names = dict.fromkeys(
            account.get('name') for account in entry['account']
        )
        entry['account'] = [{'name': name} for name in unique_names]
# even shorter:
# def deduplicate_account_names(l):
# for d in l:
# d['account'] = [{'name': name} for name in set(map(lambda d: d.get('name'), d['account']))]
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
deduplicate_account_names(my_list)
print(my_list)
# [ {'date': '10.06.2016',
# 'account': [ {'name': 'a'},
# {'name': 'b'} ] },
# {'date': '22.06.2016',
# 'account': [ {'name': 'a'} ] } ]

Sets can only have hashable members and neither lists nor dicts are - but they can be checked for equality.
you can do
def without_duplicates(inlist):
    """Return a new list with the elements of inlist, duplicates removed.

    Elements are compared with equality only, so unhashable items such as
    dictionaries and lists are supported. The first occurrence of each
    element is kept and the original order is preserved.
    """
    outlist = []
    for e in inlist:
        # `not in` scans outlist, which is why this gets slow on big inputs.
        if e not in outlist:
            outlist.append(e)
    return outlist
this can be slow for really big lists

Give this code a try:
for d in my_list:
    # Entries without an 'account' key are left untouched.
    if 'account' in d:
        v = []
        for d2 in d['account']:
            # Dictionaries are unhashable, so compare by equality against
            # the accounts kept so far instead of using a set.
            if d2 not in v:
                v.append(d2)
        d['account'] = v
This is what you get after running the snippet above:
In [347]: my_list
Out[347]:
[{'account': [{'name': 'a'}, {'name': 'b'}], 'date': '10.06.2016'},
{'account': [{'name': 'a'}], 'date': '22.06.2016'}]

How to remove a json string from list in python

I have two lists with particular data, and I would like to merge them into a single list without duplicates.
list1 =[{"id": "123","Name": "Sam", "Age": 10},{"id": "124","Name": "Ajay", "Age": 10}]
list2 =[{"id": "123","Name": "Sam"},{"id": "124","Name": "Ajay"},{"id": "125","Name": "Ram"}]
The output list should be like this
output= [{"id": "123","Name": "Sam", "Age": 10},{"id": "124","Name": "Ajay", "Age": 10},{"id": "125","Name": "Ram"}]

Presumably it is the id key that uniquely identifies the information. If so, collect all the info from the two lists in a dictionary, then produce a new list from that:
from itertools import chain

# Collect the fields known for each id across both lists.
per_id = {}
for info in chain(list1, list2):
    # The first sighting of an id creates its record; each further sighting
    # adds its fields to that record, overwriting keys that are repeated.
    per_id.setdefault(info['id'], {}).update(info)
output = list(per_id.values())  # Python 2 and 3 compatible
Demo:
>>> from itertools import chain
>>> list1 = [{'Age': 10, 'id': '123', 'Name': 'Sam'}, {'Age': 10, 'id': '124', 'Name': 'Ajay'}]
>>> list2 = [{'id': '123', 'Name': 'Sam'}, {'id': '124', 'Name': 'Ajay'}, {'id': '125', 'Name': 'Ram'}]
>>> per_id = {}
>>> for info in chain(list1, list2):
... per_id.setdefault(info['id'], {}).update(info)
...
>>> list(per_id.values())
[{'Age': 10, 'id': '123', 'Name': 'Sam'}, {'Age': 10, 'id': '124', 'Name': 'Ajay'}, {'id': '125', 'Name': 'Ram'}]

How to combine values in python list of dictionaries

I have a list of dictionaries that look like this:
l = [{'name': 'john', 'amount': 50}, {'name': 'al', 'amount': 20}, {'name': 'john', 'amount': 80}]
Is there any way to combine/merge the dictionaries with matching name values and also sum their amounts?

You can use a collections.Counter() object to map names to amounts, summing them as you go along:
from collections import Counter

# Accumulate the total amount per name; a Counter starts missing keys at 0.
summed = Counter()
for d in l:
    summed[d['name']] += d['amount']
# most_common() yields (name, total) pairs from the highest total downwards.
result = [{'name': name, 'amount': amount} for name, amount in summed.most_common()]
The result is then also sorted by amount (highest first):
>>> from collections import Counter
>>> l = [{'name': 'john', 'amount': 50}, {'name': 'al', 'amount': 20}, {'name': 'john', 'amount': 80}]
>>> summed = Counter()
>>> for d in l:
... summed[d['name']] += d['amount']
...
>>> summed
Counter({'john': 130, 'al': 20})
>>> [{'name': name, 'amount': amount} for name, amount in summed.most_common()]
[{'amount': 130, 'name': 'john'}, {'amount': 20, 'name': 'al'}]

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

maintain dictionary structure while reducing nested dictionary - python

Related

Remove duplicate dict based on field values

How to create an empty list of dictionaries and populate afterwards?

Remove duplicates from list of dictionaries within list of dictionaries

How to remove a json string from list in python

How to combine values in python list of dictionaries

Categories

Resources