How to remove a json string from list in python - python

I have two list with particular data I would like to merge them into a single list with out duplicates.
list1 =[{"id": "123","Name": "Sam", "Age": 10},{"id": "124","Name": "Ajay", "Age": 10}]
list2 =[{"id": "123","Name": "Sam"},{"id": "124","Name": "Ajay"},{"id": "125","Name": "Ram"}]
The output list should be like this
output= [{"id": "123","Name": "Sam", "Age": 10},{"id": "124","Name": "Ajay", "Age": 10},{"id": "125","Name": "Ram"}]

Presumably it is the id key that uniquely identifies the information. If so, collect all the info from the two lists in a dictionary, then produce a new list from that:
from itertools import chain
per_id = {}
for info in chain(list1, list2):
per_id.setdefault(info['id'], {}).update(info)
output = list(per_id.values()) # Python 2 and 3 compatible
Demo:
>>> from itertools import chain
>>> list1 = [{'Age': 10, 'id': '123', 'Name': 'Sam'}, {'Age': 10, 'id': '124', 'Name': 'Ajay'}]
>>> list2 = [{'id': '123', 'Name': 'Sam'}, {'id': '124', 'Name': 'Ajay'}, {'id': '125', 'Name': 'Ram'}]
>>> per_id = {}
>>> for info in chain(list1, list2):
... per_id.setdefault(info['id'], {}).update(info)
...
>>> list(per_id.values())
[{'Age': 10, 'id': '123', 'Name': 'Sam'}, {'Age': 10, 'id': '124', 'Name': 'Ajay'}, {'id': '125', 'Name': 'Ram'}]

Related

maintain dictionary structure while reducing nested dictionary

I have a list of pairs of nested dict dd and would like to maintain the structure to a list of dictionaries:
dd = [
[{'id': 'bla',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1A', 'amount': '2'}]},
{'id': 'bla2',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1B', 'amount': '1'}]}
],
[{'id': 'bla3',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2A', 'amount': '3'}]},
{'id': 'bla4',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2B', 'amount': '4'}]}
]
]
I want to reduce this to a list of paired dictionaries while extracting only some detail. For example, an expected output may look like this:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]
I have run my code:
pair=[]
for all_pairs in dd:
for output_pairs in all_pairs:
for d in output_pairs.get('detail'):
if d['name'] != 'discard':
pair.append(d)
output_pair = {
k: [d.get(k) for d in pair]
for k in set().union(*pair)
}
But it didn't maintain that structure :
{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B', 'KEEP_PAIR_2A', 'KEEP_PAIR_2B'],
'amount': ['2', '1', '3', '4']}
I assume I would need to use some list comprehension to solve this but where in the for loop should I do that to maintain the structure.
Since you want to combine dictionaries in lists, one option is to use dict.setdefault:
pair = []
for all_pairs in dd:
dct = {}
for output_pairs in all_pairs:
for d in output_pairs.get('detail'):
if d['name'] != 'discard':
for k,v in d.items():
dct.setdefault(k, []).append(v)
pair.append(dct)
Output:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]

Remove duplicate dict based on field values

Given the following list of dicts, I want to remove duplicates where all fields are identical except for the id field.
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
To produce the following:
new_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"05","name":"larry","age":66}
]
My current code only works for cases where all fields of the dictionary are identical:
#! /usr/bin/python
for x in old_data:
if x not in new_d:
new_data.append(x)
Build a dict with the significant part of the dict as the key, then turn the values back into a list:
>>> old_data = [
... {"id":"01","name":"harry","age":21},
... {"id":"02","name":"barry","age":32},
... {"id":"03","name":"harry","age":44},
... {"id":"04","name":"harry","age":21},
... {"id":"05","name":"larry","age":66}
...
>>> sorted({(d["name"], d["age"]): d for d in reversed(old_data)}.values(), key=lambda d: d["id"])
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
If you don't care about which specific ids you keep or how they're sorted, it's simpler:
>>> list({(d["name"], d["age"]): d for d in old_data}.values())
[{'id': '04', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
try this: I ignore id in my comparison
def remove_duplicate(old_data):
new_data = []
for i in old_data:
found=False
for j in new_data:
if (j['name']==i['name']) & (j['age']==i['age']):
found=True
break;
if found==False:
new_data.append(i)
return new_data
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
print(remove_duplicate(old_data))
output:
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
a straight forward solution could be just to keep track of dicts in list.
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
track_list = []
new_data = []
for obj in old_data:
if [obj['name'], obj['age']] in track_list:
continue
else:
track_list.append([obj['name'], obj['age']])
new_data.append(obj)
print(new_data)
output
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
Only hardcoding 'id', not the other keys:
tmp = {}
for d in old_data:
k = frozenset(d.items() - {('id', d['id'])})
tmp.setdefault(k, d)
new_data = list(tmp.values())

How to convert list of dict into two lists?

For example:
persons = [{'id': 1, 'name': 'john'}, {'id': 2, 'name': 'mary'}, {'id': 3, 'name': 'tom'}]
I want to get two lists from it:
ids = [1, 2, 3]
names = ['john', 'mary', 'tom']
What I did:
names = [d['name'] for d in persons]
ids = [d['id'] for d in persons]
Is there a better way to do it?
I'd stick with using list comprehension or use #Woodford technique
ids,name = [dcts['id'] for dcts in persons],[dcts['name'] for dcts in persons]
output
[1, 2, 3]
['john', 'mary', 'tom']
What you did works fine. Another way to handle this (not necessarily better, depending on your needs) is to store your data in a more efficient dictionary and pull the names/ids out of it when you need them:
>>> persons = [{'id': 1, 'name': 'john'}, {'id': 2, 'name': 'mary'}, {'id': 3, 'name': 'tom'}]
>>> p2 = {x['id']: x['name'] for x in persons}
>>> p2
{1: 'john', 2: 'mary', 3: 'tom'}
>>> list(p2.keys())
[1, 2, 3]
>>> list(p2.values())
['john', 'mary', 'tom']
You can do it with pandas in a vectorized fashion:
import pandas as pd
persons = [{'id': 1, 'name': 'john'}, {'id': 2, 'name': 'mary'}, {'id': 3, 'name': 'tom'}]
df = pd.DataFrame(persons)
id_list = df.id.tolist() #[1, 2, 3]
name_list = df.name.tolist() #['john', 'mary', 'tom']
An alternative, inspired by this question, is
ids, names = zip(*map(lambda x: x.values(), persons))
that return tuples. If you need lists
ids, names = map(list, zip(*map(lambda x: x.values(), persons)))
It is a little bit slower on my laptop using python3.9 than the accepted answer but it might be useful.
It sounds like you're trying to iterate through the values of your list while unpacking your dictionaries:
persons = [{'id': 1, 'name': 'john'}, {'id': 2, 'name': 'mary'}, {'id': 3, 'name': 'tom'}]
for x in persons:
id, name = x.values()
ids.append(id)
names.append(name)

How to create an empty list of dictionaries and populate afterwords?

I need to initialize an empty List of Dictionary(LOD) which must have the following keys in it. "id","name","age", "gender". I want to create a loop/nested loop that starts populating the LOD. For poppulating I have a list which has ID's and the rest of the keys are generated using the random function.
The ID list looks like this: id = ['1','2','3']
The result must look something like this.
LOD = [
{
'id': '1',
'name':'122121',
'age':'2131',
'gender':'121'
},
{
'id': '2',
'name':'122121',
'age':'2131',
'gender':'121'
},
{
'id': '3',
'name':'122121',
'age':'2131',
'gender':'121'
},
]
CJDB already does what you want. But if you'd perhaps prefer another approach:
ids = ['1','2','3']
keys = ["name","age", "gender"]
LOD = []
and then populate your list with dictionaries
for i in ids:
your_dictionary = {"id": i}
for key in keys:
your_dictionary[key] = '{}_rnd_function_output'.format(key)
LOD.append(your_dictionary)
And the output would be
>>> LOD
[{'id': '1',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
{'id': '2',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
{'id': '3',
'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'}
]
You might consider having a sub-dictionaries within a dictionary. Your ids would be keys for main dictionary and sub-dictionaries would be values.
LOD = {}
for i in ids:
LOD[i] = {}
for key in keys:
LOD[i][key] = '{}_rnd_function_output'.format(key)
And the output
>>> LOD
{'1': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
'2': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'},
'3': {'name': 'name_rnd_function_output',
'age': 'age_rnd_function_output',
'gender': 'gender_rnd_function_output'}}
You can use a dictionary-comprehension for this:
ids = ['1','2','3']
LOD = [
{
'id': i,
'name':'122121',
'age':'2131',
'gender':'121'
} for i in ids
]
Output:
>>> LOD
[{'id': '1', 'name': '122121', 'age': '2131', 'gender': '121'},
{'id': '2', 'name': '122121', 'age': '2131', 'gender': '121'},
{'id': '3', 'name': '122121', 'age': '2131', 'gender': '121'}]
Or, using the random module:
import random
ids = ['1','2','3']
LOD = [
{
'id': i,
'name': str(random.randint(100000, 999999)),
'age': str(random.randint(1000, 9999)),
'gender': str(random.randint(100, 999))
} for i in ids
]
Output:
>>> LOD
[{'id': '1', 'name': '727325', 'age': '5367', 'gender': '238'},
{'id': '2', 'name': '316019', 'age': '8963', 'gender': '702'},
{'id': '3', 'name': '464023', 'age': '4324', 'gender': '155'}]
Note that you should not use id as a variable name as it shadows the builtin python id object.
You can do it by initializing dict objects in list comprehensions
keys = ['id', 'name', 'age', 'gender']
ids = ['1', '2', '3']
LOD = [dict((key, i if key == 'id' else random.randint(1, 100)) for key in keys) for i in ids]
print(LOD)
'''
[{'id': '1', 'name': 34, 'age': 10, 'gender': 57},
{'id': '2', 'name': 64, 'age': 13, 'gender': 21},
{'id': '3', 'name': 11, 'age': 17, 'gender': 2}]
'''

How to reorder a list in Python based on its content

I have a list of dictionaries in python like this;
l = [{'name': 'John', 'age': 23},
{'name': 'Steve', 'age': 35},
{'name': 'Helen'},
{'name': 'George'},
{'name': 'Jessica', 'age': 23}]
What I am trying to achieve here is reorder the elements of l in such a way that each entry containing the key age move to the end of the list like this;
End result:
l = [{'name': 'Helen'},
{'name': 'George'},
{'name': 'Jessica', 'age': 23},
{'name': 'John', 'age': 23},
{'name': 'Steve', 'age': 35}]
How can I do this?
You can sort the list:
l.sort(key=lambda d: 'age' in d)
The key returns either True or False, based on the presence of the 'age' key; True is sorted after False. Python's sort is stable, leaving the rest of the relative ordering intact.
Demo:
>>> from pprint import pprint
>>> l = [{'name': 'John', 'age': 23},
... {'name': 'Steve', 'age': 35},
... {'name': 'Helen'},
... {'name': 'George'},
... {'name': 'Jessica', 'age': 23}]
>>> l.sort(key=lambda d: 'age' in d)
>>> pprint(l)
[{'name': 'Helen'},
{'name': 'George'},
{'age': 23, 'name': 'John'},
{'age': 35, 'name': 'Steve'},
{'age': 23, 'name': 'Jessica'}]
If you also wanted to sort by age, then retrieve the age value and return a suitable stable sentinel for those entries that do not have an age, but which will be sorted first. float('-inf') will always be sorted before any other number, for example:
l.sort(key=lambda d: d.get('age', float('-inf')))
Again, entries without an age are left in their original relative order:
>>> l.sort(key=lambda d: d.get('age', float('-inf')))
>>> pprint(l)
[{'name': 'Helen'},
{'name': 'George'},
{'age': 23, 'name': 'John'},
{'age': 23, 'name': 'Jessica'},
{'age': 35, 'name': 'Steve'}]

Categories