Get the number of dict values in a nested dict - python

I have the following json object:
[{
'firstname': 'Jimmie',
'lastname': 'Barninger',
'zip_code': 12345,
'colors': ['2014-01-01', '2015-01-01'],
'ids': {
'44': 'OK',
'51': 'OK'
},
'address': {
'state': 'MI',
'town': 'Dearborn'
},
'other': {
'ids': {
'1': 'OK',
'103': 'OK'
},
}
}, {
'firstname': 'John',
'lastname': 'Doe',
'zip_code': 90027,
'colors': None,
'ids': {
'91': 'OK',
'103': 'OK'
},
'address': {
'state': 'CA',
'town': 'Los Angeles'
},
'other': {
'ids': {
'91': 'OK',
'103': 'OK'
},
}
}]
I would like to be able to get the number of unique key values that each dict has. In the above, the number would be:
address: 2 # ['state', 'town']
ids: 4 # ['44', '51', '91', '103']
other.ids: 3 # ['1', '103', '91']
I've been having trouble iterating over the objects to figure this out, especially if there is an item within a list. What I've been trying thus far is something like the below. It doesn't currently work, but I'm pasting it for reference:
def count_per_key(obj, _c=None):
if _c is None: unique_values_per_key = {}
if isinstance(obj, list):
return [count_per_key(l) for l in obj]
elif not isinstance(obj, dict):
pass
else:
for key, value in obj.items():
if not isinstance(value, dict):
continue
elif isinstance(value, dict):
if key not in unique_values_per_key: unique_values_per_key[key] = set()
unique_values_per_key[key].union(set(value.keys()))
return count_per_key(value)
elif isinstance(value, list):
return [count_per_key(o) for o in value]
return unique_values_per_key

You can use recursion with a generator:
from collections import defaultdict
d = [{'firstname': 'Jimmie', 'lastname': 'Barninger', 'zip_code': 12345, 'colors': ['2014-01-01', '2015-01-01'], 'ids': {'44': 'OK', '51': 'OK'}, 'address': {'state': 'MI', 'town': 'Dearborn'}, 'other': {'ids': {'1': 'OK', '103': 'OK'}}}, {'firstname': 'John', 'lastname': 'Doe', 'zip_code': 90027, 'colors': None, 'ids': {'91': 'OK', '103': 'OK'}, 'address': {'state': 'CA', 'town': 'Los Angeles'}, 'other': {'ids': {'91': 'OK', '103': 'OK'}}}]
def get_vals(d, _path=None):
    """Yield ``[dotted_path, keys]`` for each 'ids'/'address' mapping in *d*.

    *d* is searched recursively. Whenever a key named ``'ids'`` or
    ``'address'`` is found, a two-item list is yielded: the dot-joined path
    of keys leading to it (e.g. ``'other.ids'``) and the list of keys of the
    mapping stored there. Every other value is descended into.

    Anything without an ``items`` method (scalars, lists, ``None``) yields
    nothing. ``_path`` is the list of parent keys accumulated by the
    recursion; callers normally leave it unset.
    """
    # None sentinel instead of a mutable default list shared between calls.
    path = [] if _path is None else _path
    items = getattr(d, 'items', None)
    if items is None:
        return
    for key, value in items():
        if key in {'ids', 'address'}:
            # A target key may hold a non-mapping (e.g. ``'ids': None``);
            # there are no keys to report then, so skip it rather than crash.
            if hasattr(value, 'keys'):
                yield ['.'.join(path + [key]), list(value.keys())]
        else:
            yield from get_vals(value, path + [key])
# Merge the per-record key lists by dotted path, then reduce each merged list
# to its unique keys and attach the count of unique keys.
c = defaultdict(list)
results = [i for b in d for i in get_vals(b)]
for path, keys in results:
    c[path].extend(keys)
# set() accepts any iterable directly, so no intermediate list() copy is needed.
_r = [[path, set(keys)] for path, keys in c.items()]
new_r = [[path, keys, len(keys)] for path, keys in _r]
Output:
[
['ids', {'91', '44', '51', '103'}, 4],
['address', {'state', 'town'}, 2],
['other.ids', {'1', '91', '103'}, 3]
]

l= [{'firstname': 'Jimmie', 'lastname': 'Barninger', 'zip_code': 12345, 'colors': ['2014-01-01', '2015-01-01'], 'ids': {'44': 'OK', '51': 'OK'}, 'address': {'state': 'MI', 'town': 'Dearborn'}, 'other': {'ids': {'1': 'OK', '103': 'OK'}}}, {'firstname': 'John', 'lastname': 'Doe', 'zip_code': 90027, 'colors': None, 'ids': {'91': 'OK', '103': 'OK'}, 'address': {'state': 'CA', 'town': 'Los Angeles'}, 'other': {'ids': {'91': 'OK', '103': 'OK'}}}]
def find_dicts(d, parent=''):
    """Yield ``{dotted_path: [keys]}`` for every dict nested inside *d*.

    For each value of *d* that is itself a dict, a single-entry dict is
    yielded that maps the dot-joined path of that value (``parent`` plus its
    own key) to the list of its keys; the nested dict is then searched the
    same way. Non-dict values are ignored.

    ``parent`` is the dotted path of *d* itself; the empty string marks the
    top level.
    """
    for k, v in d.items():
        if not isinstance(v, dict):
            continue
        # Compare by value: identity checks against a string literal are
        # implementation-dependent.
        if parent != '':
            identifier = str(parent) + '.' + str(k)
        else:
            identifier = str(k)
        yield {identifier: list(v)}
        # Pass the full path down so that levels deeper than two keep their
        # complete prefix (a.b.c rather than b.c).
        yield from find_dicts(v, identifier)
# One list of {path: keys} entries per record in l.
s = [list(find_dicts(d)) for d in l]
# Merge the entries of all records: path -> unique keys in first-seen order.
# Paths are registered as they are encountered, so a path that only occurs in
# a later record is still counted and an empty l simply produces no output.
final_dict = {}
for li in s:
    for di in li:
        for di_key, di_values in di.items():
            merged = final_dict.setdefault(di_key, [])
            for value in di_values:
                if value not in merged:
                    merged.append(value)
dict_names = list(final_dict)
for k, v in final_dict.items():
    print(k, ":", len(v), v)
Output
ids : 4 ['44', '51', '91', '103']
address : 2 ['town', 'state']
other.ids : 3 ['103', '1', '91']
other : 1 ['ids']

Related

maintain dictionary structure while reducing nested dictionary

I have a list, dd, of pairs of nested dicts, and would like to reduce it to a list of dictionaries while maintaining the pair structure:
dd = [
[{'id': 'bla',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1A', 'amount': '2'}]},
{'id': 'bla2',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_1B', 'amount': '1'}]}
],
[{'id': 'bla3',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2A', 'amount': '3'}]},
{'id': 'bla4',
'detail': [{'name': 'discard', 'amount': '123'},
{'name': 'KEEP_PAIR_2B', 'amount': '4'}]}
]
]
I want to reduce this to a list of paired dictionaries while extracting only some detail. For example, an expected output may look like this:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]
I have run my code:
pair=[]
for all_pairs in dd:
for output_pairs in all_pairs:
for d in output_pairs.get('detail'):
if d['name'] != 'discard':
pair.append(d)
output_pair = {
k: [d.get(k) for d in pair]
for k in set().union(*pair)
}
But it didn't maintain that structure :
{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B', 'KEEP_PAIR_2A', 'KEEP_PAIR_2B'],
'amount': ['2', '1', '3', '4']}
I assume I would need to use some list comprehension to solve this, but where in the for loop should I do that to maintain the structure?
Since you want to combine dictionaries in lists, one option is to use dict.setdefault:
# Build one merged dict per pair: every key of the kept detail entries maps to
# the list of values collected across both members of the pair.
pair = []
for all_pairs in dd:
    dct = {}
    for output_pairs in all_pairs:
        # Default to an empty tuple so a record without 'detail' is skipped
        # instead of raising TypeError when iterating None.
        for d in output_pairs.get('detail', ()):
            if d['name'] != 'discard':
                for k, v in d.items():
                    dct.setdefault(k, []).append(v)
    # Appended once per pair, which is what keeps the pair structure.
    pair.append(dct)
Output:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': ['2', '1']},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': ['3', '4']}]

Remove duplicate dict based on field values

Given the following list of dicts, I want to remove duplicates where all fields are identical except for the id field.
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
To produce the following:
new_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"05","name":"larry","age":66}
]
My current code only works for cases where all fields of the dictionary are identical:
#! /usr/bin/python
for x in old_data:
if x not in new_d:
new_data.append(x)
Build a dict with the significant part of the dict as the key, then turn the values back into a list:
>>> old_data = [
... {"id":"01","name":"harry","age":21},
... {"id":"02","name":"barry","age":32},
... {"id":"03","name":"harry","age":44},
... {"id":"04","name":"harry","age":21},
... {"id":"05","name":"larry","age":66}
... ]
>>> sorted({(d["name"], d["age"]): d for d in reversed(old_data)}.values(), key=lambda d: d["id"])
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
If you don't care about which specific ids you keep or how they're sorted, it's simpler:
>>> list({(d["name"], d["age"]): d for d in old_data}.values())
[{'id': '04', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
Try this; the comparison ignores the id field:
def remove_duplicate(old_data):
    """Return the records of *old_data* without name/age duplicates.

    Two records are duplicates when their ``'name'`` and ``'age'`` values
    are equal; ``'id'`` is ignored. The first record of each duplicate group
    is kept and the input order is preserved. *old_data* is not modified.

    The ``'name'`` and ``'age'`` values must be hashable, because they are
    tracked in a set.
    """
    new_data = []
    # (name, age) pairs already emitted; set lookup avoids rescanning
    # new_data for every record.
    seen = set()
    for record in old_data:
        key = (record['name'], record['age'])
        if key not in seen:
            seen.add(key)
            new_data.append(record)
    return new_data
old_data = [
{"id":"01","name":"harry","age":21},
{"id":"02","name":"barry","age":32},
{"id":"03","name":"harry","age":44},
{"id":"04","name":"harry","age":21},
{"id":"05","name":"larry","age":66}
]
print(remove_duplicate(old_data))
output:
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
A straightforward solution could be just to keep track of the dicts already seen.
old_data = [
    {"id": "01", "name": "harry", "age": 21},
    {"id": "02", "name": "barry", "age": 32},
    {"id": "03", "name": "harry", "age": 44},
    {"id": "04", "name": "harry", "age": 21},
    {"id": "05", "name": "larry", "age": 66},
]
# (name, age) pairs already kept. A set of tuples gives constant-time
# membership tests, where a list of lists is rescanned for every record.
seen = set()
new_data = []
for obj in old_data:
    key = (obj['name'], obj['age'])
    if key not in seen:
        # First record with this name/age combination: keep it.
        seen.add(key)
        new_data.append(obj)
print(new_data)
output
[{'id': '01', 'name': 'harry', 'age': 21}, {'id': '02', 'name': 'barry', 'age': 32}, {'id': '03', 'name': 'harry', 'age': 44}, {'id': '05', 'name': 'larry', 'age': 66}]
Only hardcoding 'id', not the other keys:
# Key every record by the frozenset of its (field, value) pairs with the 'id'
# pair left out; the first record seen for each key is the one kept.
tmp = {}
for d in old_data:
    id_pair = ('id', d['id'])
    k = frozenset(pair for pair in d.items() if pair != id_pair)
    if k not in tmp:
        tmp[k] = d
new_data = list(tmp.values())

Update dictionary keys inside a list based on another dictionary key value pairs

I have a list which has nested dictionary inside it and also a dictionary with respective key pair values.
I am trying to map the key from dict2 to keys for the dictionary elements inside the list.
list = [{'name': 'Megan', 'Age': '28', 'occupation': 'yes', 'race': 'american', 'children': 'yes'}, {'name': 'Ryan', 'Age': '25', 'occupation': 'no', 'race': 'american', 'intern': 'yes'}]
The respective dictionary which holds the correct keys is
dict_map = {'occupation': 'service', 'intern': 'employee', 'race': 'ethnicity'}
I am new to Python. So far I have been going through Stack Overflow pages and have tried a few approaches, but I have not been able to get the desired result.
The closest I got was with this: Python Dictionary: How to update dictionary value, base on key - using separate dictionary keys
The final output should be:
[{'name': 'Megan', 'Age': '28', 'service': 'yes', 'ethnicity': 'american', 'children': 'yes'}, {'name': 'Ryan', 'Age': '25', 'service': 'no', 'ethnicity': 'american', 'employee': 'yes'}]
you could try this:
Note that I renamed your list to lst (list is a built-in type that you should never overwrite!)
lst = [
{
"name": "Megan",
"Age": "28",
"occupation": "yes",
"race": "american",
"children": "yes",
},
{
"name": "Ryan",
"Age": "25",
"occupation": "no",
"race": "american",
"intern": "yes",
},
]
# Rename, in place, every key of each dict that has an entry in dict_map.
for dct in lst:
    for old_key, new_key in dict_map.items():
        if old_key in dct:
            # pop() removes the old entry before the new one is stored, so a
            # key that maps to itself keeps its value instead of being
            # deleted right after it was assigned.
            dct[new_key] = dct.pop(old_key)
Using a list comprehension with dict.get
Ex:
lst = [{'name': 'Megan', 'Age': '28', 'occupation': 'yes', 'race': 'american', 'children': 'yes'}, {'name': 'Ryan', 'Age': '25', 'occupation': 'no', 'race': 'american', 'intern': 'yes'}]
dict_map = {'occupation': 'service', 'intern': 'employee', 'race': 'ethnicity'}
# Build a renamed copy of every record: keys listed in dict_map are replaced
# by their mapped name, all other keys are carried over unchanged.
result = []
for record in lst:
    renamed = {}
    for key, value in record.items():
        renamed[dict_map.get(key, key)] = value
    result.append(renamed)
print(result)
Output:
[{'Age': '28',
'children': 'yes',
'ethnicity': 'american',
'name': 'Megan',
'service': 'yes'},
{'Age': '25',
'employee': 'yes',
'ethnicity': 'american',
'name': 'Ryan',
'service': 'no'}]

Grouping a list of dictionaries by the same value in Python 3

Given a list of dictionaries:
players= [
{ "name": 'matt', 'school': 'WSU', 'homestate': 'CT', 'position': 'RB' },
{ "name": 'jack', 'school': 'ASU', 'homestate': 'AL', 'position': 'QB' },
{ "name": 'john', 'school': 'WSU', 'homestate': 'MD', 'position': 'LB' },
{ "name": 'kevin', 'school': 'ALU', 'homestate': 'PA', 'position': 'LB' },
{ "name": 'brady', 'school': 'UM', 'homestate': 'CA', 'position': 'QB' },
]
How do I group them into groups by matching their matching dictionary values, such that it spews out:
Matching Value 1:
name: [matt, john, kevin],
school: [WSU, WSU, ALU],
homestate: [CT, MD, PA]
position: [RB, LB, LB]
Matching Value 2:
name: [jack, brady],
school: [ASU, UM],
homestate: [AL, CA]
position: [QB, QB]
Notice that the matching values are arbitrary; that is, it can be found anywhere. Maybe its in school or in position, or maybe in both.
I tried grouping them by doing:
from collections import defaultdict
result_dictionary = {}
for i in players:
for key, value in i.items():
result_dictionary.setdefault(key, []).append(value)
Which gives out:
{'name': ['matt', 'jack', 'john', 'kevin', 'brady'],
'school': ['WSU', 'ASU', 'WSU', 'ALU', 'UM'],
'homestate': ['CT', 'AL', 'MD', 'PA', 'CA'],
'position': ['RB', 'QB', 'LB', 'LB', 'QB']}
But I'm stuck on how to further manipulate the output to match the required output I stated above, and I am sure there is a better, simpler approach to doing it.
Just use collections.defaultdict that you already imported:
In [21]: from collections import defaultdict
...: result = defaultdict(lambda: defaultdict(list))
...: for d in players:
...: for k,v in d.items():
...: result[d['school']][k].append(v)
...:
In [22]: result
Out[22]:
defaultdict(<function __main__.<lambda>>,
{'ASU': defaultdict(list,
{'homestate': ['AL'],
'name': ['jack'],
'position': ['QB'],
'school': ['ASU']}),
'WSU': defaultdict(list,
{'homestate': ['CT', 'MD'],
'name': ['matt', 'john'],
'position': ['RB', 'LB'],
'school': ['WSU', 'WSU']})})
You can find the most common occurring header value and use the latter value as a focal point for further grouping:
import itertools
players= [
{ "name": 'matt', 'school': 'WSU', 'homestate': 'CT', 'position': 'RB' },
{ "name": 'jack', 'school': 'ASU', 'homestate': 'AL', 'position': 'QB' },
{ "name": 'john', 'school': 'WSU', 'homestate': 'MD', 'position': 'LB' },
{ "name": 'kevin', 'school': 'ALU', 'homestate': 'PA', 'position': 'S' },
{ "name": 'brady', 'school': 'UM', 'homestate': 'CA', 'position': 'QB' },
]
# Fields that are read from every player dict, in this order.
headers = ['name', 'school', 'homestate', 'position']
# Transpose the players into one tuple of values per header, keep only the
# headers whose values contain a repeat (len(set(b)) < len(b)), and pair each
# of those headers with the value that occurs most often in its column.
final_header = [[a, max(b, key=lambda x:b.count(x))] for a, b in zip(headers, zip(*[[i[b] for b in headers] for i in players])) if len(set(b)) < len(b)]
# For each [header, value] pair, collect the players holding that value under
# that header. The filtered players all share the groupby key, so groupby
# produces a single run and [0] selects it.
d = [[list(b) for _, b in itertools.groupby(filter(lambda x:x[i] == c, players), key=lambda x:x[i])][0] for i, c in final_header]
# Turn each group of player dicts into one dict of lists, labelled
# 'pattern 1', 'pattern 2', ... The items of the grouped dicts are zipped
# positionally, so this relies on every player dict listing its keys in the
# same order. Inside the inner comprehension `d` is rebound to one tuple of
# (key, value) pairs and `c` is unused.
last_results = {'pattern {}'.format(i):{d[0][0]:[j[-1] for j in d] for c, d in zip(headers, zip(*map(dict.items, h)))} for i, h in enumerate(d, start=1)}
Output:
{'pattern 2':
{'homestate': ['AL', 'CA'],
'school': ['ASU', 'UM'],
'name': ['jack', 'brady'],
'position': ['QB', 'QB']},
'pattern 1':
{'homestate': ['CT', 'MD'],
'school': ['WSU', 'WSU'],
'name': ['matt', 'john'],
'position': ['RB', 'LB']}
}

Remove duplicates from list of dictionaries within list of dictionaries

I have list:
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
I want to remove duplicates from the list of dictionaries in 'account':
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'}]}]
When using set, I get the following error:
TypeError: unhashable type: 'dict'
Can anybody help me with this problem?
This structure is probably over complicated, but it gets the job done.
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
>>> [{'date': date,
'account': [{'name': name} for name in group]
} for group, date in zip([set(account.get('name')
for account in item.get('account'))
for item in my_list],
[d.get('date') for d in my_list])]
[{'account': [{'name': 'a'}, {'name': 'b'}], 'date': '10.06.2016'},
{'account': [{'name': 'a'}], 'date': '22.06.2016'}]
def deduplicate_account_names(l):
    """Remove repeated account names from every record of *l*, in place.

    Each record's ``'account'`` list is replaced by a list holding one
    ``{'name': ...}`` dict per distinct name, in order of first appearance.
    Only the ``'name'`` value of each account is carried over. Returns
    ``None``.
    """
    for d in l:
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would leave the resulting order arbitrary.
        names = dict.fromkeys(account.get('name') for account in d['account'])
        d['account'] = [{'name': name} for name in names]
# even shorter:
# def deduplicate_account_names(l):
# for d in l:
# d['account'] = [{'name': name} for name in set(map(lambda d: d.get('name'), d['account']))]
my_list = [{'date': '10.06.2016',
'account': [{'name': 'a'},
{'name': 'a'},
{'name': 'b'},
{'name': 'b'}]},
{'date': '22.06.2016',
'account': [{'name': 'a'},
{'name': 'a'}]}]
deduplicate_account_names(my_list)
print(my_list)
# [ {'date': '10.06.2016',
# 'account': [ {'name': 'a'},
# {'name': 'b'} ] },
# {'date': '22.06.2016',
# 'account': [ {'name': 'a'} ] } ]
Sets can only have hashable members and neither lists nor dicts are - but they can be checked for equality.
you can do
def without_duplicates(inlist):
    """Return a new list with the items of *inlist*, first occurrences only.

    Items are compared with ``==`` (via ``in``), so unhashable items such as
    dicts and lists are supported. The original order is kept.
    """
    unique = []
    for item in inlist:
        if item in unique:
            # An equal item was already kept.
            continue
        unique.append(item)
    return unique
this can be slow for really big lists
Give this code a try:
# Replace each record's 'account' list by a copy without repeated entries,
# keeping the first occurrence of every account dict.
for d in my_list:
    if 'account' not in d:
        continue
    v = []
    for d2 in d['account']:
        if d2 not in v:
            v.append(d2)
    d['account'] = v
This is what you get after running the snippet above:
In [347]: my_list
Out[347]:
[{'account': [{'name': 'a'}, {'name': 'b'}], 'date': '10.06.2016'},
{'account': [{'name': 'a'}], 'date': '22.06.2016'}]

Categories