Create nested dictionaries of arbitrary length - python

I have the following problem. I have a nested dictionary like the following
A = {'A':{'STATES':['0','1','2']}, 'B':{'STATES':['10','20']}}
What I want to build eventually is a nested dictionary with all the possible combinations of STATES. So for a known number of keys in A is trivial as this
_dict = {}
for s in A['A']['STATES']:
if s not in _dict:
_dict[s] = {}
for s1 in A['B']['STATES']:
_dict[s][s1] = 0
This gives
{'0': {'10': 0, '20': 0}, '1': {'10': 0, '20': 0}, '2': {'10': 0, '20': 0}}
which is what I want. However I do not know the number of keys in A beforehand. What it would be an efficient solution to to the same with an arbitrary number of elements in A?
EDIT
For instance with three elements I would have
{'0': {'10': {'100':0}, '20': {'100':0}, '1': {'10': {'100':0}, '20': {'100':0}, '2': {'10': {'100':0}, '20': {'100':0}}

This problem is a little complex, but it can be splitted up in three parts:
Parse all the values, mapping a list to every valid key.
Get the list of all the combinations in the order of dictionary insertion.
Translate the list of tuples into a nested dictionary, looping over the values inside the tuple itself - because we don't know its length.
import itertools
A = {'A':{'STATES':['0','1','2']}, 'B':{'STATES':['10','20']}, 'C':{'STATES':['100']}}
# 1. get your dictionary A, but reduced, so that
# for every key you have a list of states if the key "STATES" exists
Ared = {k: A[k]["STATES"] for k in A if A[k].get("STATES")}
print(Ared) # {'A': ['0', '1', '2'], 'B': ['10', '20'], 'C': ['100']}
# 2. get all the combinations
combs = list(itertools.product(*Ared.values()))
print(combs) # [('0', '10', '100'), ('0', '20', '100'), ('1', '10', '100'), ('1', '20', '100'), ('2', '10', '100'), ('2', '20', '100')]
# 3. translate them into a nested dictionary
d = dict()
for comb in combs:
old_dict = d
for i, key in enumerate(comb):
if i == len(comb) - 1:
old_dict[key] = 0
elif not old_dict.get(key):
old_dict[key] = {}
old_dict = old_dict[key]
print(d) # {'0': {'10': {'100': 0}, '20': {'100': 0}}, '1': {'10': {'100': 0}, '20': {'100': 0}}, '2': {'10': {'100': 0}, '20': {'100': 0}}}

You can use recursion:
A = {'A':{'STATES':['0','1','2']}, 'B':{'STATES':['10','20']}}
def combos(d):
return 0 if not d else {i:combos(d[1:]) for i in d[0]}
print(combos([j['STATES'] for j in A.values()]))
Output:
{'0': {'10': 0, '20': 0}, '1': {'10': 0, '20': 0}, '2': {'10': 0, '20': 0}}
With more than two keys:
A = {'A':{'STATES':['0','1','2']}, 'B':{'STATES':['10','20']}, 'C':{'STATES':['100']}}
print(combos([j['STATES'] for j in A.values()]))
Output:
{'0': {'10': {'100': 0}, '20': {'100': 0}}, '1': {'10': {'100': 0}, '20': {'100': 0}}, '2': {'10': {'100': 0}, '20': {'100': 0}}}

Related

pythonic way to break a dict which value is a list to several dict

description
I need to break a dict which value is a list to several dict, keep the other parts.
The key I want to break may have different name,but the cond only and always get one value which type is list.
example
input
cond = {"type":"image","questionType":["3","4","5"]}
cond = {"type":"example","fieldToBreak":["1","2","3"],"fieldInt":1,"fieldFloat":0.1}
output
[
{'type': 'image', 'questionType': '3'},
{'type': 'image', 'questionType': '4'},
{'type': 'image', 'questionType': '5'}
]
[
{'type': 'example', 'fieldToBreak': '1', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '2', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '3', 'fieldInt': 1, 'fieldFloat': 0.1}
]
what I have tried
cond_queue = []
for k,v in cond.items():
if isinstance(v,list):
for ele in v:
cond_copy = cond.copy()
cond_copy[k] = ele
cond_queue.append(cond_copy)
break
It works, but I think it is not the best pythonic solution.
question:
Any better pythonic solution?
Possible approach utilizing python's built-in functions and standard library. The code should work with any number of keys. It creates all combinations of values' elements in case of multiple lists presented in the original dict. Not sure if this logic a correct one.
import itertools
def dict_to_inflated_list(d):
ans, keys, vals = list(), list(), list()
# copy keys and 'listified' values in the same order
for k, v in d.items():
keys.append(k)
vals.append(v if isinstance(v, list) else [v])
# iterate over all possible combinations of elements of all 'listified' values
for combination in itertools.product(*vals):
ans.append({k: v for k, v in zip(keys, combination)})
return ans
if __name__ == '__main__':
cond = {'type': 'image', 'questionType': ['3', '4', '5']}
print(dict_to_inflated_list(cond))
cond = {'a': 0, 'b': [1, 2], 'c': [10, 20]}
print(dict_to_inflated_list(cond))
Output:
[{'type': 'image', 'questionType': '3'}, {'type': 'image', 'questionType': '4'}, {'type': 'image', 'questionType': '5'}]
[{'a': 0, 'b': 1, 'c': 10}, {'a': 0, 'b': 1, 'c': 20}, {'a': 0, 'b': 2, 'c': 10}, {'a': 0, 'b': 2, 'c': 20}]
something like the below (the solution is based on the input from the post which I assume represents the general case)
cond = {"type": "image", "questionType": ["3", "4", "5"]}
data = [{"type": "image", "questionType": e} for e in cond['questionType']]
print(data)
output
[{'type': 'image', 'questionType': '3'}, {'type': 'image', 'questionType': '4'}, {'type': 'image', 'questionType': '5'}]
This little function does the job without any extra argument except the input dictionary
def unpack_dict(d):
n = [len(v) for k,v in d.items() if type(v) is list][0] #number of items in the list
r = []
for i in range(n):
_d = {}
for k,v in d.items():
if type(v) is list:
_d[k] = v[i]
else:
_d[k] = v
r.append(_d)
return r
cond = {"type":"example","fieldToBreak":["1","2","3"],"fieldInt":1,"fieldFloat":0.1}
unpack_dict(cond)
[{'type': 'example', 'fieldToBreak': '1', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '2', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '3', 'fieldInt': 1, 'fieldFloat': 0.1}]
The function determines how many items (n) there are in the list entry and uses that info to extract the right value to be inserted in the dictionary. Looping over n (for i in range(n):) is used to append the correct number of dictionaries in the final output. That's it. Quite simple to read and understand.
try this:
lens = 0
for index, item in enumerate(cond):
if isinstance(cond[item], list):
lens = len(cond[item])
idx = index
break
print([{k : v if i!=idx else v[j] for i,(k,v) in enumerate(cond.items()) } for j in range(lens)])
output:
# cond = {"type":"image","questionType":["3","4","5"]}
[{'type': 'image', 'questionType': '3'},
{'type': 'image', 'questionType': '4'},
{'type': 'image', 'questionType': '5'}]
# cond = {"type":"example","fieldToBreak":["1","2","3"],"fieldInt":1,"fieldFloat":0.1}
[{'type': 'example', 'fieldToBreak': '1', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '2', 'fieldInt': 1, 'fieldFloat': 0.1},
{'type': 'example', 'fieldToBreak': '3', 'fieldInt': 1, 'fieldFloat': 0.1}]
if dict have another shape:
# cond = {"questionType":["3","4","5"], "type":"image"}
[{'questionType': '3', 'type': 'image'},
{'questionType': '4', 'type': 'image'},
{'questionType': '5', 'type': 'image'}]

Create a dictionary of dictionaries with string numbers

I want to create a dictionary of dictionaries with a list of numbers in the last one. I'm adding the numbers 1 - 254 as strings to a dictionary so the number is the key and the value will be another dictionary with the keys as 1 - 254 as strings again and finally a list as the value with the numbers 1 - 254 as strings. I cant figure out how to do this. There will be a total of around 16 million elements when its all been executed. I can't figure this out.
def fill_ip_list(dict_list):
""" Create an IP list of lists approximately 15 million numbers from 1 - 255, three lists deep. """
ips = []
count = 0
print("Populating the IP store. Please wait...")
for octet_one in dict_list:
ips.append({str(octet_one["prefix"]): {}})
for octet_two in octet_one:
for x in range(1, 254 + 1):
octet_one.update({x: {}})
print(ips)
Output:
[{'1': {}}, {'2': {}}, {'3': {}}, {'4': {}}, {'5': {}}, {'7': {}}...
Expected output:
[{'1': {'1': '1'}, {'2': '2'}, {'3': '3'}...}]
[{'2': {'1': '1'}, {'2': '2'}, {'3': '3'}...}]
[{'3': {'1': '1'}, {'2': '2'}, {'3': '3'}...}]
[{'4': {'1': '1'}, {'2': '2'}, {'3': '3'}...}]
.
.
.

Count values in dict of pandas series

I have a pandas series of dicts like this:
print(df['genres'])
0 {'0': '1', '1': '4', '2': '23'}
1 {'0': '1', '1': '25', '2': '4', '3': '37'}
2 {'0': '9'}
print(type(df['genres']))
<class 'pandas.core.series.Series'>
print(type(df['genres'][0]))
<class 'dict'>
I want to count the values to get something like this:
{'1': 2, '4': 2, '9': 1, '23': 1, '25': 1, '37': 1}
I tried the following:
print(Counter(chain.from_iterable(df.genres.values)))
Counter({'0': 3, '1': 2, '2': 2, '3': 1})
print(pd.Series(df['genres']).value_counts())
{'0': '1', '1': '4', '2': '23'} 1
{'0': '1', '1': '25', '2': '4', '3': '37'} 1
{'0': '9'} 1
I think it is pretty easy for someone more experienced than me. But I really don't get it ...
Try:
pd.DataFrame(list(df.genres)).stack().value_counts().to_dict()
Output:
{'1': 2, '4': 2, '37': 1, '9': 1, '23': 1, '25': 1}

Python - Sum the value in the list of dictionary based on the same key

I have a list of dictionaries which looks like:
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
And I want to get a nested dictionary whose keys are the value from the key('player') and whose values are dictionaries of aggregated stats.
The output should:
{'3': {'stat3': 9, 'stat2': 8, 'player': '3'},
'1': {'stat3': 13, 'stat2': 5, 'player': '1'}}
The following is my code:
from collections import defaultdict
result = {}
total_stat = defaultdict(int)
for dict in data:
total_stat[dict['player']] += int(dict['stat3'])
total_stat[dict['player']] += int(dict['stat2'])
total_stat = ([{'player': info, 'stat3': total_stat[info],
'stat2': total_stat[info]} for info in
sorted(total_stat, reverse=True)])
for item in total_stat:
result.update({item['player']: item})
print(result)
However, I got this:
{'3': {'player': '3', 'stat3': 17, 'stat2': 17},
'1': {'player': '1', 'stat3': 18, 'stat2': 18}}
How could I make it right? Or are there other approaches?
Your data is rather a DataFrame, a natural pandas solution is :
In [34]: pd.DataFrame.from_records(data).astype(int).groupby('player').sum().T.to_dict()
Out[34]: {1: {'stat2': 5, 'stat3': 13}, 3: {'stat2': 8, 'stat3': 9}}
Just use a more nested default-factory:
>>> total_stat = defaultdict(lambda : defaultdict(int))
>>> value_fields = 'stat2', 'stat3'
>>> for datum in data:
... player_data = total_stat[datum['player']]
... for k in value_fields:
... player_data[k] += int(datum[k])
...
>>> from pprint import pprint
>>> pprint(total_stat)
defaultdict(<function <lambda> at 0x1023490d0>,
{'1': defaultdict(<class 'int'>, {'stat2': 5, 'stat3': 13}),
'3': defaultdict(<class 'int'>, {'stat2': 8, 'stat3': 9})})
This solution use a nested dictionary. The out is a {player: Counter} dictionary, where as Counter itself is another dictionary {stat: score}
import collections
def split_player_stat(dict_object):
"""
Split a row of data into player, stat
>>> split_player_stat({'stat3': '5', 'stat2': '4', 'player': '1'})
'1', {'stat3': 5, 'stat2': 4}
"""
key = dict_object['player']
value = {k: int(v) for k, v in dict_object.items() if k != 'player'}
return key, value
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
out = collections.defaultdict(collections.Counter)
for player_stat in data:
player, stat = split_player_stat(player_stat)
out[player].update(stat)
print(out)
The magic of this solution is done by the collections.defaultdict and collections.Counter classes, both behaves like dictionaries.
Not the best code, nor the more pythonic, but I think you should be able to walk through it and figure out where your code went wrong.
def sum_stats_by_player(data):
result = {}
for dictionary in data:
print(f"evaluating dictionary {dictionary}")
player = dictionary["player"]
stat3 = int(dictionary["stat3"])
stat2 = int(dictionary["stat2"])
# if the player isn't in our result
if player not in result:
print(f"\tfirst time player {player}")
result[player] = {} # add the player as an empty dictionary
result[player]["player"] = player
if "stat3" not in result[player]:
print(f"\tfirst time stat3 {stat3}")
result[player]["stat3"] = stat3
else:
print(f"\tupdating stat3 { result[player]['stat3'] + stat3}")
result[player]["stat3"] += stat3
if "stat2" not in result[player]:
print(f"\tfirst time stat2 {stat2}")
result[player]["stat2"] = stat2
else:
print(f"\tupdating stat2 { result[player]['stat2'] + stat2}")
result[player]["stat2"] += stat2
return result
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
print(sum_stats_by_player(data))
Most of the solution here are making the problem too complex. Let's make it simple and more readable. Here you go:
In [26]: result = {}
In [27]: req_key = 'player'
In [29]: for dct in data:
...: player_val = dct.pop(req_key)
...: result.setdefault(player_val, {req_key: player_val})
...: for k, v in dct.items():
...: result[player_val][k] = result[player_val].get(k, 0) + int(v)
In [30]: result
Out[30]:
{'1': {'player': '1', 'stat2': 5, 'stat3': 13},
'3': {'player': '3', 'stat2': 8, 'stat3': 9}}
Here you go simple and clean. For this simple problem no need of imports. Now coming to the program:
result.setdefault(player_val, {'player': player_val})
It sets the default value as "player": 3 or "player": 1 if there is no such key in the result.
result[player_val][k] = result[player_val].get(k, 0) + int(v)
This adds up the value for keys with common values.
Another version using Counter
import itertools
from collections import Counter
def count_group(group):
c = Counter()
for g in group:
g_i = dict([(k, int(v)) for k, v in g.items() if k != 'player'])
c.update(g_i)
return dict(c)
sorted_data = sorted(data, key=lambda x:x['player'])
results = [(k, count_group(g)) for k, g in itertools.groupby(sorted_data, lambda x: x['player'])]
print(results)
To give
[('1', {'stat3': 13, 'stat2': 5}), ('3', {'stat3': 9, 'stat2': 8})]
Two loops would allow you to:
group your data by a primary key
aggregate all secondary information
These two tasks are accomplished in the aggregate_statistics function shown below.
from collections import Counter
from pprint import pprint
def main():
data = [{'player': 1, 'stat2': 4, 'stat3': 5},
{'player': 1, 'stat2': 1, 'stat3': 8},
{'player': 3, 'stat2': 1, 'stat3': 6},
{'player': 3, 'stat2': 7, 'stat3': 3}]
new_data = aggregate_statistics(data, 'player')
pprint(new_data)
def aggregate_statistics(table, key):
records_by_key = {}
for record in table:
data = record.copy()
records_by_key.setdefault(data.pop(key), []).append(Counter(data))
new_data = []
for second_key, value in records_by_key.items():
start, *remaining = value
for record in remaining:
start.update(record)
new_data.append(dict(start, **{key: second_key}))
return new_data
if __name__ == '__main__':
main()

Dynamically join of dictionary

a="90342"
# this used to generate a dict of each char in a and it indexs
modchar=[{i:a.index(i)} for i in a ]
#modchar=[{'9': 0}, {'0': 1}, {'3': 2}, {'4': 3}, {'2': 4}]
# below produce combination now this combination
def combination(x,n):
return list(itertools.combinations(x,n))
combination(modchar,1)
#output [({'9': 0},), ({'0': 1},), ({'3': 2},), ({'4': 3},), ({'2': 4},)]
combination(modchar,2)
#output [({'9': 0}, {'0': 1}), ({'9': 0}, {'3': 2}), ({'9': 0}, {'4': 3}), ({'9': 0}, {'2': 4}), ({'0': 1}, {'3': 2}), ({'0': 1}, {'4': 3}), ({'0': 1}, {'2': 4}), ({'3': 2}, {'4': 3}), ({'3': 2}, {'2': 4}), ({'4': 3}, {'2': 4})]
combination(modchar,3)
#output [({'9': 0}, {'0': 1}, {'3': 2}), ({'9': 0}, {'0': 1}, {'4': 3}), ({'9': 0}, {'0': 1}, {'2': 4}), ({'9': 0}, {'3': 2}, {'4': 3}),....]
if u look at each result in the list first element is tuple of dict.what i want to do is to combine the dictionary inside the tuple and make it as single dict
i have tried
map(lambda x:dict(x[0],**x[1]),list(itertools.combinations(x,n)))
above works only for tuple of two dicts.
how can i produce a code dynamically it should combine all dicts and produce single dict irrespictive of n value in combination(x,n)
expected output: for n=2
[({'9': 0,'0': 1}) ....]
expected output: for n=3
[({'9': 0,'0': 1,'3': 2})..]
Here's a way to do it:
combos = combinations(modchar,3)
def combineDictTuple(dt):
d = {}
for item in dt:
d.update(item)
return d
newCombos = [combineDictTuple(dt) for dt in combos]
# OUTPUT: [{'9': 0, '0': 1, '3': 2}, {'9': 0, '0': 1, '4': 3}, {'9': 0, '0': 1, '2': 4}, {'9': 0, '3': 2, '4': 3}, {'9': 0, '3': 2, '2': 4}, {'9': 0, '2': 4, '4': 3}, {'0': 1, '3': 2, '4': 3}, {'0': 1, '3': 2, '2': 4}, {'0': 1, '2': 4, '4': 3}, {'3': 2, '2': 4, '4': 3}]
This should do what you want:
>>> def update_with_return(d1, d2):
... d1.update(d2)
... return d1
...
>>> reduce(update_with_return, ({'a': 1}, {'b':2}), dict())
{'a': 1, 'b': 2}

Categories