python itertools groupby return tuple - python

I need to parse the flatten structure and create nested structure using the list of keys provided. I have solved the problem but I am looking for an improvement and I would like to learn what I can change in my code. Can somebody review it and refactor using better knowledge?
src_data = [
{
"key1": "XX",
"key2": "X111",
"key3": "1aa",
"key4": 1
},
{
"key1": "YY",
"key2": "Y111",
"key3": "1bb",
"key4": 11
},
{
"key1": "ZZ",
"key2": "Z111",
"key3": "1cc",
"key4": 2.4
},
{
"key1": "AA",
"key2": "A111",
"key3": "1cc",
"key4": 33333.2122
},
{
"key1": "BB",
"key2": "B111",
"key3": "1bb",
"key4": 2
},
]
this is my code I developed so far creating the final result.
def plant_tree(ll):
master_tree = {}
for i in ll:
tree = master_tree
for n in i:
if n not in tree:
tree[n] = {}
tree = tree[n]
return master_tree
def make_nested_object(tt, var):
elo = lambda l: reduce(lambda x, y: {y: x}, l[::-1], var)
return {'n_path': tt, 'n_structure': elo(tt)}
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
def set_nested_item(dataDict, mapList, val):
"""Set item in nested dictionary"""
reduce(getitem, mapList[:-1], dataDict)[mapList[-1]] = val
return dataDict
def update_tree(data_tree):
# MAKE NESTED OBJECT
out = (make_nested_object(k, v) for k,v, in res_out.items())
for dd in out:
leaf_data = dd['n_structure']
leaf_path = dd['n_path']
data_tree = set_nested_item(data_tree, leaf_path, getFromDict(leaf_data, leaf_path))
return data_tree
this is the customed itemgeter function from this question
def customed_itemgetter(*args):
# this handles the case when one key is provided
f = itemgetter(*args)
if len(args) > 2:
return f
return lambda obj: (f(obj),)
define the nesting level
nesting_keys = ['key1', 'key3', 'key2']
grouper = customed_itemgetter(*nesting_keys)
ii = groupby(sorted(src_data, key=grouper), grouper)
res_out = {key: [{k:v for k,v in i.items() if k not in nesting_keys} for i in group] for key,group in ii}
#
ll = ([dd[x] for x in nesting_keys] for dd in src_data)
data_tree = plant_tree(ll)
get results
result = update_tree(data_tree)
How can I improve my code?

If the itemgetter [Python-doc] is given a single element, it returns that single element, and does not wrap it in a singleton-tuple.
We can however construct a function for that, like:
from operator import itemgetter
def itemgetter2(*args):
f = itemgetter(*args)
if len(args) > 2:
return f
return lambda obj: (f(obj),)
then we can thus use the new itemgetter2, like:
grouper = itemgetter2(*ll)
ii = groupby(sorted(src_data, key=grouper), grouper)
EDIT: Based on your question however, you want to perform multilevel grouping, we can make a function for that, like:
def multigroup(groups, iterable, index=0):
if len(groups) <= index:
return list(iterable)
else:
f = itemgetter(groups[index])
i1 = index + 1
return {
k: multigroup(groups, vs, index=i1)
for k, vs in groupby(sorted(iterable, key=f), f)
}
For the data_src in the question, this then generates:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'a': 1, 'b': 2, 'z': 3}]}, 2: {3: [{'a': 2, 'b': 3, 'e': 2}]}, 4: {3: [{'a': 4, 'x': 3, 'b': 3}]}}
You can post-process the values in the list(..) call however. We can for example generate dictionaries without the elements in the grouping columns:
def multigroup(groups, iterable):
group_set = set(groups)
fs = [itemgetter(group) for group in groups]
def mg(iterable, index=0):
if len(groups) <= index:
return [
{k: v for k, v in item.items() if k not in group_set}
for item in iterable
]
else:
i1 = index + 1
return {
k: mg(vs, index=i1)
for k, vs in groupby(sorted(iterable, key=fs[index]), fs[index])
}
return mg(iterable)
For the given sample input, we get:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'z': 3}]}, 2: {3: [{'e': 2}]}, 4: {3: [{'x': 3}]}}
or for the new sample data:
>>> pprint(multigroup(['key1', 'key3', 'key2'], src_data))
{'AA': {'1cc': {'A111': [{'key4': 33333.2122}]}},
'BB': {'1bb': {'B111': [{'key4': 2}]}},
'XX': {'1aa': {'X111': [{'key4': 1}]}},
'YY': {'1bb': {'Y111': [{'key4': 11}]}},
'ZZ': {'1cc': {'Z111': [{'key4': 2.4}]}}}

Related

Get key values for certain fields in JSON response

My json data would look like this:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Is there a way I can get values for certain fields in the response along with their keys. So from this response, the fields for which I expect values are a, c,e,g,i,j along with the respective keys.
Eg: [a:1,c:2,e:3,g:4,i:5,j:6]. Could this be done?
My response contained something like:
{
"a":1,
"b":[
{
"c":2,
"d":{
"e":3
},
"f":{
"g":4,
"k":[
"l","m"]
},
"h":[
{
"i":5
},
{
"j":6
}
]
}
]
}
Which resulted in the error. I have made the following fix for it.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
if isinstance(d,dict):
get_key_value(d, res_dct, lst)
else:
lst.append(f'{k}:{v}')
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(staging_dict, res_dct, lst)
You can use a recursive function and store key & value if only value not list or dict.
def get_key_value(dct, res_dct, lst):
for k,v in dct.items():
if isinstance(v, list):
for d in v:
get_key_value(d, res_dct, lst)
elif isinstance(v, dict):
get_key_value(v, res_dct, lst)
else:
res_dct[k] = v
# If you want to store in 'list' you can store as string
lst.append(f'{k}:{v}')
res_dct = {}
lst = []
get_key_value(dct, res_dct, lst)
print(res_dct)
print(lst)
Output:
# res_dct
{'a': 1, 'c': 2, 'e': 3, 'g': 4, 'i': 5, 'j': 6}
# lst
['a:1', 'c:2', 'e:3', 'g:4', 'i:5', 'j:6']

Can we have dictionaries as elements of a list, if so how do we call out a specific value of the of a specific dictionary? [duplicate]

Assume I have this:
[
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
and by searching "Pam" as name, I want to retrieve the related dictionary: {name: "Pam", age: 7}
How to achieve this ?
You can use a generator expression:
>>> dicts = [
... { "name": "Tom", "age": 10 },
... { "name": "Mark", "age": 5 },
... { "name": "Pam", "age": 7 },
... { "name": "Dick", "age": 12 }
... ]
>>> next(item for item in dicts if item["name"] == "Pam")
{'age': 7, 'name': 'Pam'}
If you need to handle the item not being there, then you can do what user Matt suggested in his comment and provide a default using a slightly different API:
next((item for item in dicts if item["name"] == "Pam"), None)
And to find the index of the item, rather than the item itself, you can enumerate() the list:
next((i for i, item in enumerate(dicts) if item["name"] == "Pam"), None)
This looks to me the most pythonic way:
people = [
{'name': "Tom", 'age': 10},
{'name': "Mark", 'age': 5},
{'name': "Pam", 'age': 7}
]
filter(lambda person: person['name'] == 'Pam', people)
result (returned as a list in Python 2):
[{'age': 7, 'name': 'Pam'}]
Note: In Python 3, a filter object is returned. So the python3 solution would be:
list(filter(lambda person: person['name'] == 'Pam', people))
#Frédéric Hamidi's answer is great. In Python 3.x the syntax for .next() changed slightly. Thus a slight modification:
>>> dicts = [
{ "name": "Tom", "age": 10 },
{ "name": "Mark", "age": 5 },
{ "name": "Pam", "age": 7 },
{ "name": "Dick", "age": 12 }
]
>>> next(item for item in dicts if item["name"] == "Pam")
{'age': 7, 'name': 'Pam'}
As mentioned in the comments by #Matt, you can add a default value as such:
>>> next((item for item in dicts if item["name"] == "Pam"), False)
{'name': 'Pam', 'age': 7}
>>> next((item for item in dicts if item["name"] == "Sam"), False)
False
>>>
You can use a list comprehension:
def search(name, people):
return [element for element in people if element['name'] == name]
I tested various methods to go through a list of dictionaries and return the dictionaries where key x has a certain value.
Results:
Speed: list comprehension > generator expression >> normal list iteration >>> filter.
All scale linear with the number of dicts in the list (10x list size -> 10x time).
The keys per dictionary does not affect speed significantly for large amounts (thousands) of keys. Please see this graph I calculated: https://imgur.com/a/quQzv (method names see below).
All tests done with Python 3.6.4, W7x64.
from random import randint
from timeit import timeit
list_dicts = []
for _ in range(1000): # number of dicts in the list
dict_tmp = {}
for i in range(10): # number of keys for each dict
dict_tmp[f"key{i}"] = randint(0,50)
list_dicts.append( dict_tmp )
def a():
# normal iteration over all elements
for dict_ in list_dicts:
if dict_["key3"] == 20:
pass
def b():
# use 'generator'
for dict_ in (x for x in list_dicts if x["key3"] == 20):
pass
def c():
# use 'list'
for dict_ in [x for x in list_dicts if x["key3"] == 20]:
pass
def d():
# use 'filter'
for dict_ in filter(lambda x: x['key3'] == 20, list_dicts):
pass
Results:
1.7303 # normal list iteration
1.3849 # generator expression
1.3158 # list comprehension
7.7848 # filter
people = [
{'name': "Tom", 'age': 10},
{'name': "Mark", 'age': 5},
{'name': "Pam", 'age': 7}
]
def search(name):
for p in people:
if p['name'] == name:
return p
search("Pam")
Have you ever tried out the pandas package? It's perfect for this kind of search task and optimized too.
import pandas as pd
listOfDicts = [
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
# Create a data frame, keys are used as column headers.
# Dict items with the same key are entered into the same respective column.
df = pd.DataFrame(listOfDicts)
# The pandas dataframe allows you to pick out specific values like so:
df2 = df[ (df['name'] == 'Pam') & (df['age'] == 7) ]
# Alternate syntax, same thing
df2 = df[ (df.name == 'Pam') & (df.age == 7) ]
I've added a little bit of benchmarking below to illustrate pandas' faster runtimes on a larger scale i.e. 100k+ entries:
setup_large = 'dicts = [];\
[dicts.extend(({ "name": "Tom", "age": 10 },{ "name": "Mark", "age": 5 },\
{ "name": "Pam", "age": 7 },{ "name": "Dick", "age": 12 })) for _ in range(25000)];\
from operator import itemgetter;import pandas as pd;\
df = pd.DataFrame(dicts);'
setup_small = 'dicts = [];\
dicts.extend(({ "name": "Tom", "age": 10 },{ "name": "Mark", "age": 5 },\
{ "name": "Pam", "age": 7 },{ "name": "Dick", "age": 12 }));\
from operator import itemgetter;import pandas as pd;\
df = pd.DataFrame(dicts);'
method1 = '[item for item in dicts if item["name"] == "Pam"]'
method2 = 'df[df["name"] == "Pam"]'
import timeit
t = timeit.Timer(method1, setup_small)
print('Small Method LC: ' + str(t.timeit(100)))
t = timeit.Timer(method2, setup_small)
print('Small Method Pandas: ' + str(t.timeit(100)))
t = timeit.Timer(method1, setup_large)
print('Large Method LC: ' + str(t.timeit(100)))
t = timeit.Timer(method2, setup_large)
print('Large Method Pandas: ' + str(t.timeit(100)))
#Small Method LC: 0.000191926956177
#Small Method Pandas: 0.044392824173
#Large Method LC: 1.98827004433
#Large Method Pandas: 0.324505090714
To add just a tiny bit to #FrédéricHamidi.
In case you are not sure a key is in the the list of dicts, something like this would help:
next((item for item in dicts if item.get("name") and item["name"] == "Pam"), None)
Simply using list comprehension:
[i for i in dct if i['name'] == 'Pam'][0]
Sample code:
dct = [
{'name': 'Tom', 'age': 10},
{'name': 'Mark', 'age': 5},
{'name': 'Pam', 'age': 7}
]
print([i for i in dct if i['name'] == 'Pam'][0])
> {'age': 7, 'name': 'Pam'}
You can achieve this with the usage of filter and next methods in Python.
filter method filters the given sequence and returns an iterator.
next method accepts an iterator and returns the next element in the list.
So you can find the element by,
my_dict = [
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
next(filter(lambda obj: obj.get('name') == 'Pam', my_dict), None)
and the output is,
{'name': 'Pam', 'age': 7}
Note: The above code will return None incase if the name we are searching is not found.
One simple way using list comprehensions is , if l is the list
l = [
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
then
[d['age'] for d in l if d['name']=='Tom']
def dsearch(lod, **kw):
return filter(lambda i: all((i[k] == v for (k, v) in kw.items())), lod)
lod=[{'a':33, 'b':'test2', 'c':'a.ing333'},
{'a':22, 'b':'ihaha', 'c':'fbgval'},
{'a':33, 'b':'TEst1', 'c':'s.ing123'},
{'a':22, 'b':'ihaha', 'c':'dfdvbfjkv'}]
list(dsearch(lod, a=22))
[{'a': 22, 'b': 'ihaha', 'c': 'fbgval'},
{'a': 22, 'b': 'ihaha', 'c': 'dfdvbfjkv'}]
list(dsearch(lod, a=22, b='ihaha'))
[{'a': 22, 'b': 'ihaha', 'c': 'fbgval'},
{'a': 22, 'b': 'ihaha', 'c': 'dfdvbfjkv'}]
list(dsearch(lod, a=22, c='fbgval'))
[{'a': 22, 'b': 'ihaha', 'c': 'fbgval'}]
This is a general way of searching a value in a list of dictionaries:
def search_dictionaries(key, value, list_of_dictionaries):
return [element for element in list_of_dictionaries if element[key] == value]
dicts=[
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
from collections import defaultdict
dicts_by_name=defaultdict(list)
for d in dicts:
dicts_by_name[d['name']]=d
print dicts_by_name['Tom']
#output
#>>>
#{'age': 10, 'name': 'Tom'}
names = [{'name':'Tom', 'age': 10}, {'name': 'Mark', 'age': 5}, {'name': 'Pam', 'age': 7}]
resultlist = [d for d in names if d.get('name', '') == 'Pam']
first_result = resultlist[0]
This is one way...
You can try this:
''' lst: list of dictionaries '''
lst = [{"name": "Tom", "age": 10}, {"name": "Mark", "age": 5}, {"name": "Pam", "age": 7}]
search = raw_input("What name: ") #Input name that needs to be searched (say 'Pam')
print [ lst[i] for i in range(len(lst)) if(lst[i]["name"]==search) ][0] #Output
>>> {'age': 7, 'name': 'Pam'}
Put the accepted answer in a function to easy re-use
def get_item(collection, key, target):
return next((item for item in collection if item[key] == target), None)
Or also as a lambda
get_item_lambda = lambda collection, key, target : next((item for item in collection if item[key] == target), None)
Result
key = "name"
target = "Pam"
print(get_item(target_list, key, target))
print(get_item_lambda(target_list, key, target))
#{'name': 'Pam', 'age': 7}
#{'name': 'Pam', 'age': 7}
In case the key may not be in the target dictionary use dict.get and avoid KeyError
def get_item(collection, key, target):
return next((item for item in collection if item.get(key, None) == target), None)
get_item_lambda = lambda collection, key, target : next((item for item in collection if item.get(key, None) == target), None)
My first thought would be that you might want to consider creating a dictionary of these dictionaries ... if, for example, you were going to be searching it more a than small number of times.
However that might be a premature optimization. What would be wrong with:
def get_records(key, store=dict()):
'''Return a list of all records containing name==key from our store
'''
assert key is not None
return [d for d in store if d['name']==key]
Most (if not all) implementations proposed here have two flaws:
They assume only one key to be passed for searching, while it may be interesting to have more for complex dict
They assume all keys passed for searching exist in the dicts, hence they don't deal correctly with KeyError occuring when it is not.
An updated proposition:
def find_first_in_list(objects, **kwargs):
return next((obj for obj in objects if
len(set(obj.keys()).intersection(kwargs.keys())) > 0 and
all([obj[k] == v for k, v in kwargs.items() if k in obj.keys()])),
None)
Maybe not the most pythonic, but at least a bit more failsafe.
Usage:
>>> obj1 = find_first_in_list(list_of_dict, name='Pam', age=7)
>>> obj2 = find_first_in_list(list_of_dict, name='Pam', age=27)
>>> obj3 = find_first_in_list(list_of_dict, name='Pam', address='nowhere')
>>>
>>> print(obj1, obj2, obj3)
{"name": "Pam", "age": 7}, None, {"name": "Pam", "age": 7}
The gist.
Here is a comparison using iterating throuhg list, using filter+lambda or refactoring(if needed or valid to your case) your code to dict of dicts rather than list of dicts
import time
# Build list of dicts
list_of_dicts = list()
for i in range(100000):
list_of_dicts.append({'id': i, 'name': 'Tom'})
# Build dict of dicts
dict_of_dicts = dict()
for i in range(100000):
dict_of_dicts[i] = {'name': 'Tom'}
# Find the one with ID of 99
# 1. iterate through the list
lod_ts = time.time()
for elem in list_of_dicts:
if elem['id'] == 99999:
break
lod_tf = time.time()
lod_td = lod_tf - lod_ts
# 2. Use filter
f_ts = time.time()
x = filter(lambda k: k['id'] == 99999, list_of_dicts)
f_tf = time.time()
f_td = f_tf- f_ts
# 3. find it in dict of dicts
dod_ts = time.time()
x = dict_of_dicts[99999]
dod_tf = time.time()
dod_td = dod_tf - dod_ts
print 'List of Dictionries took: %s' % lod_td
print 'Using filter took: %s' % f_td
print 'Dict of Dicts took: %s' % dod_td
And the output is this:
List of Dictionries took: 0.0099310874939
Using filter took: 0.0121960639954
Dict of Dicts took: 4.05311584473e-06
Conclusion:
Clearly having a dictionary of dicts is the most efficient way to be able to search in those cases, where you know say you will be searching by id's only.
interestingly using filter is the slowest solution.
I would create a dict of dicts like so:
names = ["Tom", "Mark", "Pam"]
ages = [10, 5, 7]
my_d = {}
for i, j in zip(names, ages):
my_d[i] = {"name": i, "age": j}
or, using exactly the same info as in the posted question:
info_list = [{"name": "Tom", "age": 10}, {"name": "Mark", "age": 5}, {"name": "Pam", "age": 7}]
my_d = {}
for d in info_list:
my_d[d["name"]] = d
Then you could do my_d["Pam"] and get {"name": "Pam", "age": 7}
Ducks will be a lot faster than a list comprehension or filter. It builds an index on your objects so lookups don't need to scan every item.
pip install ducks
from ducks import Dex
dicts = [
{"name": "Tom", "age": 10},
{"name": "Mark", "age": 5},
{"name": "Pam", "age": 7}
]
# Build the index
dex = Dex(dicts, {'name': str, 'age': int})
# Find matching objects
dex[{'name': 'Pam', 'age': 7}]
Result: [{'name': 'Pam', 'age': 7}]
You have to go through all elements of the list. There is not a shortcut!
Unless somewhere else you keep a dictionary of the names pointing to the items of the list, but then you have to take care of the consequences of popping an element from your list.
I found this thread when I was searching for an answer to the same
question. While I realize that it's a late answer, I thought I'd
contribute it in case it's useful to anyone else:
def find_dict_in_list(dicts, default=None, **kwargs):
"""Find first matching :obj:`dict` in :obj:`list`.
:param list dicts: List of dictionaries.
:param dict default: Optional. Default dictionary to return.
Defaults to `None`.
:param **kwargs: `key=value` pairs to match in :obj:`dict`.
:returns: First matching :obj:`dict` from `dicts`.
:rtype: dict
"""
rval = default
for d in dicts:
is_found = False
# Search for keys in dict.
for k, v in kwargs.items():
if d.get(k, None) == v:
is_found = True
else:
is_found = False
break
if is_found:
rval = d
break
return rval
if __name__ == '__main__':
# Tests
dicts = []
keys = 'spam eggs shrubbery knight'.split()
start = 0
for _ in range(4):
dct = {k: v for k, v in zip(keys, range(start, start+4))}
dicts.append(dct)
start += 4
# Find each dict based on 'spam' key only.
for x in range(len(dicts)):
spam = x*4
assert find_dict_in_list(dicts, spam=spam) == dicts[x]
# Find each dict based on 'spam' and 'shrubbery' keys.
for x in range(len(dicts)):
spam = x*4
assert find_dict_in_list(dicts, spam=spam, shrubbery=spam+2) == dicts[x]
# Search for one correct key, one incorrect key:
for x in range(len(dicts)):
spam = x*4
assert find_dict_in_list(dicts, spam=spam, shrubbery=spam+1) is None
# Search for non-existent dict.
for x in range(len(dicts)):
spam = x+100
assert find_dict_in_list(dicts, spam=spam) is None

Python: recursively append dictionary to another

I've searched and found this Append a dictionary to a dictionary but that clobbers keys from b if they exist in a..
I'd like to essentially recursively append 1 dictionary to another, where:
keys are unique (obviously, it's a dictionary), but each dictionary is fully represented in the result such that a.keys() and b.keys() are both subsets of c.keys()
if the same key is in both dictionaries, the resulting key contains a list of values from both, such that a[key] and b[key] are in c[key]
the values could be another dictionary, (but nothing deeper than 1 level), in which case the same logic should apply (append values) such that a[key1][key2] and b[key1][key2] are in c[key][key2]
The basic example is where 2 dictionary have keys that don't overlap, and I can accomplish that in multiple ways.. c = {**a, **b} for example, so I haven't covered that below
A trickier case:
a = {
"key1": "value_a1"
"key2": "value_a2"
}
b = {
"key1": "value_b1"
"key3": "value_b3"
}
c = combine(a, b)
c >> {
"key1": ["value_a1", "value_b1"],
"key2": "value_a2",
"key3": "value_b3"
}
An even trickier case
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
c >> {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2", "sub_value_b1"], #sub_value_a1 is not duplicated
"sub_key_2": ["sub_value_a3", "sub_value_b3"]
},
"key2": "value_a2",
"key3": "value_b3" # ["value_b3"] this would be okay, following from the code comment above
}
Caveats:
Python 3.6
The examples show lists being created as_needed, but I'm okay with every non-dict value being a list, as mentioned in the code comments
The values within the lists will always be strings
I tried to explain as best I could but can elaborate more if needed. Been working on this for a few days and keep getting stuck on the sub key part
There is no simple built-in way of doing this, but you can recreate the logic in python.
def combine_lists(a: list, b: list) -> list:
return a + [i for i in b if i not in a]
def combine_strs(a: str, b: str) -> str:
if a == b:
return a
return [a, b]
class EMPTY:
"A sentinel representing an empty value."
def combine_dicts(a: dict, b: dict) -> dict:
output = {}
keys = list(a) + [k for k in b if k not in a]
for key in keys:
aval = a.get(key, EMPTY)
bval = b.get(key, EMPTY)
if isinstance(aval, list) and isinstance(bval, list):
output[key] = combine_lists(aval, bval)
elif isinstance(aval, str) and isinstance(bval, str):
output[key] = combine_strs(aval, bval)
elif isinstance(aval, dict) and isinstance(bval, dict):
output[key] = combine_dicts(aval, bval)
elif bval is EMPTY:
output[key] = aval
elif aval is EMPTY:
output[key] = bval
else:
raise RuntimeError(
f"Cannot combine types: {type(aval)} and {type(bval)}"
)
return output
Sounds like you want a specialised version of dict. So, you could subclass it to give you the behaviour you want. Being a bit of a Python noob, I started with the answer here : Subclassing Python dictionary to override __setitem__
Then I added the behaviour in your couple of examples.
I also added a MultiValue class which is a subclass of list. This makes it easy to tell if a value in the dict already has multiple values. Also it removes duplicates, as it looks like you don't want them.
class MultiValue(list):
# Class to hold multiple values for a dictionary key. Prevents duplicates.
def append(self, value):
if isinstance(value, MultiValue):
for v in value:
if not v in self:
super(MultiValue, self).append(v)
else:
super(MultiValue, self).append(value)
class MultiValueDict(dict):
# dict which converts a key's value to a MultiValue when the key already exists.
def __init__(self, *args, **kwargs):
self.update(*args, **kwargs)
def __setitem__(self, key, value):
# optional processing here
if key in self:
existing_value = self[key]
if isinstance(existing_value, MultiValueDict) and isinstance(value, dict):
existing_value.update(value)
return
if isinstance(existing_value, MultiValue):
existing_value.append(value)
value = existing_value
else:
value = MultiValue([existing_value, value])
super(MultiValueDict, self).__setitem__(key, value)
def update(self, *args, **kwargs):
if args:
if len(args) > 1:
raise TypeError("update expected at most 1 arguments, "
"got %d" % len(args))
other = dict(args[0])
for key in other:
self[key] = other[key]
for key in kwargs:
self[key] = kwargs[key]
def setdefault(self, key, value=None):
if key not in self:
self[key] = value
return self[key]
Example 1:
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
# combine by creating a MultiValueDict then using update to add b to it.
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': ['value_a1', 'value_b1'], 'key2': 'value_a2', 'key3': 'value_b3'}
Example 2: The value for key1 is created as a MultiValueDict and the value for the sub_key_1 is a MultiValue, so this may not fit what you're trying to do. It depends how you're building you data set.
a = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_a2"]),
"sub_key_2": "sub_value_a3"
}),
"key2": "value_a2"
}
b = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_b1"]),
"sub_key_2": "sub_value_b3"
}),
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': {'sub_key_1': ['sub_value_a1', 'sub_value_a2', 'sub_value_b1'], 'sub_key_2': ['sub_value_a3', 'sub_value_b3']}, 'key2': 'value_a2', 'key3': 'value_b3'}
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
def appendValues(ax,cx):
if type(ax)==list:#is key's value in a, a list?
cx.extend(ax)#if it is a list then extend
else:#key's value in a, os not a list
cx.append(ax)#so use append
cx=list(set(cx))#make values unique with set
return cx
def combine(a,b):
c={}
for x in b:#first copy b keys and values to c
c[x]=b[x]
for x in a:#now combine a with c
if not x in c:#this key is not in c
c[x]=a[x]#so add it
else:#key exists in c
if type(c[x])==list:#is key's value in c ,a list?
c[x]=appendValues(a[x],c[x])
elif type(c[x])==dict:#is key's value in c a dictionary?
c[x]=combine(c[x],a[x])#combine dictionaries
else:#so key';'s value is not list or dict
c[x]=[c[x]]#make value a list
c[x]=appendValues(a[x],c[x])
return c
c = combine(a, b)
print(c)
print("==========================")
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
print(c)

Find key in nested dictionary mixed with lists

I get JSON Data back from an API. The dataset is large and nested. I can access the Datenreihen key like this:
jsondata.get("Ergebnis")[0].get("Kontakte").get("Datenreihen")
As you can see, this is a mix of dictionaries and lists.
I tried the following, but with lists it does not work :-(.
def recursive_lookup(k, d):
if k in d:
return d[k]
for v in d.values():
if isinstance(v, dict):
return recursive_lookup(k, v)
return None
# Works
recursive_lookup("Ergebnis", jsondata)
# Returns None
recursive_lookup("Datenreihen", jsondata)
Is there an easy way to access and key in my dictionary, no matter how deeply my object is nested?
This is exampledata:
{
"Success":true,
"Ergebnis":[
{
"ErgA1a: KPI Zeitreihe":{
"Message":"",
"MitZielgruppe":true,
"Beschriftung":[
"2019 KW 27",
"2019 KW 28",
"2019 KW 29"
],
"Datenreihen":{
"Gesamt":{
"Name":"Sympathie [#4]\n(Sehr sympathisch, Sympathisch)",
"Werte":[
39.922142815641145,
37.751410794385762,
38.35504885993484
]
}
}
}
}
],
"rest":[
{
"test":"bla"
}
]
}
data.get("ErgebnisseAnalyse")[0].get("ErgA1a: KPI Zeitreihe")
recursive_lookup("ErgA1a: KPI Zeitreihe", data)
Recursive function to find value in nested dictionary based upon key field
Code
def find_item(obj, field):
"""
Takes a dict with nested lists and dicts,
and searches all dicts for a key of the field
provided.
"""
if isinstance(obj, dict):
for k, v in obj.items():
if k == field:
yield v
elif isinstance(v, dict) or isinstance(v, list):
yield from find_item(v, field)
elif isinstance(obj, list):
for v in obj:
yield from find_item(v, field)
Usage
value = next(find_item(dictionary_object, field), None)
Test
# Nested dictionary
dic = {
"a": [{"b": {"c": 1}},
{"d": 2}],
"e": 3}
# Values form various fields
print(next(find_item(dic, "a"), None)) # Output: [{'b': {'c': 1}}, {'d': 2}]
print(next(find_item(dic, "b"), None)) # Output: {'c': 1}
print(next(find_item(dic, "c"), None)) # Output: 1
print(next(find_item(dic, "d"), None)) # Output: 2
print(next(find_item(dic, "e"), None)) # Output: 3
print(next(find_item(dic, "h"), None)) # Output: None

Python - Unflatten dict

I have this multi-dimensional dict:
a = {'a' : 'b', 'c' : {'d' : 'e'}}
And written simple function to flatten that dict:
def __flatten(self, dictionary, level = []):
tmp_dict = {}
for key, val in dictionary.items():
if type(val) == dict:
tmp_dict.update(self.__flatten(val, level + [key]))
else:
tmp_dict['.'.join(level + [key])] = val
return tmp_dict
After call this function with dict a i get in result:
{'a' : 'b', 'c.d' : 'e'}
Now, after making few instructions on this flattened dict i need to build new, multi-dimensional dict from that flattened. Example:
>> unflatten({'a' : 0, 'c.d' : 1))
{'a' : 0, 'c' : {'d' : 1}}
The only problem I have is that i do not have a function unflatten :)
Can anyone help with this? I have no idea how to do it.
EDIT:
Another example:
{'a' : 'b', 'c.d.e.f.g.h.i.j.k.l.m.n.o.p.r.s.t.u.w' : 'z'}
Should be after unflatten:
{'a': 'b', 'c': {'d': {'e': {'f': {'g': {'h': {'i': {'j': {'k': {'l': {'m': {'n': {'o': {'p': {'r': {'s': {'t': {'u': {'w': 'z'}}}}}}}}}}}}}}}}}}}
And another:
{'a' : 'b', 'c.d' : 'z', 'c.e' : 1}
To:
{'a' : 'b', 'c' : {'d' : 'z', 'e' : 1}}
This greatly increases the difficulty of the task, i know. Thats why i had problem with this and found no solution in hours..
def unflatten(dictionary):
resultDict = dict()
for key, value in dictionary.items():
parts = key.split(".")
d = resultDict
for part in parts[:-1]:
if part not in d:
d[part] = dict()
d = d[part]
d[parts[-1]] = value
return resultDict
from collections import defaultdict
def unflatten(d):
ret = defaultdict(dict)
for k,v in d.items():
k1,delim,k2 = k.partition('.')
if delim:
ret[k1].update({k2:v})
else:
ret[k1] = v
return ret
Here's one utilizing Python 3.5+ features, like typing and destructuring assignments. Try the tests out on repl.it.
from typing import Any, Dict
def unflatten(
d: Dict[str, Any],
base: Dict[str, Any] = None,
) -> Dict[str, Any]:
"""Convert any keys containing dotted paths to nested dicts
>>> unflatten({'a': 12, 'b': 13, 'c': 14}) # no expansion
{'a': 12, 'b': 13, 'c': 14}
>>> unflatten({'a.b.c': 12}) # dotted path expansion
{'a': {'b': {'c': 12}}}
>>> unflatten({'a.b.c': 12, 'a': {'b.d': 13}}) # merging
{'a': {'b': {'c': 12, 'd': 13}}}
>>> unflatten({'a.b': 12, 'a': {'b': 13}}) # insertion-order overwrites
{'a': {'b': 13}}
>>> unflatten({'a': {}}) # insertion-order overwrites
{'a': {}}
"""
if base is None:
base = {}
for key, value in d.items():
root = base
###
# If a dotted path is encountered, create nested dicts for all but
# the last level, then change root to that last level, and key to
# the final key in the path.
#
# This allows one final setitem at the bottom of the loop.
#
if '.' in key:
*parts, key = key.split('.')
for part in parts:
root.setdefault(part, {})
root = root[part]
if isinstance(value, dict):
value = unflatten(value, root.get(key, {}))
root[key] = value
return base
I wrote one years ago in Python 2 and 3 which I've adapted below. It was for making it easier to check if a given dictionary is a subset of a larger dictionary irrespective of whether provided in flattened or scaffolded form.
A bonus feature: Should there be consecutive integer indexes (as in 0, 1, 2, 3, 4 etc.), this will also convert them back into lists as well.
def unflatten_dictionary(field_dict):
field_dict = dict(field_dict)
new_field_dict = dict()
field_keys = list(field_dict)
field_keys.sort()
for each_key in field_keys:
field_value = field_dict[each_key]
processed_key = str(each_key)
current_key = None
current_subkey = None
for i in range(len(processed_key)):
if processed_key[i] == "[":
current_key = processed_key[:i]
start_subscript_index = i + 1
end_subscript_index = processed_key.index("]")
current_subkey = int(processed_key[start_subscript_index : end_subscript_index])
# reserve the remainder descendant keys to be processed later in a recursive call
if len(processed_key[end_subscript_index:]) > 1:
current_subkey = "{}.{}".format(current_subkey, processed_key[end_subscript_index + 2:])
break
# next child key is a dictionary
elif processed_key[i] == ".":
split_work = processed_key.split(".", 1)
if len(split_work) > 1:
current_key, current_subkey = split_work
else:
current_key = split_work[0]
break
if current_subkey is not None:
if current_key.isdigit():
current_key = int(current_key)
if current_key not in new_field_dict:
new_field_dict[current_key] = dict()
new_field_dict[current_key][current_subkey] = field_value
else:
new_field_dict[each_key] = field_value
# Recursively unflatten each dictionary on each depth before returning back to the caller.
all_digits = True
highest_digit = -1
for each_key, each_item in new_field_dict.items():
if isinstance(each_item, dict):
new_field_dict[each_key] = unflatten_dictionary(each_item)
# validate the keys can safely converted to a sequential list.
all_digits &= str(each_key).isdigit()
if all_digits:
next_digit = int(each_key)
if next_digit > highest_digit:
highest_digit = next_digit
# If all digits and can be sequential order, convert to list.
if all_digits and highest_digit == (len(new_field_dict) - 1):
digit_keys = list(new_field_dict)
digit_keys.sort()
new_list = []
for k in digit_keys:
i = int(k)
if len(new_list) <= i:
# Pre-populate missing list elements if the array index keys are out of order
# and the current element is ahead of the current length boundary.
while len(new_list) <= i:
new_list.append(None)
new_list[i] = new_field_dict[k]
new_field_dict = new_list
return new_field_dict
# Test
if __name__ == '__main__':
input_dict = {'a[0]': 1,
'a[1]': 10,
'a[2]': 5,
'b': 10,
'c.test.0': "hi",
'c.test.1': "bye",
"c.head.shoulders": "richard",
"c.head.knees": 'toes',
"z.trick.or[0]": "treat",
"z.trick.or[1]": "halloween",
"z.trick.and.then[0]": "he",
"z.trick.and.then[1]": "it",
"some[0].nested.field[0]": 42,
"some[0].nested.field[1]": 43,
"some[2].nested.field[0]": 44,
"mixed": {
"statement": "test",
"break[0]": True,
"break[1]": False,
}}
expected_dict = {'a': [1, 10, 5],
'b': 10,
'c': {
'test': ['hi', 'bye'],
'head': {
'shoulders': 'richard',
'knees' : 'toes'
}
},
'z': {
'trick': {
'or': ["treat", "halloween"],
'and': {
'then': ["he", "it"]
}
}
},
'some': {
0: {
'nested': {
'field': [42, 43]
}
},
2: {
'nested': {
'field': [44]
}
}
},
"mixed": {
"statement": "test",
"break": [True, False]
}}
# test
print("Input:")
print(input_dict)
print("====================================")
print("Output:")
actual_dict = unflatten_dictionary(input_dict)
print(actual_dict)
print("====================================")
print(f"Test passed? {expected_dict==actual_dict}")
As a rough-draft (could use a little improvement in variable name choice, and perhaps robustness, but it works for the example given):
def unflatten(d):
result = {}
for k,v in d.iteritems():
if '.' in k:
k1, k2 = k.split('.', 1)
v = {k2: v}
k = k1
result[k] = v
return result

Categories