Creating dynamic nested dictionary of counts

Creating dynamic nested dictionary of counts - python

I have a text file abc.txt:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
I need to parse these statements and output should be in nested dictionary as below:
{'abc':{'pqr':{'lmn':{'xyz':{'pass':1},{'bcd':{'pass':1}}}}}}
where 1 is 'pass' count.
I'm able to do as much as this:
import re
d={}
p=re.compile('[a-zA-z]+')
for line in open('abc.txt'):
for key in p.findall(line):
d['key']={}

Check out the setdefault method on dictionaries.
d = {}
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('xyz', {})['pass'] = 1
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('bcd', {})['pass'] = 1
d
gives
{'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 1}}}}

Here's an updated version of my answer in which leaves of the tree data-structure are now different from those in rest of it. Instead of the tree being strictly a dict-of-nested-dicts, the "leaves" on each branch are now instances of a different subclass of dict named collections.Counter which are useful for counting the number of times each of their keys occur. I did this because of your response to my question about what should happen if the last part of each line was something other than ":pass" (which was "we have to put new count for that key").
Nested dictionaries are often called Tree data-structures and can be defined recursively — the root is a dictionary as are the branches. The following uses a dict subclass instead of a plain dict because it makes constructing them easier since you don't need to special case the creation of the first branch of next level down (except I still do when adding the "leaves" because they are a different subclass, collections.Counter).
from collections import Counter
from functools import reduce
import re
# (Optional) trick to make Counter subclass print like a regular dict.
class Counter(Counter):
def __repr__(self):
return dict(self).__repr__()
# Borrowed from answer # https://stackoverflow.com/a/19829714/355230
class Tree(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
# Utility functions based on answer # https://stackoverflow.com/a/14692747/355230
def nested_dict_get(nested_dict, keys):
return reduce(lambda d, k: d[k], keys, nested_dict)
def nested_dict_set(nested_dict, keys, value):
nested_dict_get(nested_dict, keys[:-1])[keys[-1]] = value
def nested_dict_update_count(nested_dict, keys):
counter = nested_dict_get(nested_dict, keys[:-1])
if counter: # Update existing Counter.
counter.update([keys[-1]])
else: # Create a new Counter.
nested_dict_set(nested_dict, keys[:-1], Counter([keys[-1]]))
d = Tree()
pat = re.compile(r'[a-zA-z]+')
with open('abc.txt') as file:
for line in file:
nested_dict_update_count(d, [w for w in pat.findall(line.rstrip())])
print(d) # Prints like a regular dict.
To test the leaf-counting capabilities of the revised code, I used the following test file which includes the same line twice, once ending again with :pass and another ending in :fail.
Expanded abc.txt test file:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
abc/pqr/lmn/xyz:fail
abc/pqr/lmn/xyz:pass
Output:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'fail': 1, 'pass': 2}}}}}

If i understand your question:
sources = ["abc/pqr/lmn/xyz:pass", "abc/pqr/lmn/bcd:pass", "abc/pqr/lmn/xyz:pass"]
def prepare_source(source):
path, value = source.split(':')
elements = path.split('/')
return elements, value
def add_key(elements, value):
result = dict()
if len(elements) > 1:
result[elements[0]] = add_key(elements[1:], value)
else:
result[elements[0]] = {value: 1}
return result
# base merge function get from here:
# http://stackoverflow.com/questions/7204805/dictionaries-of-dictionaries-merge
def merge(a, b, path=None):
"merges b into a"
if path is None: path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
merge(a[key], b[key], path + [str(key)])
elif isinstance(a[key], int) and isinstance(b[key], int):
a[key] += b[key]
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a
result = dict()
for source in sources:
result = merge(result, add_key(*prepare_source(source)))
print result
Output will be:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 2}}}}}

Related

append a nested python dict with a full path key without overwrite exiting keys or create a new key if the current key is not exist [duplicate]

I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value

Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]

It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.

Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)

This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.

How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value

Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2

Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.

Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}

Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}

An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.

It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]

You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html

If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value

How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()

Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None

I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])

a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one

Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data

I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''

You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.

how to process api url query string with a dot (.) [duplicate]

I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value

Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]

It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.

Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)

This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.

How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value

Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2

Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.

Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}

Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}

An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.

It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]

You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html

If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value

How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()

Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None

I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])

a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one

Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data

I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''

You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.

Python Dict: Get value associated with with a list of keys, where each subsequent key resides in the previous key's value [duplicate]

I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value

Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]

It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.

Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)

This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.

How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value

Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2

Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.

Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}

Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}

An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.

It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]

You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html

If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value

How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()

Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None

I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])

a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one

Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data

I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''

You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.

access nested dictionary with a list of keys [duplicate]

I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value

Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]

It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.

Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)

This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.

How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value

Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2

Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.

Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}

Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}

An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.

It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]

You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html

If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value

How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()

Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None

I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])

a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one

Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data

I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''

You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.

Accessing python dict using nested key lookup string

I am looking to create a simple nested "lookup" mechanism in python, and wanted to make sure there wasn't already something somewhere hidden in the vast libraries in python that doesn't already do this before creating it.
I am looking to take a dict that is formatted something like this
my_dict = {
"root": {
"secondary": {
"user1": {
"name": "jim",
"age": 24
},
"user2": {
"name": "fred",
"age": 25
}
}
}
}
and I am trying to have a way to access the data by using a decimal notation that would be something similar to
root.secondary.user2
and return that resulting dict back as a response. I am thinking that there must be something that does this and I could write one without much difficulty but I want to make sure I am not recreating something I might be missing from the documentation. Thanks

There's nothing in the standard library for this purpose, but it is rather easy to code this yourself:
>>> key = "root.secondary.user2"
>>> reduce(dict.get, key.split("."), my_dict)
{'age': 25, 'name': 'fred'}
This exploits the fact that the look-up for the key k in the dictionary d can be written as dict.get(d, k). Applying this iteratively using reduce() leads to the desired result.
Edit: For completeness three functions to get, set or delete dictionary keys using this method:
def get_key(my_dict, key):
return reduce(dict.get, key.split("."), my_dict)
def set_key(my_dict, key, value):
key = key.split(".")
my_dict = reduce(dict.get, key[:-1], my_dict)
my_dict[key[-1]] = value
def del_key(my_dict, key):
key = key.split(".")
my_dict = reduce(dict.get, key[:-1], my_dict)
del my_dict[key[-1]]

You can have that. You can subclass dict, add the key lookup (and even retain the name dict) by using code similar to the one below. The {...} form however will still use the builtin dict class (now called orig_dict), so you have to enclose it, like so: Dict({...}). This implementation recursively converts dictionaries to the new form, so you don't have to use the method above for any dictionary entries that are plain dictionaries themselves.
orig_dict = dict
class Dict(orig_dict):
def __init__(self, *args, **kwargs):
super(Dict, self).__init__(*args, **kwargs)
for k, v in self.iteritems():
if type(v) == orig_dict and not isinstance(v, Dict):
super(Dict, self).__setitem__(k, Dict(v))
def __getattribute__(self, k):
try: return super(Dict, self).__getattribute__(k)
except: return self.__getitem__(k)
def __setattr__(self, k, v):
if self.has_key(k): self.__setitem__(k, v)
else: return super(Dict, self).__setattr__(k, v)
def __delattr__(self, k):
try: self.__delitem__(k)
except: super(Dict, self).__delattr__(k)
def __setitem__(self, k, v):
toconvert = type(v) == orig_dict and not isinstance(v, Dict)
super(Dict, self).__setitem__(k, Dict(v) if toconvert else v)
# dict = Dict <-- you can even do this but I advise against it
# testing:
b = Dict(a=1, b=Dict(c=2, d=3))
c = Dict({'a': 1, 'b': {'c': 2, 'd': 3}})
d = Dict(a=1, b={'c': 2, 'd': {'e': 3, 'f': {'g': 4}}})
b.a = b.b
b.b = 1
d.b.d.f.g = 40
del d.b.d.e
d.b.c += d.b.d.f.g
c.b.c += c.a
del c.a
print b
print c
print d

Recursion still works.
def walk_into( dict, key ):
head, _, tail = key.partition('.')
if tail:
return walk_into( dict[head], tail )
return dict, key
d, k = walk_into( my_dict, "root.secondary.user2" )
d[k] can be used for getting or putting a new value.

I have a pretty complete implementation for this and some other stuff here. Repository here, trict.util combined with the __get__ method in trict.trict might have the stuff you need if you don't feel like installing it. Also it actually is in conda-forge even though the README might say otherwise if I haven't gotten around to updating it before you're reading this.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Creating dynamic nested dictionary of counts - python

Check out the setdefault method on dictionaries. d = {} d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('xyz', {})['pass'] = 1 d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('bcd', {})['pass'] = 1 d gives {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 1}}}}

Related

append a nested python dict with a full path key without overwrite exiting keys or create a new key if the current key is not exist [duplicate]

how to process api url query string with a dot (.) [duplicate]

Python Dict: Get value associated with with a list of keys, where each subsequent key resides in the previous key's value [duplicate]

access nested dictionary with a list of keys [duplicate]

Accessing python dict using nested key lookup string

Categories

Resources