Related
I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value
Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]
It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.
Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)
This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.
How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value
Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2
Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.
Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}
Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}
An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.
It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]
You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html
If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value
How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()
Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None
I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])
a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one
Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data
I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''
You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.
I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value
Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]
It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.
Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)
This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.
How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value
Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2
Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.
Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}
Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}
An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.
It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]
You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html
If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value
How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()
Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None
I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])
a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one
Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data
I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''
You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.
I have a nested dictionary that I'm trying to replace a given value of a key using a path made up of keys to that particular value.
Basic Example:
path_to_value = ["fruit", "apple", "colour"]
replacement_value = "green"
dictionary = {"fruit": {"apple": {"colour": "red"}, "banana": {"colour": "yellow", "size": "big"}}}
I've found a function here on Stackoverflow, but it recursively replaces all values in the dict which I don't want.
def change_key(d, required_key, new_value):
for k, v in d.items():
if isinstance(v, dict):
change_key(v, required_key, new_value)
if k == required_key:
d[k] = new_value
Any help would be appreciated.
I think something like this should work: narrow in using all but the last keys to get the dictionary you want to modify, then modify it using the last key.
def change_key(d, path_to_value, new_value):
for key in path_to_value[:-1]:
d = d[key]
d[path_to_value[-1]] = new_value
you cay try this:
def replace_val(dict, val_path, new_val):
if len(val_path) == 1:
dict[val_path[0]] = new_val
return dict
else:
dict[val_path[0]] = replace_val(dict[val_path[0]], val_path[1:], new_val)
return dict
I have a complex dictionary structure which I would like to access via a list of keys to address the correct item.
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist = ["a", "r"]
or
maplist = ["b", "v", "y"]
I have made the following code which works but I'm sure there is a better and more efficient way to do this if anyone has an idea.
# Get a given data from a dictionary with position provided as a list
def getFromDict(dataDict, mapList):
for k in mapList: dataDict = dataDict[k]
return dataDict
# Set a given data in a dictionary with position provided as a list
def setInDict(dataDict, mapList, value):
for k in mapList[:-1]: dataDict = dataDict[k]
dataDict[mapList[-1]] = value
Use reduce() to traverse the dictionary:
from functools import reduce # forward compatibility for Python 3
import operator
def getFromDict(dataDict, mapList):
return reduce(operator.getitem, mapList, dataDict)
and reuse getFromDict to find the location to store the value for setInDict():
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
All but the last element in mapList is needed to find the 'parent' dictionary to add the value to, then use the last element to set the value to the right key.
Demo:
>>> getFromDict(dataDict, ["a", "r"])
1
>>> getFromDict(dataDict, ["b", "v", "y"])
2
>>> setInDict(dataDict, ["b", "v", "w"], 4)
>>> import pprint
>>> pprint.pprint(dataDict)
{'a': {'r': 1, 's': 2, 't': 3},
'b': {'u': 1, 'v': {'w': 4, 'x': 1, 'y': 2, 'z': 3}, 'w': 3}}
Note that the Python PEP8 style guide prescribes snake_case names for functions. The above works equally well for lists or a mix of dictionaries and lists, so the names should really be get_by_path() and set_by_path():
from functools import reduce # forward compatibility for Python 3
import operator
def get_by_path(root, items):
"""Access a nested object in root by item sequence."""
return reduce(operator.getitem, items, root)
def set_by_path(root, items, value):
"""Set a value in a nested object in root by item sequence."""
get_by_path(root, items[:-1])[items[-1]] = value
And for completion's sake, a function to delete a key:
def del_by_path(root, items):
"""Delete a key-value in a nested object in root by item sequence."""
del get_by_path(root, items[:-1])[items[-1]]
It seems more pythonic to use a for loop.
See the quote from What’s New In Python 3.0.
Removed reduce(). Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable.
def nested_get(dic, keys):
for key in keys:
dic = dic[key]
return dic
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
def nested_del(dic, keys):
for key in keys[:-1]:
dic = dic[key]
del dic[keys[-1]]
Note that the accepted solution doesn't set non-existing nested keys (it raises KeyError). Using the approach above will create non-existing nodes instead.
The code works in both Python 2 and 3.
Using reduce is clever, but the OP's set method may have issues if the parent keys do not pre-exist in the nested dictionary. Since this is the first SO post I saw for this subject in my google search, I would like to make it slightly better.
The set method in ( Setting a value in a nested python dictionary given a list of indices and value ) seems more robust to missing parental keys. To copy it over:
def nested_set(dic, keys, value):
for key in keys[:-1]:
dic = dic.setdefault(key, {})
dic[keys[-1]] = value
Also, it can be convenient to have a method that traverses the key tree and get all the absolute key paths, for which I have created:
def keysInDict(dataDict, parent=[]):
if not isinstance(dataDict, dict):
return [tuple(parent)]
else:
return reduce(list.__add__,
[keysInDict(v,parent+[k]) for k,v in dataDict.items()], [])
One use of it is to convert the nested tree to a pandas DataFrame, using the following code (assuming that all leafs in the nested dictionary have the same depth).
def dict_to_df(dataDict):
ret = []
for k in keysInDict(dataDict):
v = np.array( getFromDict(dataDict, k), )
v = pd.DataFrame(v)
v.columns = pd.MultiIndex.from_product(list(k) + [v.columns])
ret.append(v)
return reduce(pd.DataFrame.join, ret)
This library may be helpful: https://github.com/akesterson/dpath-python
A python library for accessing and searching dictionaries via
/slashed/paths ala xpath
Basically it lets you glob over a dictionary as if it were a
filesystem.
How about using recursive functions?
To get a value:
def getFromDict(dataDict, maplist):
first, rest = maplist[0], maplist[1:]
if rest:
# if `rest` is not empty, run the function recursively
return getFromDict(dataDict[first], rest)
else:
return dataDict[first]
And to set a value:
def setInDict(dataDict, maplist, value):
first, rest = maplist[0], maplist[1:]
if rest:
try:
if not isinstance(dataDict[first], dict):
# if the key is not a dict, then make it a dict
dataDict[first] = {}
except KeyError:
# if key doesn't exist, create one
dataDict[first] = {}
setInDict(dataDict[first], rest, value)
else:
dataDict[first] = value
Solved this with recursion:
def get(d,l):
if len(l)==1: return d[l[0]]
return get(d[l[0]],l[1:])
Using your example:
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
maplist1 = ["a", "r"]
maplist2 = ["b", "v", "y"]
print(get(dataDict, maplist1)) # 1
print(get(dataDict, maplist2)) # 2
Instead of taking a performance hit each time you want to look up a value, how about you flatten the dictionary once then simply look up the key like b:v:y
def flatten(mydict,sep = ':'):
new_dict = {}
for key,value in mydict.items():
if isinstance(value,dict):
_dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
new_dict.update(_dict)
else:
new_dict[key]=value
return new_dict
dataDict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
flat_dict = flatten(dataDict)
print flat_dict
{'b:w': 3, 'b:u': 1, 'b:v:y': 2, 'b:v:x': 1, 'b:v:z': 3, 'a:r': 1, 'a:s': 2, 'a:t': 3}
This way you can simply look up items using flat_dict['b:v:y'] which will give you 1.
And instead of traversing the dictionary on each lookup, you may be able to speed this up by flattening the dictionary and saving the output so that a lookup from cold start would mean loading up the flattened dictionary and simply performing a key/value lookup with no traversal.
Check out NestedDict from the ndicts package (I am the author), it does exactly what you ask for.
from ndicts import NestedDict
data_dict = {
"a":{
"r": 1,
"s": 2,
"t": 3
},
"b":{
"u": 1,
"v": {
"x": 1,
"y": 2,
"z": 3
},
"w": 3
}
}
nd = NestedDict(data_dict)
You can now access keys using comma separated values.
>>> nd["a", "r"]
1
>>> nd["b", "v"]
{"x": 1, "y": 2, "z": 3}
Pure Python style, without any import:
def nested_set(element, value, *keys):
if type(element) is not dict:
raise AttributeError('nested_set() expects dict as first argument.')
if len(keys) < 2:
raise AttributeError('nested_set() expects at least three arguments, not enough given.')
_keys = keys[:-1]
_element = element
for key in _keys:
_element = _element[key]
_element[keys[-1]] = value
example = {"foo": { "bar": { "baz": "ok" } } }
keys = ['foo', 'bar']
nested_set(example, "yay", *keys)
print(example)
Output
{'foo': {'bar': 'yay'}}
An alternative way if you don't want to raise errors if one of the keys is absent (so that your main code can run without interruption):
def get_value(self,your_dict,*keys):
curr_dict_ = your_dict
for k in keys:
v = curr_dict.get(k,None)
if v is None:
break
if isinstance(v,dict):
curr_dict = v
return v
In this case, if any of the input keys is not present, None is returned, which can be used as a check in your main code to perform an alternative task.
It's satisfying to see these answers for having two static methods for setting & getting nested attributes. These solutions are way better than using nested trees https://gist.github.com/hrldcpr/2012250
Here's my implementation.
Usage:
To set nested attribute call sattr(my_dict, 1, 2, 3, 5) is equal to my_dict[1][2][3][4]=5
To get a nested attribute call gattr(my_dict, 1, 2)
def gattr(d, *attrs):
"""
This method receives a dict and list of attributes to return the innermost value of the give dict
"""
try:
for at in attrs:
d = d[at]
return d
except(KeyError, TypeError):
return None
def sattr(d, *attrs):
"""
Adds "val" to dict in the hierarchy mentioned via *attrs
For ex:
sattr(animals, "cat", "leg","fingers", 4) is equivalent to animals["cat"]["leg"]["fingers"]=4
This method creates necessary objects until it reaches the final depth
This behaviour is also known as autovivification and plenty of implementation are around
This implementation addresses the corner case of replacing existing primitives
https://gist.github.com/hrldcpr/2012250#gistcomment-1779319
"""
for attr in attrs[:-2]:
if type(d.get(attr)) is not dict:
d[attr] = {}
d = d[attr]
d[attrs[-2]] = attrs[-1]
You can use pydash:
import pydash as _
_.get(dataDict, ["b", "v", "y"], default='Default')
https://pydash.readthedocs.io/en/latest/api.html
If you also want the ability to work with arbitrary json including nested lists and dicts, and nicely handle invalid lookup paths, here's my solution:
from functools import reduce
def get_furthest(s, path):
'''
Gets the furthest value along a given key path in a subscriptable structure.
subscriptable, list -> any
:param s: the subscriptable structure to examine
:param path: the lookup path to follow
:return: a tuple of the value at the furthest valid key, and whether the full path is valid
'''
def step_key(acc, key):
s = acc[0]
if isinstance(s, str):
return (s, False)
try:
return (s[key], acc[1])
except LookupError:
return (s, False)
return reduce(step_key, path, (s, True))
def get_val(s, path):
val, successful = get_furthest(s, path)
if successful:
return val
else:
raise LookupError('Invalid lookup path: {}'.format(path))
def set_val(s, path, value):
get_val(s, path[:-1])[path[-1]] = value
How about check and then set dict element without processing all indexes twice?
Solution:
def nested_yield(nested, keys_list):
"""
Get current nested data by send(None) method. Allows change it to Value by calling send(Value) next time
:param nested: list or dict of lists or dicts
:param keys_list: list of indexes/keys
"""
if not len(keys_list): # assign to 1st level list
if isinstance(nested, list):
while True:
nested[:] = yield nested
else:
raise IndexError('Only lists can take element without key')
last_key = keys_list.pop()
for key in keys_list:
nested = nested[key]
while True:
try:
nested[last_key] = yield nested[last_key]
except IndexError as e:
print('no index {} in {}'.format(last_key, nested))
yield None
Example workflow:
ny = nested_yield(nested_dict, nested_address)
data_element = ny.send(None)
if data_element:
# process element
...
else:
# extend/update nested data
ny.send(new_data_element)
...
ny.close()
Test
>>> cfg= {'Options': [[1,[0]],[2,[4,[8,16]]],[3,[9]]]}
ny = nested_yield(cfg, ['Options',1,1,1])
ny.send(None)
[8, 16]
>>> ny.send('Hello!')
'Hello!'
>>> cfg
{'Options': [[1, [0]], [2, [4, 'Hello!']], [3, [9]]]}
>>> ny.close()
Very late to the party, but posting in case this may help someone in the future. For my use case, the following function worked the best. Works to pull any data type out of dictionary
dict is the dictionary containing our value
list is a list of "steps" towards our value
def getnestedvalue(dict, list):
length = len(list)
try:
for depth, key in enumerate(list):
if depth == length - 1:
output = dict[key]
return output
dict = dict[key]
except (KeyError, TypeError):
return None
return None
I'd rather use simple recursion function:
def get_value_by_path(data, maplist):
if not maplist:
return data
for key in maplist:
if key in data:
return get_value_by_path(data[key], maplist[1:])
a method for concatenating strings:
def get_sub_object_from_path(dict_name, map_list):
for i in map_list:
_string = "['%s']" % i
dict_name += _string
value = eval(dict_name)
return value
#Sample:
_dict = {'new': 'person', 'time': {'for': 'one'}}
map_list = ['time', 'for']
print get_sub_object_from_path("_dict",map_list)
#Output:
#one
Extending #DomTomCat and others' approach, these functional (ie, return modified data via deepcopy without affecting the input) setter and mapper works for nested dict and list.
setter:
def set_at_path(data0, keys, value):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(set_by_path(v,keys[1:],value) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [set_by_path(x[1],keys[1:],value) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=value
return data
mapper:
def map_at_path(data0, keys, f):
data = deepcopy(data0)
if len(keys)>1:
if isinstance(data,dict):
return {k:(map_at_path(v,keys[1:],f) if k==keys[0] else v) for k,v in data.items()}
if isinstance(data,list):
return [map_at_path(x[1],keys[1:],f) if x[0]==keys[0] else x[1] for x in enumerate(data)]
else:
data[keys[-1]]=f(data[keys[-1]])
return data
I use this
def get_dictionary_value(dictionary_temp, variable_dictionary_keys):
try:
if(len(variable_dictionary_keys) == 0):
return str(dictionary_temp)
variable_dictionary_key = variable_dictionary_keys[0]
variable_dictionary_keys.remove(variable_dictionary_key)
return get_dictionary_value(dictionary_temp[variable_dictionary_key] , variable_dictionary_keys)
except Exception as variable_exception:
logging.error(variable_exception)
return ''
You can make use of the eval function in python.
def nested_parse(nest, map_list):
nestq = "nest['" + "']['".join(map_list) + "']"
return eval(nestq, {'__builtins__':None}, {'nest':nest})
Explanation
For your example query: maplist = ["b", "v", "y"]
nestq will be "nest['b']['v']['y']" where nest is the nested dictionary.
The eval builtin function executes the given string. However, it is important to be careful about possible vulnerabilities that arise from use of eval function. Discussion can be found here:
https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
https://www.journaldev.com/22504/python-eval-function
In the nested_parse() function, I have made sure that no __builtins__ globals are available and only local variable that is available is the nest dictionary.
There is a JSON like this:
{
"P1": "ss",
"Id": 1234,
"P2": {
"P1": "cccc"
},
"P3": [
{
"P1": "aaa"
}
]
}
How can I find all P1's value without it iterating all JSON?
P.S.: P1 can be anywhere in the JSON.
If no method can do this, can you tell me how to iterate through the JSON?
As I said in my other answer, I don't think there is a way of finding all values associated with the "P1" key without iterating over the whole structure. However I've come up with even better way to do that which came to me while looking at #Mike Brennan's answer to another JSON-related question How to get string objects instead of Unicode from JSON?
The basic idea is to use the object_hook parameter that json.loads() accepts just to watch what is being decoded and check for the sought-after value.
Note: This will only work if the representation is of a JSON object (i.e. something enclosed in curly braces {}), as in your sample.
from __future__ import print_function
import json
def find_values(id, json_repr):
results = []
def _decode_dict(a_dict):
try:
results.append(a_dict[id])
except KeyError:
pass
return a_dict
json.loads(json_repr, object_hook=_decode_dict) # Return value ignored.
return results
json_repr = '{"P1": "ss", "Id": 1234, "P2": {"P1": "cccc"}, "P3": [{"P1": "aaa"}]}'
print(find_values('P1', json_repr))
(Python 3) output:
['cccc', 'aaa', 'ss']
I had the same issue just the other day. I wound up just searching through the entire object and accounted for both lists and dicts. The following snippets allows you to search for the first occurrence of a multiple keys.
import json
def deep_search(needles, haystack):
found = {}
if type(needles) != type([]):
needles = [needles]
if type(haystack) == type(dict()):
for needle in needles:
if needle in haystack.keys():
found[needle] = haystack[needle]
elif len(haystack.keys()) > 0:
for key in haystack.keys():
result = deep_search(needle, haystack[key])
if result:
for k, v in result.items():
found[k] = v
elif type(haystack) == type([]):
for node in haystack:
result = deep_search(needles, node)
if result:
for k, v in result.items():
found[k] = v
return found
deep_search(["P1", "P3"], json.loads(json_string))
It returns a dict with the keys being the keys searched for. Haystack is expected to be a Python object already, so you have to do json.loads before passing it to deep_search.
Any comments for optimization are welcomed!
My approach to this problem would be different.
As JSON doesn't allow depth first search, so convert the json to a Python Object, feed it to an XML decoder and then extract the Node you are intending to search
from xml.dom.minidom import parseString
import json
def bar(somejson, key):
def val(node):
# Searches for the next Element Node containing Value
e = node.nextSibling
while e and e.nodeType != e.ELEMENT_NODE:
e = e.nextSibling
return (e.getElementsByTagName('string')[0].firstChild.nodeValue if e
else None)
# parse the JSON as XML
foo_dom = parseString(xmlrpclib.dumps((json.loads(somejson),)))
# and then search all the name tags which are P1's
# and use the val user function to get the value
return [val(node) for node in foo_dom.getElementsByTagName('name')
if node.firstChild.nodeValue in key]
bar(foo, 'P1')
[u'cccc', u'aaa', u'ss']
bar(foo, ('P1','P2'))
[u'cccc', u'cccc', u'aaa', u'ss']
Using json to convert the json to Python objects and then going through recursively works best. This example does include going through lists.
import json
def get_all(myjson, key):
if type(myjson) == str:
myjson = json.loads(myjson)
if type(myjson) is dict:
for jsonkey in myjson:
if type(myjson[jsonkey]) in (list, dict):
get_all(myjson[jsonkey], key)
elif jsonkey == key:
print myjson[jsonkey]
elif type(myjson) is list:
for item in myjson:
if type(item) in (list, dict):
get_all(item, key)
Converting the JSON to Python and recursively searching is by far the easiest:
def findall(v, k):
if type(v) == type({}):
for k1 in v:
if k1 == k:
print v[k1]
findall(v[k1], k)
findall(json.loads(a), 'P1')
(where a is the string)
The example code ignores arrays. Adding that is left as an exercise.
Bearing in mind that json is simply a string, using regular expressions with look-ahead and look-behind can accomplish this task very quickly.
Typically, the json would have been extracted from a request to external api, so code to show how that would work has been included but commented out.
import re
#import requests
#import json
#r1 = requests.get( ... url to some api ...)
#JSON = str(json.loads(r1.text))
JSON = """
{
"P1": "ss",
"Id": 1234,
"P2": {
"P1": "cccc"
},
"P3": [
{
"P1": "aaa"
}
]
}
"""
rex1 = re.compile('(?<=\"P1\": \")[a-zA-Z_\- ]+(?=\")')
rex2 = rex1.findall(JSON)
print(rex2)
#['ss', 'cccc', 'aaa']
I don't think there's any way of finding all values associated with P1 without iterating over the whole structure. Here's a recursive way to do it that first deserializes the JSON object into an equivalent Python object. To simplify things most of the work is done via a recursive private nested function.
import json
try:
STRING_TYPE = basestring
except NameError:
STRING_TYPE = str # Python 3
def find_values(id, obj):
results = []
def _find_values(id, obj):
try:
for key, value in obj.items(): # dict?
if key == id:
results.append(value)
elif not isinstance(value, STRING_TYPE):
_find_values(id, value)
except AttributeError:
pass
try:
for item in obj: # iterable?
if not isinstance(item, STRING_TYPE):
_find_values(id, item)
except TypeError:
pass
if not isinstance(obj, STRING_TYPE):
_find_values(id, obj)
return results
json_repr = '{"P1": "ss", "Id": 1234, "P2": {"P1": "cccc"}, "P3": [{"P1": "aaa"}]}'
obj = json.loads(json_repr)
print(find_values('P1', obj))
You could also use a generator to search the object after json.load().
Code example from my answer here: https://stackoverflow.com/a/39016088/5250939
def item_generator(json_input, lookup_key):
if isinstance(json_input, dict):
for k, v in json_input.iteritems():
if k == lookup_key:
yield v
else:
for child_val in item_generator(v, lookup_key):
yield child_val
elif isinstance(json_input, list):
for item in json_input:
for item_val in item_generator(item, lookup_key):
yield item_val
The question is old, but no answer answered 100%, so this was my solution:
what it does:
recursive algorithm;
list search;
object search;
returns all the results it finds in the tree;
returns the id of the parent in the key
suggestions:
study Depth First Search and Breadth First Search;
if your json is too big, recursion may be a problem, research stack algorithm
#staticmethod
def search_into_json_myversion(jsondata, searchkey, parentkeyname: str = None) -> list:
found = []
if type(jsondata) is list:
for element in jsondata:
val = Tools.search_into_json_myversion(element, searchkey, parentkeyname=parentkeyname)
if len(val) != 0:
found = found + val
elif type(jsondata) is dict:
if searchkey in jsondata.keys():
pathkey = parentkeyname + '->' + searchkey if parentkeyname != None else searchkey
found.append({pathkey: jsondata[searchkey]})
else:
for key, value in jsondata.items():
val = Tools.search_into_json_myversion(value, searchkey, parentkeyname=key)
if len(val) != 0:
found = found + val
return found