Update Nested Dictionary value using Recursion - python

I want to update Dict dictionary's value by inp dictionary's values using recursion or loop.
also the format should not change mean use recursion or loop on same format
please suggest a solution that is applicable to all level nesting not for this particular case
dict={
"name": "john",
"quality":
{
"type1":"honest",
"type2":"clever"
},
"marks":
[
{
"english":34
},
{
"math":90
}
]
}
inp = {
"name" : "jack",
"type1" : "dumb",
"type2" : "liar",
"english" : 28,
"math" : 89
}

Another solution, changing the dict in-place:
dct = {
"name": "john",
"quality": {"type1": "honest", "type2": "clever"},
"marks": [{"english": 34}, {"math": 90}],
}
inp = {
"name": "jack",
"type1": "dumb",
"type2": "liar",
"english": 28,
"math": 89,
}
def change(d, inp):
if isinstance(d, list):
for i in d:
change(i, inp)
elif isinstance(d, dict):
for k, v in d.items():
if not isinstance(v, (list, dict)):
d[k] = inp.get(k, v)
else:
change(v, inp)
change(dct, inp)
print(dct)
Prints:
{
"name": "jack",
"quality": {"type1": "dumb", "type2": "liar"},
"marks": [{"english": 28}, {"math": 89}],
}

First, make sure you change the name of the first Dictionary, say to myDict, since dict is reserved in Python as a Class Type.
The below function will do what you are looking for, in a recursive manner.
def recursive_swipe(input_var, updates):
if isinstance(input_var, list):
output_var = []
for entry in input_var:
output_var.append(recursive_swipe(entry, updates))
elif isinstance(input_var, dict):
output_var = {}
for label in input_var:
if isinstance(input_var[label], list) or isinstance(input_var[label], dict):
output_var[label] = recursive_swipe(input_var[label], updates)
else:
if label in updates:
output_var[label] = updates[label]
else:
output_var = input_var
return output_var
myDict = recursive_swipe(myDict, inp)
You may look for more optimal solutions if there are some limits to the formatting of the two dictionaries that were not stated in your question.

Related

python dict recursion returns empty list

I have this dictionary that I am trying to iterate through recursively. When I hit a matching node match I want to return that node which is a list.
Currently with my code I keep on getting an empty list. I have stepped through the code and I see my check condition being hit, but the recursion still returns an empty value. what am I doing wrong here? thanks
dictionary data:
{
"apiVersion": "v1",
"kind": "Deployment",
"metadata": {
"name": "cluster",
"namespace": "namespace",
},
"spec": {
"template": {
"metadata": {
"labels": {
"app": "flink",
"cluster": "repo_name-cluster",
"component": "jobmanager",
"track": "prod",
}
},
"spec": {
"containers": [
{
"name": "jobmanager",
"image": "IMAGE_TAG_",
"imagePullPolicy": "Always",
"args": ["jobmanager"],
"resources": {
"requests": {"cpu": "100.0", "memory": "100Gi"},
"limits": {"cpu": "100.0", "memory": "100Gi"},
},
"env": [
{
"name": "ADDRESS",
"value": "jobmanager-prod",
},
{"name": "HADOOP_USER_NAME", "value": "yarn"},
{"name": "JOB_MANAGER_MEMORY", "value": "1000m"},
{"name": "HADOOP_CONF_DIR", "value": "/etc/hadoop/conf"},
{
"name": "TRACK",
"valueFrom": {
"fieldRef": {
"fieldPath": "metadata.labels['track']"
}
},
},
],
}
]
},
},
},
}
code:
test = iterdict(data, "env")
print(test)
def iterdict(data, match):
output = []
if not isinstance(data, str):
for k, v in data.items():
print("key ", k)
if isinstance(v, dict):
iterdict(v, match)
elif isinstance(v, list):
if k.lower() == match.lower():
# print(v)
output += v
return output
else:
for i in v:
iterdict(i, match)
return output
expected return value:
[{'name': 'JOB_MANAGER_RPC_ADDRESS', 'value': 'repo_name-cluster-jobmanager-prod'}, {'name': 'HADOOP_USER_NAME', 'value': 'yarn'}, {'name': 'JOB_MANAGER_MEMORY', 'value': '1000m'}, {'name': 'HADOOP_CONF_DIR', 'value': '/etc/hadoop/conf'}, {'name': 'TRACK', 'valueFrom': {...}}]
When you recurse to iterdict, you're simply throwing away the return value. Thus, since every value in the top level of your dictionary is either a string or a dict, you will end up just returning an empty list.
You probably want to append the recursive outputs:
output += iterdict(v, match)
and
output += iterdict(i, match)
However, this is potentially inefficient as you will build a lot of intermediate lists. A better strategy might be to make your function a generator; the name iterdict would suggest this anyway. To do so, get rid of your output variable and the return statements, and use yield instead:
yield from iterdict(v, match)
yield from v
yield from iterdict(i, match)
and then, at the top level, you can just iterate over your results:
for value in iterdict(data, "env"):
...
or, if you really need a list, collect the generator output into a list:
test = list(iterdata(data, "env"))
This will likely be faster (no intermediate lists) and more Pythonic.
You are not updating the output to output list when you are running it recursively.
You can either append the output or use yield keyword to make use of generators in python. Return creates temporary lists which are memry intensive and impedes performance when you are running it recursively. Thats why use generators.
def iterdict(data, match):
if isinstance(data, str):
return []
for k, v in data.items():
if isinstance(v, dict):
yield from iterdict(v, match)
elif isinstance(v, list):
if k.lower() == match.lower():
yield from v
for i in v:
yield from iterdict(i, match)
test = list(iterdict(data, "env"))
print(test)

How to return specific key value pair from nested json without knowing location?

I want to get the value of a specific key in a nested json file, without knowing the exact location. So basically looking through all the keys (and nested keys) until it finds the match, and return a dictionary {match: "value"}
Nested json_data:
{
"$id": "1",
"DataChangedEntry": {
"$id": "2",
"PathProperty": "/",
"Metadata": null,
"PreviousValue": null,
"CurrentValue": {
"CosewicWsRefId": {
"Value": "QkNlrjq2HL9bhTQqU8-qH"
},
"Date": {
"Value": "2022-05-20T00:00:00Z"
},
"YearSentToMinister": {
"Value": "0001-01-01T00:00:00"
},
"DateSentToMinister": {
"Value": "0001-01-01T00:00:00"
},
"Order": null,
"Type": {
"Value": "REGULAR"
},
"ReportType": {
"Value": "NEW"
},
"Stage": {
"Value": "ASSESSED"
},
"State": {
"Value": "PUBLISHED"
},
"StatusAndCriteria": {
"Status": {
"Value": "EXTINCT"
},
"StatusComment": {
"EnglishText": null,
"FrenchText": null
},
"StatusChange": {
"Value": "NOT_INITIALIZED"
},
"StatusCriteria": {
"EnglishText": null,
"FrenchText": null
},
"ApplicabilityOfCriteria": {
"ApplicabilityCriteriaList": []
}
},
"Designation": null,
"Note": null,
"DomainEvents": [],
"Version": {
"Value": 1651756761385.1248
},
"Id": {
"Value": "3z3XlCkaXY9xinAbK5PrU"
},
"CreatedAt": {
"Value": 1651756761384
},
"ModifiedAt": {
"Value": 1651756785274
},
"CreatedBy": {
"Value": "G#a"
},
"ModifiedBy": {
"Value": "G#a"
}
}
},
"EventAction": "Create",
"EventDataChange": {
"$ref": "2"
},
"CorrelationId": "3z3XlCkaXY9xinAbK5PrU",
"EventId": "WGxlewsUAHayLHZ2LHvFk",
"EventTimeUtc": "2022-05-06T13:15:31.7463355Z",
"EventDataVersion": "1.0.0",
"EventType": "AssessmentCreatedInfrastructure"
}
Desired return is the value from json_data["DataChangedEntry"]["CurrentValue"]["Date"]["Value"]:
"2022-05-20T00:00:00Z"
So far I've tried a recursive function but it keeps return None:
match_dict = {}
def recursive_json(data,attr,m_dict):
for k,v in data.items():
if k == attr:
for k2,v2 in v.items():
m_dict = {attr, v2}
print('IF: ',m_dict)
return m_dict
elif isinstance(v,dict):
return recursive_json(v,attr,m_dict)
print('RETURN: ',recursive_json(json_data, "Date", match_dict))
Output:
RETURN: None
I tried removing the second return statement, and it now prints the value I want in the function, but still returns None:
match_dict = {}
def recursive_json(data,attr,m_dict):
for k,v in data.items():
if k == attr:
for k2,v2 in v.items():
m_dict = {attr, v2}
print('IF: ',m_dict)
return m_dict
elif isinstance(v,dict):
recursive_json(v,attr,m_dict)
print('RETURN: ',recursive_json(json_data, "Date", match_dict))
Output:
IF: {'Date', '2022-05-20T00:00:00Z'}
RETURN: None
I don't get why it keeps returning None. Is there a better way to return the value I want?
The underlying question is: how can we make multiple recursive calls in a loop, return the recursive result if any of them returns something useful, and fail otherwise?
If we blindly return inside the loop, then only one recursive call can be made. Whatever it returns, gets returned at this level. If it didn't find the useful result, we don't get a useful result.
If we blindly don't return inside the loop, then the values that were returned don't matter. Nothing in the current call makes use of them, so we will finish looping, make all the recursive calls, reach the end of the function... and thus implicitly return None.
The way around this, of course, is to check whether the recursive call returned something useful. If it did, we can return that; otherwise, we keep going. If we reach the end, then we signal that we couldn't find anything useful - that way, if we are being recursively called, the caller can do the right thing.
Assuming that None cannot be a "useful" value, we can naturally use that as the signal. We don't even have to return it explicitly at the end.
After fixing some other typos (we should not overwrite the global built-in dict name, and anyway we don't need to name the dict that we pass in at the start, and the parameter should be m_dict so that it's properly defined when we make the recursive call), we get:
def recursive_json(data, attr, m_dict):
for k,v in data.items():
if k == attr:
for k2,v2 in v.items():
m_dict = {attr, v2}
print('IF: ', m_dict)
return m_dict
elif isinstance(v,dict):
result = recursive_json(v, attr, m_dict)
if result:
return result
# call it:
recursive_json(json_data, "Date", {})
We can see that the debug trace is printed, and the value is also returned.
Let's improve this a bit:
First off, the inner for k2,v2 in v.items(): loop doesn't make any sense. Again, we can only return once per call, so this would skip any values in the dict after the first. We would be better served just returning v directly. Also, the m_dict parameter doesn't actually help implement the logic; we don't modify it between calls. It doesn't make sense to use a set for our return value, since it's fundamentally unordered; we care about the order here. Finally, we don't need the debug trace any more. That gives us:
def recursive_json(data, attr):
for k, v in data.items():
if k == attr:
return attr, v
elif isinstance(v,dict):
result = recursive_json(v, attr)
if result:
return result
To get fancier, we can separate the base case from the recursive case, and use more elegant tools for each. To check if any of the keys matches, we can simply check with the in operator. To recurse and return the first fruitful result, the built-in next is useful. We get:
def recursive_json(data, attr):
if not isinstance(data, dict):
# reached a leaf, can't search in here.
return None
if attr in data:
return k, data[k]
candidates = (recursive_json(v, attr) for v in data.values())
try:
# the first non-None candidate, if any.
return next(c for c in candidates if c is not None)
except StopIteration:
return None # all candidates were None.
It seems like you're trying to write something like this:
from json import loads
from typing import Any
test_json = """
{
"a": {
"b": {
"value": 1
}
},
"b": {
"value": 2
},
"c": {
"b": {
"value": 3
},
"c": {
"value": 4
}
},
"d": {}
}
"""
json_data = loads(test_json)
def find_value(data: dict, attr: str, depth_first: bool=True) -> (bool, Any):
# assumes data is a dict, with 'value' attributes for the attr to be found
# returns [whether value was found]: bool, [actual value]: Any
for k, v in data.items():
if k == attr and 'value' in v:
return True, v['value']
elif depth_first and isinstance(v, dict):
if (t := find_value(v, attr, depth_first))[0]:
return t
if not depth_first:
for _, v in data.items():
if isinstance(v, dict) and (t := find_value(v, attr, depth_first))[0]:
return t
return False, None
# returns True, 1 - first 'b' with a 'value', depth-first
print(find_value(json_data, 'b'))
# returns True, 2 - first 'b' with a 'value', breadth-first
print(find_value(json_data, 'b', False))
# returns True, 4 - first 'c' with a 'value' - the 'c' at the root level has no 'value'
print(find_value(json_data, 'c'))
# returns False, None - no 'd' with a value
print(find_value(json_data, 'd'))
# returns False, None - no 'e' in data
print(find_value(json_data, 'e'))
Your own function can return None because you don't actually return the value a recursive call would return. And the default return value for a function is None.
However, your code also doesn't account for the case where there is nothing to be found.
(Note: this solution only works in Python 3.8 or later, due to its use of the walrus operator := - of course it's not that hard to write it without, but that's left as an exercisae for the reader

Python - iterate and update a nested dictionary & lists

Having the following dict, where some of the values can be list of dictionaries:
{
"A": [
{
"B": {
"C": "D",
"X": "CHNAGE ME"
}
},
{
"E": "F"
}
],
"G": {
"Y": "CHANGE ME"
}
}
I would like to recursively iterate over the items and change the pairs of key values where the value is "CHANGE ME", so the result would be:
{
"A": [
{
"B": {
"C": "D",
"X.CHANGED": "CHANGED"
}
},
{
"E": "F"
}
],
"G": {
"Y.CHANGED": "CHANGED"
}
}
Solutions I've found were not handling a case where the value is a list, for example:
import collections
def nested_dict_iter(nested):
for key, value in nested.iteritems():
if isinstance(value, collections.Mapping):
for inner_key, inner_value in nested_dict_iter(value):
yield inner_key, inner_value
else:
yield key, value
How can I achieve my goal?
Using recursion
Ex:
def update(data):
for k, v in data.copy().items():
if isinstance(v, dict): # For DICT
data[k] = update(v)
elif isinstance(v, list): # For LIST
data[k] = [update(i) for i in v]
elif v == 'CHANGE ME': # Update Key-Value
# data.pop(k)
# OR
del data[k]
data[f"{k}.CHANGED"] = 'CHANGED'
return data
print(update(data))
Output:
{
'A':[{'B': {'C': 'D', 'X.CHANGED': 'CHANGED'}}, {'E': 'F'}],
'G':{'Y.CHANGED': 'CHANGED'}
}
Note: I have not tested all corner cases

How to perform quick upleveling in python?

I have the following object in python:
{
name: John,
age: {
years:18
},
computer_skills: {
years:4
},
mile_runner: {
years:2
}
}
I have an array with 100 people with the same structure.
What is the best way to go through all 100 people and make it such that there is no more "years"? In other words, each object in the 100 would look something like:
{
name: John,
age:18,
computer_skills:4,
mile_runner:2
}
I know I can do something in pseudocode:
for(item in list):
if('years' in (specific key)):
specifickey = item[(specific key)][(years)]
But is there a smarter/more efficent way?
Your pseudo-code is already pretty good I think:
for person in persons:
for k, v in person.items():
if isinstance(v, dict) and 'years' in v:
person[k] = v['years']
This overwrites every property which is a dictionary that has a years property with that property’s value.
Unlike other solutions (like dict comprehensions), this will modify the object in-place, so no new memory to keep everything is required.
def flatten(d):
ret = {}
for key, value in d.iteritems():
if isinstance(value, dict) and len(value) == 1 and "years" in value:
ret[key] = value["years"]
else:
ret[key] = value
return ret
d = {
"name": "John",
"age": {
"years":18
},
"computer_skills": {
"years":4
},
"mile_runner": {
"years":2
}
}
print flatten(d)
Result:
{'age': 18, 'mile_runner': 2, 'name': 'John', 'computer_skills': 4}
Dictionary comprehension:
import json
with open("input.json") as f:
cont = json.load(f)
print {el:cont[el]["years"] if "years" in cont[el] else cont[el] for el in cont}
prints
{u'age': 18, u'mile_runner': 2, u'name': u'John', u'computer_skills': 4}
where input.json contains
{
"name": "John",
"age": {
"years":18
},
"computer_skills": {
"years":4
},
"mile_runner": {
"years":2
}
}
Linear with regards to number of elements, you can't really hope for any lower.
As people said in the comments, it isn't exactly clear what your "object" is, but assuming that you actually have a list of dicts like this:
list = [{
'name': 'John',
'age': {
'years': 18
},
'computer_skills': {
'years':4
},
'mile_runner': {
'years':2
}
}]
Then you can do something like this:
for item in list:
for key in item:
try:
item[key] = item[key]['years']
except (TypeError, KeyError):
pass
Result:
list = [{'age': 18, 'mile_runner': 2, 'name': 'John', 'computer_skills': 4}]

Python filter nested dict given list of key names

Is there a way to filter a nested dict in Python, so I can see only the keys I'd specified ?
Example:
x = {
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
"dt_last_up": "10/04/2014 16:30:44",
},
"url_detalhes": "/DetalhesDocsPro.aspx",
"url_app": "/docspro",
}
y = filter(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
Then var y would be something like:
{
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
},
"url_app": "/docspro",
}
I've tried to do something using defaultdict, but had no success with lists at any level of recursion. Also I found dificulty while working with different data structures.
Here's a modified version of 2rs2ts's answer that returns a new object rather than modifying the old one (and handles filtering on non-leaf nodes):
import copy
def fltr(node, vals):
if isinstance(node, dict):
retVal = {}
for key in node:
if key in vals:
retVal[key] = copy.deepcopy(node[key])
elif isinstance(node[key], list) or isinstance(node[key], dict):
child = fltr(node[key], vals)
if child:
retVal[key] = child
if retVal:
return retVal
else:
return None
elif isinstance(node, list):
retVal = []
for entry in node:
child = fltr(entry, vals)
if child:
retVal.append(child)
if retVal:
return retVal
else:
return None
With this, you will call
y = fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
and get
{
"field": [
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ch_origem_sistema_chave"
},
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ax_andamento_data"
}
],
"url_app": "/docspro",
"_metadata": {
"dt_reg": "22/01/2014 16:17:16"
}
}
Note that this will return None if everything is filtered. For example,
fltr(x, [])
will always return None, no matter what is in x.
Here's a solution which walks the structure in a depth-first manner to find the "leaf" nodes which you are checking to see if they're in your list of elements to preserve. When it finds such an element, it removes it from the dictionary with del. (So this is done in-place.)
def fltr(d, vals):
if isinstance(d, dict):
vals_to_del = []
for k in d:
if k in vals:
continue
if not isinstance(d[k], list) and not isinstance(d[k], dict):
if k not in vals:
vals_to_del.append(k)
else:
fltr(d[k], vals)
for k in vals_to_del:
del d[k]
elif isinstance(d, list):
for i in d:
fltr(i, vals)
Note that I didn't define a function called filter, because it's a built-in one and you don't want to shadow it.
>>> fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
>>> x
{'field': [{'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ch_origem_sistema_chave'}, {'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ax_andamento_data'}], 'url_app': '/docspro', '_metadata': {'dt_reg': '22/01/2014 16:17:16'}}

Categories