Related
I have 2 dictionaries:
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "26366", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "45165", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "48834", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
new_val = {'26366': '11616', '45165': '11613', '48834': '11618'}
I want to update values in "data" dictionary with the values from "new_val" dictionary.
So that 26366(in "data" dict) becomes 11616(from "new_val" dict), 45165 becomes 11613, and 48834 becomes 11618.
The nesting depth of the "data" dictionary can vary (it may be deeper or shallower than in this example).
The name of the key that holds the id can vary too: besides "key" it may be "skill_id", "filter_id" and so on.
And get this result:
{
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
To return an updated dict without modifying the old one:
def updated_in_depth(d, replace):
    """Return a copy of ``d`` in which mapped leaf values are substituted.

    Dictionaries and lists are rebuilt recursively, so the input structure is
    never modified. Dictionary keys are kept as they are; only values are
    looked up in ``replace``.

    Args:
        d: A dict, a list, or a leaf value.
        replace: Mapping of old leaf value -> new leaf value.

    Returns:
        A new structure of the same shape. A leaf that is a key of
        ``replace`` becomes the mapped value; any other leaf is returned
        unchanged.
    """
    if isinstance(d, dict):
        return {k: updated_in_depth(v, replace) for k, v in d.items()}
    if isinstance(d, list):
        return [updated_in_depth(x, replace) for x in d]
    try:
        return replace.get(d, d)
    except TypeError:
        # Unhashable leaves (e.g. a set) can never be keys of the mapping,
        # so they are passed through instead of aborting the whole walk.
        return d
Testing with your data and new_val:
>>> updated_in_depth(data, new_val)
{'filter': {'and': [{'or': [{'and': [
{'category': 'profile', 'key': 'languages', 'operator': 'IN', 'value': 'EN'},
{'category': 'skill', 'key': '11616', 'value': 100, 'operator': 'EQ'}]}]},
{'or': [{'category': 'skill', 'key': '11613', 'operator': 'NE'}]},
{'or': [{'category': 'skill', 'key': '11618', 'value': 80, 'operator': 'GT'}]},
{'or': [{'category': 'profile', 'key': 'gender', 'operator': 'EQ', 'value': 'FEMALE'}]}]}}
Use something like this:
# "and" and "or" map to lists, so each one needs a list index before the
# next key can be looked up.
data['filter']['and'][0]['or'][0]['and'][1]['key'] = '11616'
To search for the keys recursively you can do:
from copy import deepcopy
def replace(d, new_vals, keys=("key",)):
    """Replace id values in place throughout a nested dict/list structure.

    Args:
        d: The structure to update. Dicts and lists are walked recursively;
            anything else is ignored.
        new_vals: Mapping of old id -> new id.
        keys: Names of the dictionary fields whose value is an id. Defaults
            to ``("key",)``; pass e.g. ``("key", "skill_id")`` when the id is
            stored under other names.

    Returns:
        None. ``d`` is modified in place.
    """
    if isinstance(d, dict):
        for name in keys:
            if name in d:
                current = d[name]
                try:
                    d[name] = new_vals.get(current, current)
                except TypeError:
                    # An unhashable value (list/dict) cannot be an id; leave
                    # it alone, the loop below still descends into it.
                    pass
        for v in d.values():
            replace(v, new_vals, keys)
    elif isinstance(d, list):
        for v in d:
            replace(v, new_vals, keys)
new_data = deepcopy(data)
replace(new_data, new_val)
print(new_data)
Prints:
{
"filter": {
"and": [
{
"or": [
{
"and": [
{
"category": "profile",
"key": "languages",
"operator": "IN",
"value": "EN",
},
{
"category": "skill",
"key": "11616",
"value": 100,
"operator": "EQ",
},
]
}
]
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{
"or": [
{
"category": "skill",
"key": "11618",
"value": 80,
"operator": "GT",
}
]
},
{
"or": [
{
"category": "profile",
"key": "gender",
"operator": "EQ",
"value": "FEMALE",
}
]
},
]
}
}
If you don't need copy of data you can omit the deepcopy:
replace(data, new_val)
print(data)
You can build a recursive function like this
def walk_dict(d):
    """Rewrite every ``'key'`` entry found in the module-level ``new_val``.

    Walks nested dicts and lists and updates matching dictionaries in place.
    Values that are neither dicts nor lists are ignored. Returns ``None``.
    """
    if isinstance(d, dict):
        if 'key' in d:
            old_id = d['key']
            if old_id in new_val:
                d['key'] = new_val[old_id]
        children = d.values()
    elif isinstance(d, list):
        children = d
    else:
        return
    for child in children:
        walk_dict(child)
walk_dict(data)
print(data)
As many have advised, a recursive function will do the trick:
def a(d):
    """Return a copy of ``d`` with leaves translated through ``new_val``.

    Lists and dicts are rebuilt recursively (dictionary keys are kept); any
    other value is looked up in the module-level ``new_val`` mapping and
    returned unchanged when it is not a key of that mapping.
    """
    if isinstance(d, list):
        return [a(item) for item in d]
    if isinstance(d, dict):
        return {key: a(value) for key, value in d.items()}
    if d in new_val:
        return new_val[d]
    return d
When a is called, it checks the type of the variable d:
if d is a list, it applies a to each element of the list and returns the updated list
if d is a dict, it applies a to all values and returns the updated dict
otherwise, it returns the mapped new value if the old one is found among the new_val keys, or d unchanged if it is not
data = {
"filter":
{
"and":
[
{
"or":
[
{
"and":
[
{"category": "profile", "key": "languages", "operator": "IN", "value": "EN"},
{"category": "skill", "key": "11616", "value": 100, "operator": "EQ"},
],
},
],
},
{"or": [{"category": "skill", "key": "11613", "operator": "NE"}]},
{"or": [{"category": "skill", "key": "11618", "value": 80, "operator": "GT"}]},
{"or": [{"category": "profile", "key": "gender", "operator": "EQ", "value": "FEMALE"}]},
],
},
}
class Replace:
    """Replace string values inside a nested dict/list structure in place."""

    def __init__(self, data):
        # The structure that ``start`` will walk and modify.
        self.data = data

    def start(self, d):
        """Substitute every string dictionary value that is a key of ``d``.

        Args:
            d: Mapping of old string value -> new value.

        Returns:
            ``self.data`` itself, modified in place.
        """
        def replace(node):
            if isinstance(node, list):
                for item in node:
                    replace(item)
            elif isinstance(node, dict):
                for k, v in node.items():
                    if isinstance(v, str):
                        if v in d:
                            # Re-assigning an existing key does not change the
                            # dict's size, so it is safe during iteration.
                            node[k] = d[v]
                    else:
                        # Descend into lists as well as dicts; filters such as
                        # {"and": [...]} keep their children in lists.
                        replace(v)

        replace(self.data)
        return self.data
new_data = Replace(data).start({'26366': '11616',
'45165': '11613',
'48834': '11618'})
print(new_data)
I have the following list (notice "keyE" has a dictionary as a string):
[
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"
}
]
And I want to convert it to this (it can have any number of nested dictionaries and lists):
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
So far, I have the following but I don't know what to do after the json.loads. I know I have to recursively call the function but not sure how.
import json
def convert(data_list: list) -> list:
    """Try to JSON-decode string values that look like serialized containers.

    NOTE: this is still the unfinished attempt from the question. The decoded
    value is neither stored nor processed recursively, and nothing is
    returned yet.

    Args:
        data_list: List of dictionaries whose string values may hold JSON.
    """
    for obj in data_list:
        for key, value in obj.items():
            if isinstance(value, str) and any(char in "{[]}" for char in value):
                try:
                    data = json.loads(value)
                    # What do I do here?
                except ValueError:
                    # json.JSONDecodeError is a ValueError subclass. Catching
                    # only that avoids swallowing KeyboardInterrupt/SystemExit
                    # the way a bare ``except:`` does.
                    continue
No idea if this'll work for your more complicated cases, but I was able to use ast.literal_eval() and some really janky chained str.replace calls:
import ast
def replace(s):
    """Evaluate ``s`` as a Python literal after unquoting embedded braces.

    Every ``"{`` is turned into ``{`` and every ``}"`` into ``}`` before the
    text is handed to ``ast.literal_eval``.
    """
    without_opening_quotes = s.replace(r'"{', "{")
    without_quotes = without_opening_quotes.replace(r'}"', "}")
    return ast.literal_eval(without_quotes)
x = [{"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\"name\":\"foo\"}\"},\"keyF\":0}"},
{"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\"name\":\"bar\"}\"},\"keyF\":5}"}]
for d in x:
for key, value in d.items():
if "{" in value:
d[key] = replace(value)
Output:
In [4]: x
Out[4]:
[{'keyA': 'Example',
'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}},
{'keyA': 'Example2',
'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
In [5]: x[0]["keyB"]["keyD"]["keyE"]["name"]
Out[5]: 'foo'
Your nested key looks like a JSON string, which can be loaded into a dictionary with the json.loads method. However, json.loads does not decode JSON strings that are nested inside the result, which is why I've added a recursive function that decodes the nested dictionaries as well.
import json
from json import JSONDecodeError
def recurse(d):
    """Decode ``d`` and every JSON-encoded dictionary nested inside it.

    Args:
        d: A dict, a string that may contain a JSON object, or any other
            value.

    Returns:
        * for a dict: the same dict, with its values decoded in place;
        * for a string holding a JSON object: the decoded dict, processed
          recursively;
        * otherwise ``d`` unchanged. This includes strings that are not JSON
          and strings that decode to something other than a dict (``"7"``,
          ``"[1, 2]"``, ``"true"``).
    """
    if isinstance(d, dict):
        loaded_d = d
    else:
        try:
            loaded_d = json.loads(d)
        except (JSONDecodeError, TypeError):
            return d
        if not isinstance(loaded_d, dict):
            # Scalars and lists have no .items(); keep the original value
            # rather than failing with AttributeError.
            return d
    for k, v in loaded_d.items():
        loaded_d[k] = recurse(v)
    return loaded_d
for d in data_list:
for key, val in d.items():
d[key] = recurse(val)
Output:
[
{
"keyA": "Example",
"keyB": {"keyC": 2, "keyD": {"keyE": {"name": "foo"}}, "keyF": 0},
},
{
"keyA": "Example2",
"keyB": {"keyC": 6, "keyD": {"keyE": {"name": "bar"}}, "keyF": 5},
},
]
Okay, here is recursive solution:
import json
from json import JSONDecodeError
data = [
{
"keyA": "Example",
"keyB": "{\"keyC\":2,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"foo\\\"}\"},\"keyF\":0}"
},
{
"keyA": "Example2",
"keyB": "{\"keyC\":6,\"keyD\":{\"keyE\":\"{\\\"name\\\":\\\"bar\\\"}\"},\"keyF\":5}"
}
]
def rec_convert(data):
    """Decode JSON-encoded dictionaries nested in ``data``, in place.

    Args:
        data: Dictionary whose values may be dicts or strings containing a
            JSON object.

    Returns:
        None. Values that are strings holding a JSON object are replaced by
        the decoded dict, which is then processed recursively. All other
        values are left untouched, including strings that decode to a scalar
        or a list.
    """
    for k, v in data.items():
        if isinstance(v, dict):
            rec_convert(v)
            continue
        try:
            decoded = json.loads(v)
        except (JSONDecodeError, TypeError):
            continue
        if isinstance(decoded, dict):
            # Only dicts can be recursed into; storing a decoded scalar or
            # list and calling .items() on it would raise AttributeError.
            data[k] = decoded
            rec_convert(decoded)
for el in data:
rec_convert(el)
print("raw print:")
print(data)
print("pretty print")
print(json.dumps(data, indent=2))
and output:
raw print:
[{'keyA': 'Example', 'keyB': {'keyC': 2, 'keyD': {'keyE': {'name': 'foo'}}, 'keyF': 0}}, {'keyA': 'Example2', 'keyB': {'keyC': 6, 'keyD': {'keyE': {'name': 'bar'}}, 'keyF': 5}}]
pretty print
[
{
"keyA": "Example",
"keyB": {
"keyC": 2,
"keyD": {
"keyE": {
"name": "foo"
}
},
"keyF": 0
}
},
{
"keyA": "Example2",
"keyB": {
"keyC": 6,
"keyD": {
"keyE": {
"name": "bar"
}
},
"keyF": 5
}
}
]
I've updated Vishal Singh's answer to accommodate for lists inside a dictionary.
def decode_json_recursively(obj):
    """Decode JSON strings found anywhere inside ``obj``.

    Args:
        obj: A list, a dict, a JSON string, or any other value.

    Returns:
        * for a list: a new list with every element decoded;
        * for a dict: the same dict, with its values decoded in place;
        * for a string (or bytes) holding valid JSON: the decoded value; if
          that value is itself a dict or list, it is decoded recursively;
        * otherwise ``obj`` unchanged.
    """
    if isinstance(obj, list):
        return [decode_json_recursively(el) for el in obj]
    if isinstance(obj, dict):
        for k, v in obj.items():
            obj[k] = decode_json_recursively(v)
        return obj
    try:
        data = json.loads(obj)
    except (JSONDecodeError, TypeError):
        # Not JSON text (or not text at all): keep the value as it is.
        return obj
    if isinstance(data, (dict, list)):
        # A decoded container may itself hold JSON-encoded strings.
        return decode_json_recursively(data)
    return data
I have 2 nested dictionaries that have some matching keys and a similar structure, and I want to merge them into a third dictionary in a specific way. The first dictionary holds default values, which are used whenever a key is not present in the second dictionary. The second dictionary has some keys that match the defaults and some keys that don't exist in them. In either case I want the second dictionary to win: its value overwrites the default for a matching key, and its new keys are added to the third dictionary. See the (shortened) example below:
default:
{"model_name": "null",
"description": "null",
"frequency": "d",
"tasks": [
{
"target": "elastic",
"metrics": "null",
"model_type": "null",
"alert": {
"type": "pagerduty",
"threshold": 5,
"service_id" : "P94CEA6"
}
}
]
}
second dict
{"model_name": "dqs_cie_registration_09",
"description": "test cie registration",
"tasks": [
{
"source": "elastic",
"metrics": [
"indid_unique_cnt", "zs"
],
"model_type": "Deep_Dive",
"elastic_config": "config",
"read_object": "dqs_rtfs_d_*",
"watcher": "cie_watch_zs_3d.json",
"target_write_index": "dqs_target_write_index"
}
]
}
I'd like to merge them so that the result is:
{"model_name": "dqs_cie_registration_09",
"description": "test cie registration",
"frequency": "d",
"tasks": [
{
"target": "elastic",
"source": "elastic",
"metrics": ["indid_unique_cnt", "zs"],
"model_type": "Deep_Dive",
"elastic_config": "config",
"read_object": "dqs_rtfs_d_*",
"watcher": "cie_watch_zs_3d.json",
"target_write_index": "dqs_target_write_index",
"alert": {
"type": "pagerduty",
"threshold": 5,
"service_id" : "P94CEA6"
}
}
]
}
The third dict merges the second dict on the first.
I haven't really gotten anywhere but I feel there is a really easy way to implement this that I just don't remember.
Following merge routine produces desired result
import copy # to provide deepcopy
import pprint # Pretty Print
def merge(a, b):
    """Merge ``b`` into ``a`` in place, treating ``"null"`` as a placeholder.

    To preserve ``a``, pass a ``copy.deepcopy`` of it.

    Rules:
        * dict + dict: keys missing from ``a`` are copied from ``b``. For a
          key present in both, a string in ``a`` is replaced only if it is
          the placeholder ``"null"``; a non-string value is merged
          recursively.
        * list + list of equal length: merged position by position with the
          same rules. Lists of different length are left untouched.
        * any other combination: ``a`` is left untouched.

    Args:
        a: Structure holding the defaults; modified in place.
        b: Structure whose values fill in or extend ``a``; never modified.

    Returns:
        None.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        for k, v in b.items():
            if k not in a:
                # Add keys that only exist in b.
                a[k] = copy.deepcopy(v)
            elif isinstance(a[k], str):
                # Only the "null" placeholder is overwritten.
                if a[k] == "null":
                    a[k] = copy.deepcopy(v)
            else:
                merge(a[k], v)
    elif isinstance(a, list) and isinstance(b, list):
        if len(a) == len(b):
            # Inspect a's item, not b's: the placeholder lives in the
            # defaults, and b must never push "null" into a.
            for i, item in enumerate(a):
                if isinstance(item, str):
                    if item == "null":
                        a[i] = copy.deepcopy(b[i])
                else:
                    merge(item, b[i])
Usage
d1 = {"model_name": "null",
"description": "null",
"frequency": "d",
"tasks": [
{
"target": "elastic",
"metrics": "null",
"model_type": "null",
"alert": {
"type": "pagerduty",
"threshold": 5,
"service_id" : "P94CEA6"
}
}
]
}
d2 = {"model_name": "dqs_cie_registration_09",
"description": "test cie registration",
"tasks": [
{
"source": "elastic",
"metrics": [
"indid_unique_cnt", "zs"
],
"model_type": "Deep_Dive",
"elastic_config": "config",
"read_object": "dqs_rtfs_d_*",
"watcher": "cie_watch_zs_3d.json",
"target_write_index": "dqs_target_write_index"
}
]
}
merge(d1, d2) # to preserve d1 create a deepcopy prior to merge (i.e. temp = copy.deepcopy(d1))
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(d1)
Output
{ 'description': 'test cie registration',
'frequency': 'd',
'model_name': 'dqs_cie_registration_09',
'tasks': [ { 'alert': { 'service_id': 'P94CEA6',
'threshold': 5,
'type': 'pagerduty'},
'elastic_config': 'config',
'metrics': ['indid_unique_cnt', 'zs'],
'model_type': 'Deep_Dive',
'read_object': 'dqs_rtfs_d_*',
'source': 'elastic',
'target': 'elastic',
'target_write_index': 'dqs_target_write_index',
'watcher': 'cie_watch_zs_3d.json'}
]
}
I have the following function to remove keys with an empty value or that start with an underscore. It works on a non-nested dictionary:
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
return d
d = {"Name": "David",
"_Age": 50,
"Numbers": [1,2,3,4,5],
"Height": ""
}
>>> _remove_missing_and_underscored_keys(d)
{'Name': 'David', 'Numbers': [1, 2, 3, 4, 5]}
However, I would like to create the above so that it can remove nested items as well. I believe I need to use a yield statement for this, but I'm not having luck with implementing it properly. Here is an example of what I want it to do:
d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
}
}
}
}
}
>>> _remove_missing_and_underscored_keys(d)
{
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Offers": {
"HDBUY": {
"Currency": "GBP",
"Cost": "14.99",
},
"SDBUY": {
"Currency": "GBP",
"Cost": "14.99",
}
}
}
}
}
In other words, it will do the above operation on all nested levels of the dict.
You can use recursion with a dictionary comprehension:
d = {'PlatformID': 'B00EU7XL9Q', 'Platform': 'Amazon', 'Type': 'Collection', 'Products': {'UK': {'URL': 'http://www.amazon.co.uk/dp/B00EU7XL9Q', 'Rating': None, '_IsAudited': True, 'Offers': {'HDBUY': {'Currency': 'GBP', 'FutureReleaseStartDate': None, 'Cost': '14.99', 'IsFutureRelease': False}, 'SDBUY': {'Currency': 'GBP', 'FutureReleaseStartDate': None, 'Cost': '14.99', 'IsFutureRelease': False}}}}}
def _del(_d):
return {a:_del(b) if isinstance(b, dict) else b for a, b in _d.items() if b and not a.startswith('_')}
import json
print(json.dumps(_del(d), indent=4))
Output:
{
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Offers": {
"HDBUY": {
"Currency": "GBP",
"Cost": "14.99"
},
"SDBUY": {
"Currency": "GBP",
"Cost": "14.99"
}
}
}
}
}
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif type(d[key]) == dict:
d[key] = _remove_missing_and_underscored_keys(d[key])
return d
I think you meant to say, you needed to use recursion to solve this. I don't think using a generator quite solves your problem.
Another caveat is that you shouldn't modify a dictionary while you are iterating over it. That's why I create the copy _d, modify and return that copy, and iterate over the original structure.
import pprint
def _remove_missing_and_underscored_keys(d):
if not d: return d
_d = d.copy()
for key in _d.keys():
if not _d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif isinstance(_d[key], dict):
_remove_missing_and_underscored_keys(_d[key])
return _d
_d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
}
}
}
}
}
foo = _remove_missing_and_underscored_keys(_d)
pprint.pprint(foo)
Output:
{'Platform': 'Amazon',
'PlatformID': 'B00EU7XL9Q',
'Products': {'UK': {'Offers': {'HDBUY': {'Cost': '14.99', 'Currency': 'GBP'},
'SDBUY': {'Cost': '14.99', 'Currency': 'GBP'}},
'URL': 'http://www.amazon.co.uk/dp/B00EU7XL9Q'}},
'Type': 'Collection'}
Just go recursive.
Add another check to see if a value in primary dict is a dict, and call the same function on it.
# your code goes here
def _remove_missing_and_underscored_keys(d):
if not d: return d
for key in d.keys():
if not d.get(key):
del d[key]
elif key.startswith('_'):
del d[key]
elif type(d[key]) is dict:
#print("key '{}' stores a dict '{}', need to cleanup recursively".format(key, d[key]))
d[key] = _remove_missing_and_underscored_keys(d[key])
# Keep below check if you want to treat empty dict as `empty` as well
if d[key] == None or d[key] == {}:
del d[key]
return d
d = {
"PlatformID": "B00EU7XL9Q",
"Platform": "Amazon",
"Type": "Collection",
"Products": {
"UK": {
"URL": "http://www.amazon.co.uk/dp/B00EU7XL9Q",
"Rating": None,
"_IsAudited": True,
"Offers": {
"HDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"SDBUY": {
"Currency": "GBP",
"FutureReleaseStartDate": None,
"Cost": "14.99",
"IsFutureRelease": False
},
"x" : {
"y":None
}
}
}
}
}
e = _remove_missing_and_underscored_keys(d)
print(e)
See it in action: https://ideone.com/5xDDZl
Above code also handles empty dicts stored at any key or any dict that became empty after recursively cleaning it. You can remove that check if needed.
Here is my dictionary (or JSON)
{
"$schema": "http://json-schema.org/draft-03/schema#",
"name": "Product",
"type": "object",
"properties": {
"id": {
"type": "number",
"description": "Product identifier",
"required": True
},
"name": {
"type": "string",
"description": "Name of the product",
"required": True
},
"price": {
"type": "number",
"minimum": 0,
"required": True
},
"tags": {
"type": "array",
"items": {
"type": "string"
}
},
"stock": {
"type": "object",
"properties": {
"warehouse": {
"type": "number"
},
"retail": {
"type": "number"
}
}
}
}
}
I want to print all keys with this format key1.key2.key3. This is my code:
def myprint(d, keys = ''):
# Print the dotted path of every non-dict value in ``d``; ``keys`` is the
# path prefix accumulated so far. This is the failing version from the
# question, kept as posted.
for k, v in d.items():
# Remember the prefix as it was before this key is appended.
temp = keys
keys += k
if isinstance(v,dict):
keys += '.'
myprint(v,keys)
# NOTE(review): ``keys`` does not appear to be reset to ``temp`` after the
# recursive call, so the next sibling key is appended to this
# sub-dictionary's path instead of to the parent prefix.
else:
print(keys)
# The prefix is restored here, i.e. only when the value was not a dict.
keys = temp
Unfortunately, this fails and produces output like this:
$schema
type
name
properties.stock.type
properties.stock.properties.warehouse.type
properties.stock.properties.warehouse.retail.type
properties.stock.price.minimum
properties.stock.price.type
properties.stock.price.required
properties.stock.price.tags.items.type
properties.stock.price.tags.items.type
properties.stock.price.tags.id.required
properties.stock.price.tags.id.type
properties.stock.price.tags.id.description
properties.stock.price.tags.id.name.required
properties.stock.price.tags.id.name.type
properties.stock.price.tags.id.name.description
As you see, the last several lines are wrong.
Does anyone have suggestions? I'm not restricted to this script; other approaches are welcome, as long as they don't use any modules.
You can use recursion:
d = {'$schema': 'http://json-schema.org/draft-03/schema#', 'name': 'Product', 'type': 'object', 'properties': {'id': {'type': 'number', 'description': 'Product identifier', 'required': True}, 'name': {'type': 'string', 'description': 'Name of the product', 'required': True}, 'price': {'type': 'number', 'minimum': 0, 'required': True}, 'tags': {'type': 'array', 'items': {'type': 'string'}}, 'stock': {'type': 'object', 'properties': {'warehouse': {'type': 'number'}, 'retail': {'type': 'number'}}}}}
def display_keys(s, last=None):
    """Yield the dotted key path of every non-dict value in ``s``.

    Args:
        s: Dictionary to walk; nested dictionaries are descended into.
        last: Path prefix of the enclosing dictionary, or a falsy value at
            the top level.
    """
    for key, value in s.items():
        path = "{}.{}".format(last, key) if last else str(key)
        if isinstance(value, dict):
            yield from display_keys(value, path)
        else:
            yield path
print(list(display_keys(d)))
Output:
['$schema', 'name', 'type', 'properties.id.type', 'properties.id.description', 'properties.id.required', 'properties.name.type', 'properties.name.description', 'properties.name.required', 'properties.price.type', 'properties.price.minimum', 'properties.price.required', 'properties.tags.type', 'properties.tags.items.type', 'properties.stock.type', 'properties.stock.properties.warehouse.type', 'properties.stock.properties.retail.type']
I think you make it complicated by updating keys. A string in Python is immutable. We can each time pass an extended key to the next recursive level, so with:
def myprint(d, keys = ''):
    """Print the dotted key path of every non-dict value in ``d``.

    Args:
        d: Dictionary to walk; nested dictionaries are descended into.
        keys: Prefix printed in front of each key (already ends with a dot
            for nested levels).
    """
    for name, value in d.items():
        path = '{}{}'.format(keys, name)
        if not isinstance(value, dict):
            print(path)
            continue
        # A fresh prefix is passed down, so ``keys`` is never modified here.
        myprint(value, path + '.')
This then yields:
>>> myprint(d)
$schema
name
type
properties.id.type
properties.id.description
properties.id.required
properties.name.type
properties.name.description
properties.name.required
properties.price.type
properties.price.minimum
properties.price.required
properties.tags.type
properties.tags.items.type
properties.stock.type
properties.stock.properties.warehouse.type
properties.stock.properties.retail.type
The problem with your code was that you only restored the old value of keys when the value was not a dictionary. As a result, if there were multiple sub-dictionaries, their keys were concatenated together.