Let's say I have a dict like
{
"key_a": "value",
"key_b": {
"key_b_a": "value",
"key_b_b": {
"key_b_b_a": "value"
}
}
}
What I want is to create a method to delete the given key or change its value.
def del_key(key):
    # Sketch from the question: delete the entry addressed by a ':'-separated
    # key path. One branch is written by hand per nesting depth, which is the
    # repetition the question asks how to avoid.
    my_dict = <dictionary described above>
    keys = key.split(':')
    if len(keys) == 1:
        del my_dict[keys[0]]
    elif len(keys) == 2:
        del my_dict[keys[0]][keys[1]]
    elif len(keys) == 3:
        del my_dict[keys[0]][keys[1]][keys[2]]
    # ... and so on, one more branch for every additional level.
    . . .
del_key('key_b:key_b_b:key_b_b_a')
del_key('key_b:key_b_b')
del_key('key_a')
How can I do this gracefully?
This assumes the input is a valid key path; otherwise you have to check for missing keys yourself.
data = {
"key_a": "value",
"key_b": {
"key_b_a": "value",
"key_b_b": {
"key_b_b_a": "value"
}
}
}
def del_key(key, target=None, sep=':'):
    """Delete the entry addressed by a separator-delimited key path.

    Args:
        key: Path to the entry, e.g. ``'key_b:key_b_b:key_b_b_a'``.
        target: Nested dict to modify in place. When omitted, the
            module-level ``data`` dict is used.
        sep: Separator between the path components (``':'`` by default).

    Returns:
        The dict that was modified (``target``, or ``data`` by default).

    Raises:
        KeyError: If a component of the path is missing from its dict.
    """
    if target is None:
        target = data
    *parents, last = key.split(sep)
    node = target
    # Walk down to the dict that owns the final component, then drop it.
    for part in parents:
        node = node[part]
    del node[last]
    return target
# Each call mutates the shared dict, so the printed results accumulate the
# deletions. print() is called as a function so the demo runs on Python 3.
print(del_key('key_b:key_b_b:key_b_b_a'))
print(del_key('key_b:key_b_b'))
print(del_key('key_a'))
output:
{'key_a': 'value', 'key_b': {'key_b_a': 'value', 'key_b_b': {}}}
{'key_a': 'value', 'key_b': {'key_b_a': 'value'}}
{'key_b': {'key_b_a': 'value'}}
Related
I am trying to convert a nested JSON into a CSV file with three columns: the level 0 key, the branch, and the lowest level leaf.
For example, in the JSON below:
{
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
I want to create an output like this (didn't include entire tree example just to get point across):
level 0
branch
leaf
protein
protein.meat
chicken
protein
protein.meat
beef
I tried using json normalize but the actual file will not have paths that I can use to identify the nested fields as each dictionary is unique.
This returns the level 0 field but I need to have these as rows, not columns. Any help would be very much appreciated.
I created a function that can unnest the JSON based on key values like this:
import json
with open('path/to/json') as m:
my_json = json.load(m)
def unnest_json(data):
    """Print ``key.value`` for every entry of a nested JSON-like dict.

    Nested dicts are walked recursively, and so are dicts found directly
    inside list values. Other list elements (strings, numbers, nested
    lists, ``None``, ...) are not descended into.

    Args:
        data: The dict to walk.
    """
    for key, value in data.items():
        print(str(key) + '.' + str(value))
        if isinstance(value, dict):
            unnest_json(value)
        elif isinstance(value, list):
            for element in value:
                # Only dicts can be walked; recursing into numbers or None
                # would fail on the missing .items() method.
                if isinstance(element, dict):
                    unnest_json(element)
unnest_json(my_json)
Probably not the cleanest approach but I think you can use some sort of recursive function (traverse in below code) to convert the dictionary into a list of column values and then convert them to pandas DataFrame.
data = {
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
def traverse(col_values, dictionary, rows):
    """Append one row to ``rows`` for every leaf under ``dictionary``.

    ``col_values`` is the row built so far (level 0, branch, leaf). It is
    copied for every key, so the caller's list is never modified. A key whose
    value is non-empty extends the dotted branch in slot 1 and is descended
    into; a key whose value is empty is written to slot 2 and the row is
    appended to ``rows``.
    """
    for name in dictionary:
        subtree = dictionary[name]
        row = list(col_values)
        if not subtree:
            # Leaf: record it under the branch accumulated so far.
            row[2] = name
            rows.append(row)
            continue
        row[1] = row[1] + '.' + name
        traverse(row, subtree, rows)
# Collect one [level 0, branch, leaf] row per leaf; each walk is seeded with
# its top-level key as both the level-0 value and the start of the branch.
rows = []
for key in data:
    traverse([key, str(key), None], data[key], rows)
# Load the collected rows into a DataFrame with the three requested columns.
import pandas as pd
df = pd.DataFrame(rows, columns=["level 0", "branch", "leaf"])
print(df)
Having the following dict, where some of the values can be list of dictionaries:
{
"A": [
{
"B": {
"C": "D",
"X": "CHNAGE ME"
}
},
{
"E": "F"
}
],
"G": {
"Y": "CHANGE ME"
}
}
I would like to recursively iterate over the items and change the pairs of key values where the value is "CHANGE ME", so the result would be:
{
"A": [
{
"B": {
"C": "D",
"X.CHANGED": "CHANGED"
}
},
{
"E": "F"
}
],
"G": {
"Y.CHANGED": "CHANGED"
}
}
Solutions I've found were not handling a case where the value is a list, for example:
import collections
def nested_dict_iter(nested):
    """Yield ``(key, value)`` pairs for the non-mapping values of a nested mapping.

    Mapping values are descended into recursively. Every other value,
    including lists, is yielded as-is together with its own key, so dicts
    stored inside lists are not visited.

    Args:
        nested: The mapping to walk.

    Yields:
        ``(key, value)`` tuples, where ``key`` is the innermost key only.
    """
    # Imported locally: the ABC lives in collections.abc, and the old
    # collections.Mapping alias no longer exists on current Python 3.
    from collections.abc import Mapping

    for key, value in nested.items():
        if isinstance(value, Mapping):
            yield from nested_dict_iter(value)
        else:
            yield key, value
How can I achieve my goal?
Using recursion
Ex:
def update(data):
    """Rename and rewrite every ``key: 'CHANGE ME'`` pair in a nested structure.

    Dicts are modified in place: a pair whose value is ``'CHANGE ME'`` is
    removed and re-added as ``'<key>.CHANGED': 'CHANGED'``. Lists are
    replaced by new lists with every element processed recursively. Values
    that are neither dicts nor lists are returned unchanged, so lists may
    freely mix dicts, nested lists and scalars.

    Args:
        data: A dict, a list, or any other value.

    Returns:
        The same dict (mutated), a new list, or the value itself.
    """
    if isinstance(data, list):
        return [update(element) for element in data]
    if not isinstance(data, dict):
        # Scalars have no key to rename, so they pass through untouched.
        return data
    # Snapshot the items: keys are deleted and added while looping.
    for k, v in list(data.items()):
        if isinstance(v, (dict, list)):
            data[k] = update(v)
        elif v == 'CHANGE ME':
            del data[k]
            data[f"{k}.CHANGED"] = 'CHANGED'
    return data
print(update(data))
Output:
{
'A':[{'B': {'C': 'D', 'X.CHANGED': 'CHANGED'}}, {'E': 'F'}],
'G':{'Y.CHANGED': 'CHANGED'}
}
Note: I have not tested all corner cases
If I have a python dictionary like the following:
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm':{
'active':'yes',
'fixed':{
'#dev':{
'active':'yes',
'something':'else'
}
}
}
},
'cpu': {
'alarm': {
'active':'no',
'highcpu': {
'active':'yes'
}
}
}
}
How can I filter only the paths that end in 'active':'yes' and not show any other info?
In addition, for parent items that show up as active: no, I would want to disregard whatever comes after those.
conf = {
'memory': {
'alarm': {
'active': 'yes'
}
},
'disk' : {
'alarm':{
'active':'yes',
'fixed': {
'#dev': {
'active':'yes'
}
}
}
}
}
I don't have any working code for this yet as I'm not sure where to start. All I have at the moment is the starting dictionary.
Using recursion :
def keep_active_only(my_dict):
    """Return a pruned copy of ``my_dict`` containing only active branches.

    A dict whose own ``"active"`` entry is ``"no"`` collapses to ``{}``.
    Otherwise an ``"active": "yes"`` entry is kept, nested dicts are pruned
    recursively and kept only when something remains, and every other
    entry is dropped.
    """
    # An inactive node hides itself and everything below it.
    if my_dict.get("active") == "no":
        return {}

    kept = {}
    for name, node in my_dict.items():
        if isinstance(node, dict):
            pruned = keep_active_only(node)
            if pruned:
                kept[name] = pruned
            continue
        if name == "active" and node == "yes":
            kept[name] = node
    return kept
Output :
>>> keep_active_only(conf)
{
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes'
}
}
}
},
'memory': {
'alarm': {
'active': 'yes'
}
}
}
You can use recursion:
def active(d):
    """Report whether the dict ``d`` counts as active.

    A dict with its own ``'active'`` entry is active exactly when that
    entry equals ``'yes'``; its nested dicts are not consulted. A dict
    without an ``'active'`` entry is active when all of its nested dicts
    are active (trivially true when it has none).

    The answer does not depend on the order of the keys: previously, nested
    dicts listed before the ``'active'`` key were also checked, while those
    listed after it were not.

    Args:
        d: The dict to inspect.

    Returns:
        ``True`` if ``d`` is active, ``False`` otherwise.
    """
    if 'active' in d:
        return d['active'] == 'yes'
    return all(active(child) for child in d.values() if isinstance(child, dict))
def build(d, flag = False):
    """Return a filtered copy of ``d``.

    An entry stored under the key ``'active'`` is always kept. Any other
    non-dict value is kept only when ``flag`` is false, and any other dict
    value is kept only when ``active()`` accepts it. Kept dict values are
    rebuilt recursively, with ``flag`` set to whether they contain an
    ``'active'`` key.
    """
    result = {}
    for name, value in d.items():
        is_branch = isinstance(value, dict)
        if name != 'active':
            if is_branch:
                if not active(value):
                    continue
            elif flag:
                # Plain values next to an 'active' key are dropped.
                continue
        if is_branch:
            result[name] = build(value, 'active' in value)
        else:
            result[name] = value
    return result
import json
print(json.dumps(build(conf), indent=4))
Output:
{
"memory": {
"alarm": {
"active": "yes"
}
},
"disk": {
"alarm": {
"active": "yes",
"fixed": {
"#dev": {
"active": "yes"
}
}
}
}
}
Not sure if I understand correctly, but here is a function that discards all data from the dict that does not take you to a particular key and value:
def filter_dict(d, key, value):
    """Return the parts of ``d`` that lead to a ``key: value`` pair.

    An entry is kept when it is exactly ``key: value``, or when it is a
    nested dict whose own filtered result is non-empty. Everything else is
    discarded. The input is not modified.
    """
    matches = {}
    for name, item in d.items():
        if name == key and item == value:
            matches[name] = item
            continue
        if not isinstance(item, dict):
            continue
        sub = filter_dict(item, key, value)
        if sub:
            matches[name] = sub
    return matches
Here is how you would use it in your example:
from pprint import pprint
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes',
'something': 'else'
}
}
}
}
}
pprint(filter_dict(conf, 'active', 'yes'))
# {'disk': {'alarm': {'active': 'yes', 'fixed': {'#dev': {'active': 'yes'}}}},
# 'memory': {'alarm': {'active': 'yes'}}}
I've seen similar questions but none that exactly match what I'm doing and I believe other developers might face same issue if they are working with MongoDB.
I'm looking to compare two nested dict objects containing dicts and arrays and return a dict with the additions and deletions (like you would get from running git diff on two files).
Here is what I have so far:
def _nested_diff(old, new):
    """Return ``(adds, dels)`` if both values are dicts or both are lists, else ``None``."""
    if isinstance(old, dict) and isinstance(new, dict):
        return _diff_dicts(old, new)
    if isinstance(old, list) and isinstance(new, list):
        return _diff_lists(old, new)
    return None


def _diff_dicts(alpha, beta):
    """Return ``(adds, dels)`` dicts describing how ``beta`` differs from ``alpha``."""
    adds, dels = {}, {}
    for key, old in alpha.items():
        if key not in beta:
            dels[key] = old
            continue
        new = beta[key]
        if old == new:
            continue
        nested = _nested_diff(old, new)
        if nested is None:
            # Scalars, or containers of different kinds: the value was replaced.
            adds[key] = new
            dels[key] = old
            continue
        sub_adds, sub_dels = nested
        # Record only the sides that actually changed, so unchanged branches
        # do not show up as empty containers.
        if sub_adds:
            adds[key] = sub_adds
        if sub_dels:
            dels[key] = sub_dels
    for key, new in beta.items():
        if key not in alpha:
            adds[key] = new
    return adds, dels


def _diff_lists(alpha, beta):
    """Return ``(adds, dels)`` lists from an index-by-index comparison."""
    adds, dels = [], []
    for old, new in zip(alpha, beta):
        if old == new:
            continue
        nested = _nested_diff(old, new)
        if nested is None:
            adds.append(new)
            dels.append(old)
            continue
        sub_adds, sub_dels = nested
        if sub_adds:
            adds.append(sub_adds)
        if sub_dels:
            dels.append(sub_dels)
    # Elements beyond the shorter list exist on one side only.
    shared = min(len(alpha), len(beta))
    dels.extend(alpha[shared:])
    adds.extend(beta[shared:])
    return adds, dels


def dict_diff(alpha, beta, recurse_adds=False, recurse_dels=False):
    """Return the differences between two nested dicts (or lists).

    Dicts are compared key by key and lists index by index. Nested dicts and
    lists are diffed recursively, so only the parts that changed are
    reported. A changed scalar shows up with its new value under ``adds``
    and its old value under ``dels``.

    Example of the returned shape::

        {
            'adds': {
                'specific_hours': [{'ends_at': '2015-12-25'}],
            },
            'dels': {
                'specific_hours': [{'ends_at': '2015-12-24'}],
                'subscription_products': {
                    'review_management': {'thiswillbedeleted': 'deleteme'},
                },
            },
        }

    Args:
        alpha: The original dict (or list).
        beta: The updated dict (or list).
        recurse_adds: When true, return only the additions, or ``{}`` if
            there are none.
        recurse_dels: When true (and ``recurse_adds`` is false), return only
            the deletions, or ``{}`` if there are none.

    Returns:
        ``{'adds': ..., 'dels': ...}`` unless one of the flags is set.

    Raises:
        TypeError: If the arguments are not two dicts or two lists.
    """
    nested = _nested_diff(alpha, beta)
    if nested is None:
        raise TypeError("dict_diff function can only get dict objects")
    adds, dels = nested
    if recurse_adds:
        return adds if adds else {}
    if recurse_dels:
        return dels if dels else {}
    return {'adds': adds, 'dels': dels}
The result I'm getting now is:
{'adds': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-22'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {}}},
'dels': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-2'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {'thiswillbedeleted': 'deleteme'}}}}
And this is the two objects I'm trying to compare:
alpha = {
'specific_hours': [
{
"starts_at": "2015-12-2",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
'thiswillbedeleted': "deleteme",
}
},
}
beta = {
'specific_hours': [
{
"starts_at": "2015-12-22",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
}
},
}
I know similar questions have already been asked before, but I am really having problems implementing them for my special case:
Let's say I have a dictionary with varying depths, for example:
dicti = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0}
'd':{'offset':70, 'start':0}
}
'e':{
'f':{'offset':80, 'start':0}
'g':{'offset':30, 'start':0}
'h':{'offset':20, 'start':0}
}
}
}
etc... (with a lot more different levels and entries)
so now I want a copy of that dictionary with basically the same structure and keys, but if 'offset' (at any level) is greater than let's say 50 'offset' should be changed to 0
I guess some kind of iterative function would be the best, but I cannot get my head around that...
You might use the standard machinery for the copy and then modify the copied dictionary (solution #1 in my example), or you might do copying and modification in the same function (solution #2).
In either case, you're looking for a recursive function.
import copy
from pprint import pprint
dicti = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0},
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0},
}
}
}
# Solution 1, two passes
def modify(d):
    """Reset, in place, every ``'offset'`` greater than 50 to 0 in a nested dict.

    Values that are not dicts are ignored. Nothing is returned; the
    argument itself is changed.
    """
    if not isinstance(d, dict):
        return
    if d.get('offset', 0) > 50:
        d['offset'] = 0
    for child in d.values():
        modify(child)
dictj = copy.deepcopy(dicti)
modify(dictj)
pprint(dictj)
# Solution 2, copy and modify in one pass
def copy_and_modify(d):
    """Return a copy of a nested dict with every ``'offset'`` above 50 set to 0.

    Nested dicts are copied recursively, so the input is left untouched.
    Values that are not dicts are returned as-is (they are not copied).

    Args:
        d: The dict to copy, or any other value.

    Returns:
        The adjusted copy, or ``d`` itself when it is not a dict.
    """
    if not isinstance(d, dict):
        return d
    d2 = {k: copy_and_modify(v) for k, v in d.items()}
    # Default to 0 so dicts without an 'offset' are not compared as None > 50,
    # which raises TypeError on Python 3.
    if d2.get('offset', 0) > 50:
        d2['offset'] = 0
    return d2
dictj = copy_and_modify(dicti)
pprint(dictj)
A recursive solution is going to be more intuitive. You want something like the following pseudocode:
def copy(dict):
    # Pseudocode sketch (not runnable Python): build and return a new
    # dictionary that mirrors the input, recursing into nested dictionaries.
    new_dict = {}
    for key, value in dict:
        if value is a dictionary:
            # Nested dictionary: copy it recursively.
            new_dict[key] = copy(value)
        else if key == 'offset' and value > 50:
            # An 'offset' above 50 becomes 0 in the copy.
            new_dict[key] = 0
        else:
            # Everything else is carried over unchanged.
            new_dict[key] = value
    return new_dict
d = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0}
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0}
}
}
}
def transform(item):
    """Return a shallow copy of ``item`` with an ``'offset'`` of 80 replaced by ``'CHANGED'``.

    Args:
        item: A dict that has an ``'offset'`` key.

    Returns:
        The copy; ``item`` itself is not modified.
    """
    new_item = item.copy()  # consider usage of deepcopy if needed
    if new_item['offset'] == 80:
        new_item['offset'] = 'CHANGED'
    return new_item


def visit(item):
    """Rebuild a nested dict, passing every dict that has an ``'offset'`` key to ``transform``.

    Dicts without an ``'offset'`` key are rebuilt with each value visited
    recursively. Values that are not dicts are returned unchanged.

    Args:
        item: The dict (or leaf value) to visit.

    Returns:
        The rebuilt structure.
    """
    if not isinstance(item, dict):
        return item
    # Test for the key rather than its truthiness so an offset of 0 still
    # marks the dict as a leaf record.
    if 'offset' in item:
        return transform(item)
    return {k: visit(v) for k, v in item.items()}
result = visit(d)
print(result)
Output:
{
'files': {
'b': {
'd': {
'offset': 70,
'start': 0
},
'c': {
'offset': 50,
'start': 0
}
},
'e': {
'g': {
'offset': 30,
'start': 0
},
'h': {
'offset': 20,
'start': 0
},
'f': {
'offset': 'CHANGED',
'start': 0
}
},
'a': {
'offset': 100,
'start': 0
}
}
}
You can review the following topics, which are used in the answer:
Recursion
Visitor pattern
You could call a recursive function to change the value once the condition is met:
dicti = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0}
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0}
}
}
}
def dictLoop(dt):
    """Set, in place, every numeric ``'offset'`` greater than 50 to 0 in a nested dict.

    Nested dicts are walked recursively. Values that are neither dicts nor
    numbers (strings, ``None``, lists, ...) are left alone.

    Args:
        dt: The dict to update.

    Returns:
        The same dict, after modification.
    """
    for k, v in dt.items():
        if isinstance(v, dict):
            dictLoop(v)
        elif k == 'offset' and isinstance(v, (int, float)) and v > 50:
            # Replacing the value of an existing key is safe while iterating.
            dt[k] = 0
    return dt
# print() is called as a function so the demo also runs on Python 3.
print(dictLoop(dicti))