How to move sub-dictionary to another dictionary? - python

I am writing a function that takes 2 strings as inputs and would move a section of the dictionary to another.
def move(item_to_move, destination):
# do something....
My initial dictionary looks like this.
directories = {
'beers': {
'ipa': {
'stone': {}
}
},
'wines': {
'red': {
'cabernet': {}
}
},
'other' : {}
}
I would like to move either a subsection or section of the dictionary to another section. The sections are represented by each key of the path delimited by a '/'. For example, the inputs for my function would be:
item_to_move='beers/ipa'
destination='other'
move(directories, item_to_move,destination)
The output would be:
{
'wines': {
'red': {
'cabernet': {}
},
},
'other' :{
'beers': {
'ipa': {
'stone': {}
} }
},
}
NOTE: I am assuming all input paths for items_to_move are valid.

Find the origin's parent dictionary and the target's dictionary, then update the the target's dictionary with the origin's key and value (removing it from the origin's parent):
def move(tree,originPath,targetPath):
originKey = None
for originName in originPath.split("/"):
originParent = originParent[originKey] if originKey else tree
originKey = originName
targetDict = tree
for targetName in targetPath.split("/"):
targetDict = targetDict[targetName]
targetDict.update({originKey:originParent.pop(originKey)})
output:
directories = {
'beers': {
'ipa': {
'stone': {}
}
},
'wines': {
'red': {
'cabernet': {}
}
},
'other' : {}
}
move(directories,'beers/ipa','other')
print(directories)
{ 'beers': {},
'wines': { 'red': {'cabernet': {}} },
'other': { 'ipa': {'stone': {}} }
}

Related

Python: merge Nested Dictionary into one JSON

How to merge strings from the yield generator of JSON into one JSON?
I have got Nested Dictionary by yield generator, and I aim to have one JSON file.
I have the output of these correct strings of nested dictionary.
{"domain.com": {"Chrome": "19362.344607264396"}}
{"domain.com": {"ChromeMobile": "7177.498437391487"}}
{"another.com": {"MobileSafari": "6237.433155080214"}}
{"another.com": {"Safari": "5895.409403430795"}}
and I want to merge into one JSON file
[
{
"domain.com": {
"Chrome": "19362.344607264396"
}
},
{
"domain.com": {
"ChromeMobile": "7177.498437391487"
}
},
{
"another.com": {
"MobileSafari": "6237.433155080214"
}
},
{
"another.com": {
"Safari": "5895.409403430795"
}
}
]
or ideally to have a JSON likes to this
{
"browsers": [
{
"domain.com": {
"Chrome": "19362.344607264396",
"ChromeMobile": "7177.498437391487",
},
"another.com": {
"MobileSafari": "6237.433155080214",
"Safari": "5895.409403430795"
}
}
]
}
My code
# Cloudflare zone bandwidth total
def browser_map_page_views(domain_zone):
cloudflare = prom.custom_query(
query="topk(5, sum by(family) (increase(browser_map_page_views_count{job='cloudflare', zone='"f'{domain_zone}'"'}[10d])))"
)
for domain_z in cloudflare:
user_agent = domain_z['metric']['family']
value = domain_z['value'][1]
yield {domain_zone: {user_agent: {'value': value}}}
# Get list of zones from Prometheus based on Host Tracker data
def domain_zones():
zones_domain = prom.custom_query(
query="host_tracker_uptime_percent{job='donodeexporter'}"
)
for domain_z in zones_domain:
yield domain_z['metric']['zone']
There is a final output.
# Get a list of domains and substitution each one into a request of Prometheus query.
for domain_list in domain_zones():
for dict in browser_map_page_views(domain_zone=domain_list):
dicts = dict
print(json.dumps(dicts))
Something like this
import json
DATA = [
{"domain.com": {"Chrome": "19362.344607264396"}},
{"domain.com": {"ChromeMobile": "7177.498437391487"}},
{"another.com": {"MobileSafari": "6237.433155080214"}},
{"another.com": {"Safari": "5895.409403430795"}}
]
def yield_data():
for d in DATA:
yield d
def merge1():
return list(yield_data())
def merge2():
output = {}
for d in DATA:
key = list(d.keys())[0]
output[key] = output.get(key, {})
output[key].update(d[key])
return {"browsers": [output]}
print(json.dumps(merge1(), indent=2))
print(json.dumps(merge2(), indent=2))
merge1's output looks like:
[
{
"domain.com": {
"Chrome": "19362.344607264396"
}
},
{
"domain.com": {
"ChromeMobile": "7177.498437391487"
}
},
{
"another.com": {
"MobileSafari": "6237.433155080214"
}
},
{
"another.com": {
"Safari": "5895.409403430795"
}
}
]
merge2's output looks like
{
"browsers": [
{
"domain.com": {
"Chrome": "19362.344607264396",
"ChromeMobile": "7177.498437391487"
},
"another.com": {
"MobileSafari": "6237.433155080214",
"Safari": "5895.409403430795"
}
}
]
}

Convert nested JSON to Dataframe with columns referencing nested paths

I am trying to convert a nested JSON into a CSV file with three columns: the level 0 key, the branch, and the lowest level leaf.
For example, in the JSON below:
{
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
I want to create an output like this (didn't include entire tree example just to get point across):
level 0
branch
leaf
protein
protein.meat
chicken
protein
protein.meat
beef
I tried using json normalize but the actual file will not have paths that I can use to identify the nested fields as each dictionary is unique.
This returns the level 0 field but I need to have these as rows, not columns. Any help would be very much appreciated.
I created a function that pcan unnest the json based on key values like this:
import json
with open('path/to/json') as m:
my_json = json.load(m)
def unnest_json(data):
for key, value in data.items():
print(str(key)+'.'+str(value))
if isinstance(value, dict):
unnest_json(value)
elif isinstance(value, list):
for val in value:
if isinstance(val, str):
pass
elif isinstance(val, list):
pass
else:
unnest_json(val)
unnest_json(my_json)
Probably not the cleanest approach but I think you can use some sort of recursive function (traverse in below code) to convert the dictionary into a list of column values and then convert them to pandas DataFrame.
data = {
"protein": {
"meat": {
"chicken": {},
"beef": {},
"pork": {}
},
"powder": {
"^ISOPURE": {},
"substitute": {}
}
},
"carbs": {
"_vegetables": {
"veggies": {
"lettuce": {},
"carrots": {},
"corn": {}
}
},
"bread": {
"white": {},
"multigrain": {
"whole wheat": {}
},
"other": {}
}
},
"fat": {
"healthy": {
"avocado": {}
},
"unhealthy": {}
}
}
def traverse(col_values, dictionary, rows):
for key in dictionary:
new_col_values = list(col_values)
if dictionary[key]:
new_col_values[1] += '.' + key
traverse(new_col_values, dictionary[key], rows)
else:
new_col_values[2] = key
rows.append(new_col_values)
rows = []
for key in data:
traverse([key, str(key), None], data[key], rows)
import pandas as pd
df = pd.DataFrame(rows, columns=["level 0", "branch", "leaf"])
print(df)

MongoDB Python Update/ Insert dict in dict without overwriting

I can't insert my new document value (dict) without overwriting my existing data. I've looked through all different resources and can't find an answer.
I've also though of putting the values from first_level_dict into a list "first_level_dict" : [dict1, dict2] but I won't know how to append the dict eighter.
Sample Data:
# Create the document
target_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"}
}
},
"Root_key": "Root_value"
}
collection.insert_one(target_dict)
The result I'm looking for:
result_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"},
"second_level_dict2": {"Content2": "Value2"}
}
},
"Root_key": "Root_value"
}
Update: New Values example 2:
# New Values Sample
new_values = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"secon_level_dict2": {"Content2": "Value2"},
"secon_level_dict3": {"Content3": "Value3"}
}
}
collection.insert_one(target_dict)
Update: The result I'm looking for example 2:
result_dict = {
"_id": 55,
"Root_dict": {
"first_level_dict": {
"second_level_dict1": {"Content1": "Value1"},
"second_level_dict2": {"Content2": "Value2"},
"second_level_dict3": {"Content3": "Value3"},
}
},
"Root_key": "Root_value"
}
What I've tried:
# Update document "$setOnInsert"
q = {"_id": 55}
target_dict = {"$set": {"Root_dict": {"first_level_dict": {"second_level_dict2": {"Content2": "Value2"}}}}}
collection.update_one(q, target_dict)
What I've tried example 2:
# Update document
q = {"_id": 55}
target_dict = {"$set": {"Root_dict.first_level_dict": {
"second_level_dict2": {"Content2": "Value2"},
"second_level_dict3": {"Content3": "Value3"}}}}
collection.update_one(q, target_dict)
Try using the dot notation:
target_dict = {$set: {"Root_dict.first_level_dict.second_level_dict2": {"Content2": "Value2"}}}
Additionally, to update/add multiple fields (for "example 2"):
target_dict = {$set: {
"Root_dict.first_level_dict.second_level_dict2": {"Content2": "Value2"},
"Root_dict.first_level_dict.second_level_dict3": {"Content3": "Value3"}
}
}

find all dictionary matching key=value pairs in dynamic dictionary

If I have a python dictionary like the following:
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm':{
'active':'yes',
'fixed':{
'#dev':{
'active':'yes',
'something':'else'
}
}
}
},
'cpu': {
'alarm': {
'active':'no',
'highcpu': {
'active':'yes'
}
}
}
}
how can I filter only the paths that end in 'active':'yes' and not show any other info.
In addition, for parent items that show up as active: no, I would want to disregard whatever comes after those.
conf = {
'memory': {
'alarm': {
'active': 'yes'
}
},
'disk' : {
'alarm':{
'active':'yes',
'fixed': {
'#dev': {
'active':'yes'
}
}
}
}
}
I don't have any working code for this yet as I'm not sure where to start. all I have at the moment is the starting dictionary.
Using recursion :
def keep_active_only(my_dict):
result_dict = {}
for key, value in my_dict.items():
# If there is embedded dict
if isinstance(value, dict):
# Compute the embedded dict using recursion
result_subdict = keep_active_only(value)
# Keeping result only if not empty
if result_subdict:
result_dict[key] = result_subdict
# Keep active key if value is yes
elif key == "active" and value == "yes":
result_dict[key] = value
# Returns empty dict if active is no
elif key == "active" and value == "no":
return {}
return result_dict
Output :
>>> keep_active_only(conf)
{
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes'
}
}
}
},
'memory': {
'alarm': {
'active': 'yes'
}
}
}
You can use recursion:
def active(d):
_r, _flag = [], False
for a, b in d.items():
if a == 'active' and not _flag:
_r.append(b == 'yes')
_flag = True
if not _flag and isinstance(b, dict):
_r.append(active(b))
return all(_r)
def build(d, flag = False):
return {a:b if not isinstance(b, dict) else build(b, 'active' in b)
for a, b in d.items() if ((not isinstance(b, dict) and not flag) or a == 'active') or (isinstance(b, dict) and active(b))}
import json
print(json.dumps(build(conf), indent=4))
Output:
{
"memory": {
"alarm": {
"active": "yes"
}
},
"disk": {
"alarm": {
"active": "yes",
"fixed": {
"#dev": {
"active": "yes"
}
}
}
}
}
Not sure if I understand correctly, but here is a function that discards all data from the dict that does not take you to a particular key and value:
def filter_dict(d, key, value):
new_dict = {}
for d_key, d_value in d.items():
if d_key == key and d_value == value:
new_dict[d_key] = d_value
elif isinstance(d_value, dict):
child = filter_dict(d_value, key, value)
if child:
new_dict[d_key] = child
return new_dict
Here is how you would use it in your example:
from pprint import pprint
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes',
'something': 'else'
}
}
}
}
}
pprint(filter_dict(conf, 'active', 'yes'))
# {'disk': {'alarm': {'active': 'yes', 'fixed': {'#dev': {'active': 'yes'}}}},
# 'memory': {'alarm': {'active': 'yes'}}}

Comparing Nested Python dict with list and dict

I've seen similar questions but none that exactly match what I'm doing and I believe other developers might face same issue if they are working with MongoDB.
I'm looking to compare two nested dict objects with dict and arrays and return a dict with additions and deletion (like you would git diff two files)
Here is what I have so far:
def dict_diff(alpha, beta, recurse_adds=False, recurse_dels=False):
"""
:return: differences between two python dict with adds and dels
example:
(This is the expected output)
{
'adds':
{
'specific_hours': [{'ends_at': '2015-12-25'}],
}
'dels':
{
'specific_hours': [{'ends_at': '2015-12-24'}],
'subscription_products': {'review_management': {'thiswillbedeleted': 'deleteme'}}
}
}
"""
if type(alpha) is dict and type(beta) is dict:
a_keys = alpha.keys()
b_keys = beta.keys()
dels = {}
adds = {}
for key in a_keys:
if type(alpha[key]) is list:
if alpha[key] != beta[key]:
adds[key] = dict_diff(alpha[key], beta[key], recurse_adds=True)
dels[key] = dict_diff(alpha[key], beta[key], recurse_dels=True)
elif type(alpha[key]) is dict:
if alpha[key] != beta[key]:
adds[key] = dict_diff(alpha[key], beta[key], recurse_adds=True)
dels[key] = dict_diff(alpha[key], beta[key], recurse_dels=True)
elif key not in b_keys:
dels[key] = alpha[key]
elif alpha[key] != beta[key]:
adds[key] = beta[key]
dels[key] = alpha[key]
for key in b_keys:
if key not in a_keys:
adds[key] = beta[key]
elif type(alpha) is list and type(beta) is list:
index = 0
adds=[]
dels=[]
for elem in alpha:
if alpha[index] != beta[index]:
dels.append(alpha[index])
adds.append(beta[index])
# print('update', adds, dels)
index+=1
else:
raise Exception("dict_diff function can only get dict objects")
if recurse_adds:
if bool(adds):
return adds
return {}
if recurse_dels:
if bool(dels):
return dels
return {}
return {'adds': adds, 'dels': dels}
The result I'm getting now is:
{'adds': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-22'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {}}},
'dels': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-2'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {'thiswillbedeleted': 'deleteme'}}}}
And this is the two objects I'm trying to compare:
alpha = {
'specific_hours': [
{
"starts_at": "2015-12-2",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
'thiswillbedeleted': "deleteme",
}
},
}
beta = {
'specific_hours': [
{
"starts_at": "2015-12-22",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
}
},
}

Categories