Comparing Nested Python dict with list and dict - python

I've seen similar questions, but none that exactly match what I'm doing, and I believe other developers might face the same issue if they are working with MongoDB.
I'm looking to compare two nested dict objects with dict and arrays and return a dict with additions and deletion (like you would git diff two files)
Here is what I have so far:
def dict_diff(alpha, beta, recurse_adds=False, recurse_dels=False):
    """Return the differences between two nested structures as adds and dels.

    Compares *alpha* (the "before" document) against *beta* (the "after"
    document), recursing into nested dicts and lists, much like a git diff
    of two files.

    example:
    (This is the expected output)
    {
    'adds':
    {
    'specific_hours': [{'ends_at': '2015-12-25'}],
    }
    'dels':
    {
    'specific_hours': [{'ends_at': '2015-12-24'}],
    'subscription_products': {'review_management': {'thiswillbedeleted': 'deleteme'}}
    }
    }

    :param alpha: original dict or list
    :param beta: updated dict or list
    :param recurse_adds: internal flag -- return only the additions subtree
    :param recurse_dels: internal flag -- return only the deletions subtree
    :return: {'adds': ..., 'dels': ...} at the top level; on recursive calls,
        the bare adds/dels subtree or {} when that subtree is empty
    :raises Exception: if alpha and beta are not both dicts or both lists
    """
    if isinstance(alpha, dict) and isinstance(beta, dict):
        adds = {}
        dels = {}
        for key, a_val in alpha.items():
            if key not in beta:
                # Key removed entirely. This check must come FIRST: the
                # original code tested the value's type before membership,
                # so a list/dict-valued key missing from beta raised KeyError.
                dels[key] = a_val
            elif isinstance(a_val, (dict, list)) and type(beta[key]) is type(a_val):
                if a_val != beta[key]:
                    sub_adds = dict_diff(a_val, beta[key], recurse_adds=True)
                    sub_dels = dict_diff(a_val, beta[key], recurse_dels=True)
                    # Record only non-empty subtrees so the result no longer
                    # accumulates placeholders like {'review_management': {}}.
                    if sub_adds:
                        adds[key] = sub_adds
                    if sub_dels:
                        dels[key] = sub_dels
            elif a_val != beta[key]:
                adds[key] = beta[key]
                dels[key] = a_val
        for key in beta:
            if key not in alpha:
                adds[key] = beta[key]
    elif isinstance(alpha, list) and isinstance(beta, list):
        adds = []
        dels = []
        # Walk the longer of the two lists so extra or missing trailing
        # elements are reported instead of raising IndexError.
        for i in range(max(len(alpha), len(beta))):
            if i >= len(alpha):
                adds.append(beta[i])
            elif i >= len(beta):
                dels.append(alpha[i])
            elif alpha[i] != beta[i]:
                dels.append(alpha[i])
                adds.append(beta[i])
    else:
        raise Exception("dict_diff function can only get dict objects")
    if recurse_adds:
        return adds if adds else {}
    if recurse_dels:
        return dels if dels else {}
    return {'adds': adds, 'dels': dels}
The result I'm getting now is:
{'adds': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-22'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {}}},
'dels': {'specific_hours': [{'ends_at': '2015-12-24',
'open_hours': ['07:30-11:30', '12:30-21:30'],
'starts_at': '2015-12-2'},
{'ends_at': '2015-01-03',
'open_hours': ['07:30-11:30'],
'starts_at': '2015-01-0'}],
'subscription_products': {'review_management': {'thiswillbedeleted': 'deleteme'}}}}
And this is the two objects I'm trying to compare:
# The "before" document: nested dicts with a list of dicts under
# 'specific_hours' (MongoDB-style data).
alpha = {
'specific_hours': [
{
"starts_at": "2015-12-2",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
'thiswillbedeleted': "deleteme",
}
},
}
# The "after" document: the first specific_hours entry's 'starts_at' differs
# and 'thiswillbedeleted' is absent from review_management.
beta = {
'specific_hours': [
{
"starts_at": "2015-12-22",
"ends_at": "2015-12-24",
"open_hours": [
"07:30-11:30",
"12:30-21:30"
]
},
{
"starts_at": "2015-01-0",
"ends_at": "2015-01-03",
"open_hours": [
"07:30-11:30"
]
}
],
'subscription_products': {'presence_management':
{'expiration_date': 1953291600,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
},
'review_management':
{'expiration_date': 1511799660,
'payment_type': {
'free': 'iamfree',
'test': "test",
},
}
},
}

Related

Python: merge Nested Dictionary into one JSON

How to merge strings from the yield generator of JSON into one JSON?
I have got Nested Dictionary by yield generator, and I aim to have one JSON file.
I have the output of these correct strings of nested dictionary.
{"domain.com": {"Chrome": "19362.344607264396"}}
{"domain.com": {"ChromeMobile": "7177.498437391487"}}
{"another.com": {"MobileSafari": "6237.433155080214"}}
{"another.com": {"Safari": "5895.409403430795"}}
and I want to merge into one JSON file
[
{
"domain.com": {
"Chrome": "19362.344607264396"
}
},
{
"domain.com": {
"ChromeMobile": "7177.498437391487"
}
},
{
"another.com": {
"MobileSafari": "6237.433155080214"
}
},
{
"another.com": {
"Safari": "5895.409403430795"
}
}
]
or ideally to have a JSON likes to this
{
"browsers": [
{
"domain.com": {
"Chrome": "19362.344607264396",
"ChromeMobile": "7177.498437391487",
},
"another.com": {
"MobileSafari": "6237.433155080214",
"Safari": "5895.409403430795"
}
}
]
}
My code
# Cloudflare zone bandwidth total
def browser_map_page_views(domain_zone):
    """Yield the top-5 browser families by 10-day page views for one zone.

    Each yielded item has the shape {zone: {family: {'value': count}}}.
    """
    promql = (
        "topk(5, sum by(family) (increase(browser_map_page_views_count"
        f"{{job='cloudflare', zone='{domain_zone}'}}[10d])))"
    )
    for sample in prom.custom_query(query=promql):
        family = sample['metric']['family']
        count = sample['value'][1]
        yield {domain_zone: {family: {'value': count}}}
# Get list of zones from Prometheus based on Host Tracker data
def domain_zones():
    """Yield every zone name present in the Host Tracker uptime metrics."""
    uptime_series = prom.custom_query(
        query="host_tracker_uptime_percent{job='donodeexporter'}"
    )
    for sample in uptime_series:
        yield sample['metric']['zone']
There is a final output.
# Get a list of domains and substitution each one into a request of Prometheus query.
# Get a list of domains and substitute each one into a Prometheus query.
for domain_list in domain_zones():
    for payload in browser_map_page_views(domain_zone=domain_list):
        # Renamed from `dict`: never shadow the builtin type name.
        dicts = payload
        print(json.dumps(dicts))
Something like this
import json

# Sample rows as produced by the generator: one single-key dict per row.
DATA = [
    {"domain.com": {"Chrome": "19362.344607264396"}},
    {"domain.com": {"ChromeMobile": "7177.498437391487"}},
    {"another.com": {"MobileSafari": "6237.433155080214"}},
    {"another.com": {"Safari": "5895.409403430795"}}
]

def yield_data():
    # Re-yield each row unchanged.
    yield from DATA

def merge1():
    """Flat merge: a JSON array with one object per row."""
    return list(yield_data())

def merge2():
    """Grouped merge: one object per domain, wrapped under 'browsers'."""
    grouped = {}
    for row in DATA:
        domain = next(iter(row))  # each row has exactly one key
        grouped.setdefault(domain, {}).update(row[domain])
    return {"browsers": [grouped]}

print(json.dumps(merge1(), indent=2))
print(json.dumps(merge2(), indent=2))
merge1's output looks like:
[
{
"domain.com": {
"Chrome": "19362.344607264396"
}
},
{
"domain.com": {
"ChromeMobile": "7177.498437391487"
}
},
{
"another.com": {
"MobileSafari": "6237.433155080214"
}
},
{
"another.com": {
"Safari": "5895.409403430795"
}
}
]
merge2's output looks like
{
"browsers": [
{
"domain.com": {
"Chrome": "19362.344607264396",
"ChromeMobile": "7177.498437391487"
},
"another.com": {
"MobileSafari": "6237.433155080214",
"Safari": "5895.409403430795"
}
}
]
}

Use python parse string of file path to json

I have the list of path string like below. How to convert it into complete json object?
foldersList = [
'1/',
'1/2/',
'1/2/2.txt',
'1/2/5/',
'1/5.txt',
'2/',
'2/test.txt',
'test.json'
]
How to convert it into complete json object like below
{
"fileMenu":{
"list":[
{
"fileType":"d",
"name":"1",
"subFolders":[
{
"fileType":"-",
"name":"5.txt",
},
{
"fileType":"d",
"name":"2",
"subFolders":[
{
"fileType":"-",
"name":"2.txt",
},
{
"date":1594983597000,
"fileType":"d",
"name":"5",
"size":0,
"subFolders":[]
}]
}]
},
{
"fileType":"d",
"name":"2",
"subFolders":[{
"fileType":"-",
"name":"test.txt"
}]
},
{
"fileType":"-",
"name":"test.json"
}],
"status":"OK"
}
}
How to do this? I tried it with the some code snippets.
# Flat path list to convert into a nested folder tree; a trailing '/'
# marks a directory.
foldersList = [
    '1/',
    '1/2/',
    '1/2/2.txt',
    '1/2/5/',
    '1/5.txt',
    '2/',
    '2/test.txt',
    'test.json'
]
foldersJson = {}    # nested dict: name -> dict of children
nodeInfoList = []   # node each path resolves to (references into foldersJson)
nodeInfoDic = {}
for path in foldersList:
    nodeInfoDic = foldersJson
    for node in path.split('/'):
        if node != '':
            # dict.has_key() was removed in Python 3; use the `in` operator.
            if node not in nodeInfoDic:
                nodeInfoDic[node] = {}
            nodeInfoDic = nodeInfoDic[node]
    nodeInfoList.append(nodeInfoDic)
print(nodeInfoList)

How to move sub-dictionary to another dictionary?

I am writing a function that takes 2 strings as inputs and would move a section of the dictionary to another.
def move(item_to_move, destination):
# do something....
My initial dictionary looks like this.
directories = {
'beers': {
'ipa': {
'stone': {}
}
},
'wines': {
'red': {
'cabernet': {}
}
},
'other' : {}
}
I would like to move either a subsection or section of the dictionary to another section. The sections are represented by each key of the path delimited by a '/'. For example, the inputs for my function would be:
item_to_move='beers/ipa'
destination='other'
move(directories, item_to_move,destination)
The output would be:
{
'wines': {
'red': {
'cabernet': {}
},
},
'other' :{
'beers': {
'ipa': {
'stone': {}
} }
},
}
NOTE: I am assuming all input paths for items_to_move are valid.
Find the origin's parent dictionary and the target's dictionary, then update the target's dictionary with the origin's key and value (removing it from the origin's parent):
def move(tree, originPath, targetPath):
    """Detach the last node of originPath and graft it under targetPath.

    Both paths are '/'-delimited key chains into the nested dict *tree*;
    the tree is modified in place.
    """
    parts = originPath.split("/")
    parent = tree
    for name in parts[:-1]:          # walk to the parent of the node to move
        parent = parent[name]
    last = parts[-1]
    target = tree
    for name in targetPath.split("/"):
        target = target[name]
    target[last] = parent.pop(last)  # remove from origin, attach to target
output:
directories = {
'beers': {
'ipa': {
'stone': {}
}
},
'wines': {
'red': {
'cabernet': {}
}
},
'other' : {}
}
move(directories,'beers/ipa','other')
print(directories)
{ 'beers': {},
'wines': { 'red': {'cabernet': {}} },
'other': { 'ipa': {'stone': {}} }
}

How to just initialise the tile in python dictionary?

I try to generate JSON file from python dictionary data type.
Here is the segment of python code involved in this issue before I dump it to Json format :
# Initialise one channel segment, then fill it from "name:values" entries.
channelSeg = {}
channelSeg["ch"] = None
channelSeg["chdata"] = []
for e in channelPkg:
    print(e)  # fixed: Python 3 print() function (was Python 2 `print e`)
    attr = e.split(':')
    if attr[0] == "ch":
        channel = attr[1].split(',')
        channelSeg["ch"] = int(channel[0])
Heading
I am doing this to init dictionary index then later I could append more data in my for loop like this:
channelSeg["ch"] = None
channelSeg["chdata"] = []
but I really want to do is without assign them any data just
channelSeg["ch"]
channelSeg["chdata"]
but Python doesn't allow me to do that.
So after dump operation , I got repetitive Json data like this(part of it)
"datapkg": [
{
"dataseg": [
{
"ch": 0,
"chdata": [
{
"euler": {
"y": "-19.32",
"x": "93.84",
"z": "-134.14"
}
},
{
"areal": {
"y": "57",
"x": "-242",
"z": "-210"
}
}
]
},
{
"ch": 1,
"chdata": [
{
"areal": {
"y": "-63",
"x": "-30",
"z": "10"
}
}
]
},
{
"ch": null,
"chdata": []
}
],
"t": "174464",
"n": "9884"
},
I always have redundant :
{
"ch": null,
"chdata": []
}
Which make this JSON data package not healthy enough , is there anyway to remove this piece of redundant data?
Many thanks for any advice.
===========v2==============
after I consider Edward's answer I found I could only solve it with channelSeg["ch"] = None but I don't know how to deal with another redundant list, it is because I didn't post enough code , so I past more complete code here , and still looking for solutions ..
My code after modify :
# Parse one sensor package: elements are '&'-separated channel readings.
for elem in sensorPkg:
    channelPkg = elem.split('&')  # channelPkg contains each channel's readings
    # each channel needs a dictionary to store data
    channelSeg = {}
    # channelSeg["ch"] = None
    channelSeg["chdata"] = []
    for e in channelPkg:
        attr = e.split(':')  # entries look like "name:val1,val2,..."
        if attr[0] == "ch":
            new_channel = {
                'ch': int((attr[1].split(','))[0])
                #channelSeg["ch"] = int(channel[0])
            }
            channelSeg["chdata"].append(new_channel)
            # store channel numbers
        elif attr[0] == "euler":
            # create euler package
            numbers = attr[1].split(',')
            eulerSeg = {}
            d = {}
            d["x"] = numbers[0]
            d["y"] = numbers[1]
            d["z"] = numbers[2]
            eulerSeg["euler"] = d
            # append to channel segment
            channelSeg["chdata"].append(eulerSeg)
        elif attr[0] == "areal":  # real accelerometer readings
            # create areal package
            numbers = attr[1].split(',')
            arealSeg = {}
            d = {}
            d["x"] = numbers[0]
            d["y"] = numbers[1]
            d["z"] = numbers[2]
            arealSeg["areal"] = d
            # append to channel segment
            channelSeg["chdata"].append(arealSeg)
        # and so on
and here is the outcome
{
"dataseg": [
{
"chdata": [
{
"ch": 0
},
{
"euler": {
"y": "6.51",
"x": "73.16",
"z": "-133.69"
}
},
{
"areal": {
"y": "516",
"x": "-330",
"z": "-7"
}
}
]
},
{
"chdata": [
{
"ch": 1
},
{
"euler": {
"y": "24.86",
"x": "4.30",
"z": "-71.39"
}
},
{
"areal": {
"y": "120",
"x": "316",
"z": "273"
}
}
]
},
{
"chdata": [
{
"ch": 2
},
{
"euler": {
"y": "62.32",
"x": "-60.34",
"z": "-120.82"
}
},
{
"areal": {
"y": "440",
"x": "-611",
"z": "816"
}
}
]
},
{
"chdata": []
}
],
"t": "14275",
"n": "794"
},
which
{
"chdata": []
}
Still there
In the data structure that you're working with, I notice that 'dataseg' is a list of channels. Now, you don't need to initialize each channel before adding it to dataseg. First initialize dataseg as an empty list, then, while iterating over your entries in channelPkg, you can create new channel dicts using the information read from channelPkg, and append them immediately:
# Build the channel list directly: no pre-initialised placeholder dict is
# needed, so no empty {"ch": None, "chdata": []} entry can leak into the output.
dataseg = []
for e in channelPkg:
    attr = e.split(':')
    if attr[0] == "ch":
        new_channel = {
            # BUG FIX: int() must receive the first split field, not the
            # whole list -- int(attr[1].split(',')) raises TypeError.
            'ch': int(attr[1].split(',')[0]),
            'data': []
        }
        dataseg.append(new_channel)
Hope that helps -- I'm not sure what the context of your question is exactly, so comment if this doesn't solve your problem.
Edit
I think that your problem is that the very last channelPkg is empty. So, for e in channelPkg: is equivalent to for e in [], and as a result, the last iteration of the outer loop appends just the initialized values (nothing inside for e in channelPkg executes).
Try adding two lines to test if the sensorPkg has a ch property (I'm assuming that all valid sensorPkgs have a ch property):
for elem in sensorPkg:
channelPkg = elem.split('&')
# Add this to prevent appending an empty channel
if 'ch' not in [e.split(':')[0] for e in channelPkg]:
break
channelSeg = {}
channelSeg["chdata"] = []
for e in channelPkg:
# ... etc
Try using a conditional dictionary comprehension:
# Fixed syntax: `{x if cond for ...}` is invalid Python -- in a comprehension
# the filter clause must follow the `for`.
channelSeg["chdata"] = {ch.split(',')[0] for ch in e.split(':') if ch}

Python filter nested dict given list of key names

Is there a way to filter a nested dict in Python, so I can see only the keys I'd specified ?
Example:
x = {
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
"dropdown_key": "",
"url_lista": "",
"chave_relacional": ""
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
"dt_last_up": "10/04/2014 16:30:44",
},
"url_detalhes": "/DetalhesDocsPro.aspx",
"url_app": "/docspro",
}
y = filter(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
Then var y would be something like:
{
"field": [
{
"nm_field": "ch_origem_sistema_chave",
"inf_tabelado": {
"dropdown_value": "",
},
},
{
"nm_field": "ax_andamento_data",
"inf_tabelado": {
"dropdown_value": "",
},
}
],
"_metadata": {
"dt_reg": "22/01/2014 16:17:16",
},
"url_app": "/docspro",
}
I've tried to do something using defaultdict, but had no success with lists at any level of recursion. I also found it difficult to work with different data structures.
Here's a modified version of 2rs2ts's answer that returns a new object rather than modifying the old one (and handles filtering on non-leaf nodes):
import copy
def fltr(node, vals):
    """Return a filtered deep copy of *node*, or None if nothing survives.

    Keys listed in *vals* are kept with their entire subtree (deep-copied);
    other dict/list values are filtered recursively and kept only when the
    filtered result is non-empty. Non-container nodes yield None.
    """
    if isinstance(node, dict):
        kept = {}
        for key, value in node.items():
            if key in vals:
                kept[key] = copy.deepcopy(value)
            elif isinstance(value, (list, dict)):
                pruned = fltr(value, vals)
                if pruned:
                    kept[key] = pruned
        return kept or None
    if isinstance(node, list):
        survivors = [c for c in (fltr(entry, vals) for entry in node) if c]
        return survivors or None
    # Any other node type falls through and implicitly returns None.
With this, you will call
y = fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
and get
{
"field": [
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ch_origem_sistema_chave"
},
{
"inf_tabelado": {
"dropdown_value": ""
},
"nm_field": "ax_andamento_data"
}
],
"url_app": "/docspro",
"_metadata": {
"dt_reg": "22/01/2014 16:17:16"
}
}
Note that this will return None if everything is filtered. For example,
fltr(x, [])
will always return None, no matter what is in x.
Here's a solution which walks the structure in a depth-first manner to find the "leaf" nodes which you are checking to see if they're in your list of elements to preserve. When it finds such an element, it removes it from the dictionary with del. (So this is done in-place.)
def fltr(d, vals):
    """Prune *d* in place, keeping only leaf keys listed in *vals*.

    Container-valued keys are never deleted themselves; a key found in
    *vals* keeps its entire subtree untouched. Lists are traversed
    element by element. Returns None (mutates its argument).
    """
    if isinstance(d, list):
        for item in d:
            fltr(item, vals)
        return
    if not isinstance(d, dict):
        return
    # Collect first, delete after: never mutate a dict while iterating it.
    doomed = [k for k, v in d.items()
              if k not in vals and not isinstance(v, (list, dict))]
    for k in doomed:
        del d[k]
    for k, v in d.items():
        if k not in vals and isinstance(v, (list, dict)):
            fltr(v, vals)
Note that I didn't define a function called filter, because it's a built-in one and you don't want to shadow it.
>>> fltr(x, ['dropdown_value', 'nm_field', 'url_app', 'dt_reg'])
>>> x
{'field': [{'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ch_origem_sistema_chave'}, {'inf_tabelado': {'dropdown_value': ''}, 'nm_field': 'ax_andamento_data'}], 'url_app': '/docspro', '_metadata': {'dt_reg': '22/01/2014 16:17:16'}}

Categories