I know similar questions have been asked before, but I am really having problems adapting the answers to my special case:
Let's say I have a dictionary with varying depths, for example:
# Example input: a nested dictionary whose leaves carry 'offset' and 'start'.
dicti = {'files':
    {'a': {'offset': 100, 'start': 0},
     'b': {
         'c': {'offset': 50, 'start': 0},
         'd': {'offset': 70, 'start': 0},
     },
     'e': {
         'f': {'offset': 80, 'start': 0},
         'g': {'offset': 30, 'start': 0},
         'h': {'offset': 20, 'start': 0},
     },
    }
}
etc... (with a lot more different levels and entries)
So now I want a copy of that dictionary with the same structure and keys, except that wherever 'offset' (at any level) is greater than, say, 50, it should be changed to 0.
I guess some kind of iterative function would be the best, but I cannot get my head around that...
You might use the standard machinery for the copy and then modify the copied dictionary (solution #1 in my example), or you might do copying and modification in the same function (solution #2).
In either case, you're looking for a recursive function.
import copy
from pprint import pprint
dicti = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0},
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0},
}
}
}
# Solution 1, two passes
def modify(d):
    """Reset every 'offset' greater than 50 to 0, in place, at any depth.

    Non-dict arguments are ignored, which is what ends the recursion at the
    leaves. Nothing is returned; the dictionary passed in is mutated.
    """
    if not isinstance(d, dict):
        return
    if d.get('offset', 0) > 50:
        d['offset'] = 0
    # Only the values can hold further dictionaries, so the keys are not needed.
    for child in d.values():
        modify(child)
dictj = copy.deepcopy(dicti)
modify(dictj)
pprint(dictj)
# Solution 2, copy and modify in one pass
def copy_and_modify(d):
    """Return a copy of *d* in which every 'offset' above 50 is set to 0.

    Dictionaries are rebuilt recursively, so the input is never mutated.
    Any non-dict value is returned as-is (not copied).
    """
    if not isinstance(d, dict):
        return d
    d2 = {k: copy_and_modify(v) for k, v in d.items()}
    # Default to 0 so dictionaries without an 'offset' key (such as the top
    # level) do not end up comparing None with an int.
    if d2.get('offset', 0) > 50:
        d2['offset'] = 0
    return d2
dictj = copy_and_modify(dicti)
pprint(dictj)
A recursive solution is going to be more intuitive. You want something like the following pseudocode:
def copy(dict):
new_dict = {}
for key, value in dict:
if value is a dictionary:
new_dict[key] = copy(value)
else if key == 'offset' and value > 50:
new_dict[key] = 0
else:
new_dict[key] = value
return new_dict
d = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0}
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0}
}
}
}
def transform(item):
    """Return a shallow copy of *item* with an 'offset' of 80 marked 'CHANGED'.

    *item* must contain an 'offset' key. Use copy.deepcopy instead of
    item.copy() if the item can hold nested mutable values.
    """
    new_item = item.copy()
    if new_item['offset'] == 80:
        new_item['offset'] = 'CHANGED'
    return new_item


def visit(item):
    """Walk a nested dictionary and transform every dict that has an 'offset'.

    Dictionaries without an 'offset' key are rebuilt with their values
    visited recursively; non-dict values are returned unchanged.
    """
    if not isinstance(item, dict):
        return item
    # Test for the key itself, not its truthiness: an offset of 0 is still
    # an offset and must not send the walk into the leaf's scalar values.
    if 'offset' in item:
        return transform(item)
    return {k: visit(v) for k, v in item.items()}
result = visit(d)
print(result)
Output:
{
'files': {
'b': {
'd': {
'offset': 70,
'start': 0
},
'c': {
'offset': 50,
'start': 0
}
},
'e': {
'g': {
'offset': 30,
'start': 0
},
'h': {
'offset': 20,
'start': 0
},
'f': {
'offset': 'CHANGED',
'start': 0
}
},
'a': {
'offset': 100,
'start': 0
}
}
}
Here are some links on the concepts used in this answer:
Recursion
Visitor pattern
You could call a recursive function to change its value once condition is met:
dicti = {'files':
{'a':{'offset':100, 'start': 0},
'b':{
'c':{'offset':50, 'start':0},
'd':{'offset':70, 'start':0}
},
'e':{
'f':{'offset':80, 'start':0},
'g':{'offset':30, 'start':0},
'h':{'offset':20, 'start':0}
}
}
}
def dictLoop(dt):
    """Set every integer 'offset' above 50 to 0, in place, and return *dt*.

    Nested dictionaries are processed recursively. Values that are neither
    dictionaries nor integer offsets (strings, lists, floats, ...) are left
    untouched.
    """
    for k, v in dt.items():
        if isinstance(v, dict):
            dictLoop(v)
        elif k == 'offset' and isinstance(v, int) and v > 50:
            # Re-assigning an existing key is safe while iterating .items().
            dt[k] = 0
    return dt
# print() call form, as used by the other snippets in this thread.
print(dictLoop(dicti))
Related
I have the below JSON of forum posts.
What would be the pythonic way of creating a resulting JSON of aggregated Positive/Negative ratings per forum?
Input Json:
{"Posting_Stats":{
"Posts":[
{
"Date":"2020-03-29 12:41:00",
"Forum":"panorama",
"Positive":2,
"Negative":0
},
{
"Date":"2020-03-29 12:37:00",
"Forum":"web",
"Positive":6,
"Negative":0
},
{
"Date":"2020-03-29 12:37:00",
"Forum":"web",
"Positive":2,
"Negative":2
},...]}
Output should be:
{"Forum_Stats" : [{"Forum" : "panorama",
"Positive":2,
"Negative":0},
{"Forum" : "web",
"Positive":8,
"Negative":2},...]
}
]
Cannot think of a different way:
# Aggregate the Positive/Negative ratings of every post per forum.
posts = inputData['Posting_Stats']['Posts']
postAggregator = {}
for post in posts:
    forum = post['Forum']
    if forum in postAggregator:
        postAggregator[forum]['Positive'] += post.get('Positive', 0)
        postAggregator[forum]['Negative'] += post.get('Negative', 0)
    else:
        # First post seen for this forum: start its totals from this post.
        postAggregator[forum] = {
            "Positive": post.get('Positive', 0),
            "Negative": post.get('Negative', 0),
        }

# Reshape the per-forum totals into the requested list-of-records layout.
outputData = {
    "Forum_Stats": [
        {"Forum": key, "Positive": value['Positive'], "Negative": value['Negative']}
        for key, value in postAggregator.items()
    ]
}
print(outputData)
Output:
{'Forum_Stats': [{'Forum': 'panorama', 'Positive': 2, 'Negative': 0}, {'Forum': 'web', 'Positive': 8, 'Negative': 2}]}
This may be one way of solving:
# Take the input as a dictionary.
d = {"Posting_Stats": {
    "Posts": [
        {
            "Date": "2020-03-29 12:41:00",
            "Forum": "panorama",
            "Positive": 2,
            "Negative": 0
        },
        {
            "Date": "2020-03-29 12:37:00",
            "Forum": "web",
            "Positive": 6,
            "Negative": 0
        },
        {
            "Date": "2020-03-29 12:37:00",
            "Forum": "web",
            "Positive": 2,
            "Negative": 2
        }]}}

# Sum the ratings per forum, keyed by forum name.
temp = {}
for post in d['Posting_Stats']['Posts']:
    # setdefault creates the zeroed totals the first time a forum is seen.
    totals = temp.setdefault(post.get('Forum'), {'Positive': 0, 'Negative': 0})
    # A post that lacks a rating contributes 0 instead of failing on None.
    totals['Positive'] += post.get('Positive', 0)
    totals['Negative'] += post.get('Negative', 0)

# Finally, convert the totals into the required output format.
output = [{'Forum': forum, **totals} for forum, totals in temp.items()]
print(output)
#[{'Forum': 'panorama', 'Positive': 2, 'Negative': 0},
#{'Forum': 'web', 'Positive': 8, 'Negative': 2}]
I have a dictionary like this
d = {
'Benefits': {
1: {
'BEN1': {
'D': [{'description': 'D1'}],
'C': [{'description': 'C1'}]
}
},
2: {
'BEN2': {
'D': [{'description': 'D2'}],
'C': [{'description': 'C2'}]
}
}
}
}
I am trying to sort the dictionary by the keys of the innermost dictionaries (the ones whose values are lists).
For example:
I want the key 'C' to come first and 'D' second.
I'm trying to get correct order. Here is code:
d1 = collections.OrderedDict(sorted(d.items()))
Unfortunately didn't get correct result
This is my expected output
{'Benefits':
{1:
{'BEN1':
{'C':[{'description': 'C1'}], 'D': [{'description': 'D1'}]
}
},
2:
{'BEN2':
{'C': [{'description': 'C2'}], 'D': [{'description': 'D2'}]
}
}
}
}
I am using python 3.5 . I am trying to get order like this
{'C':[{'description': 'C1'}], 'D': [{'description': 'D1'}]}
The following code sorts any dictionary by its keys and recursively sorts every value that is itself a dictionary; it makes no assumption about the content of the dictionary being sorted. It uses an OrderedDict, but if you can be sure it will always run on Python 3.7 or greater (where dict insertion order is guaranteed; CPython 3.6 already behaves this way), a simple change can be made to use a plain dict.
from collections import OrderedDict
d = {
'Benefits': {
1: {
'BEN1': {
'D': [{'description': 'D1'}],
'C': [{'description': 'C1'}]
}
},
2: {
'BEN2': {
'D': [{'description': 'D2'}],
'C': [{'description': 'C2'}]
}
}
}
}
def sort_dict(d):
    """Return an OrderedDict copy of *d* with keys sorted at every dict level.

    Values that are dictionaries are sorted recursively; every other value
    (including lists that contain dictionaries) is stored unchanged.
    """
    ordered = OrderedDict()
    for key in sorted(d):
        value = d[key]
        ordered[key] = sort_dict(value) if isinstance(value, dict) else value
    # Where plain dicts keep insertion order, dict(ordered) could be
    # returned here instead.
    return ordered
print(sort_dict(d))
See demo
d1 = collections.OrderedDict(sorted(d.items()))
This is not working because it is sorting only on the Benefits item. Here you want to sort inner items, so we have to reach the inner items and sort them.
import collections

# Rebuild the structure, sorting only the innermost dictionaries by key.
d1 = {'Benefits': {}}
for a_benefit, bens in d['Benefits'].items():
    d1['Benefits'][a_benefit] = {
        # Keep the OrderedDict: converting it back to a plain dict would
        # discard the sorted order on interpreters whose dicts are unordered.
        a_ben: collections.OrderedDict(sorted(inner.items()))
        for a_ben, inner in bens.items()
    }
Say I have the following list of dictionaries:
x = [{
'218': {
'text': 'profit',
'start': 0,
'end': 21
}
}, {
'312': {
'text': 'for',
'start': 30,
'end': 60
}
}, {
'350': {
'text': 'year',
'start': 70,
'end': 85
}
}, {
'370': {
'text': 'next column',
'start': 120,
'end': 130
}
}, {
'385': {
'text': 'next_column',
'start': 160,
'end': 169
}
}]
I want to merge some of the dictionaries: whenever the difference between the end of one dict and the start of the next dict is less than 20, those dicts should be merged into one and their text concatenated.
The output should look like this:
x_new = [{
'218,312,350': {
'text': 'profit for year',
'start': 0,
'end': 85
}
}, {
'370': {
'text': 'next column',
'start': 120,
'end': 130
}
}, {
'385': {
'text': 'next_column',
'start': 160,
'end': 169
}
}]
I have already solved it with the basic approach, but it does not look good, is there any solution using itertools or something like that?
What i have tried
# Asker's attempt. It uses Python 2 idioms: it indexes the result of
# dict.values() / dict.keys() directly and uses the print statement.
# NOTE(review): the indentation of this snippet was lost, so the exact nesting
# of the statements following `else:` cannot be determined from here.
# Each entry's first value is used as the sort key.
x_updated=sorted(x, key=lambda x: x.values()[0])
# final_merge collects groups (lists of keys); merge is the group being built.
final_merge=[]
merge=[]
# Compare every entry with its successor.
for first, second in zip(x_updated, x_updated[1:]):
# The threshold used here is 25, not the 20 mentioned in the question text.
if abs(second.values()[0]['start']-first.values()[0]['end'])<25:
print "its belong to the same column"
merge=merge+[first.keys()[0]]
else:
merge=merge+[first.keys()[0]]
final_merge=final_merge+[merge]
merge=[]
# Appends the key of `second`, i.e. the last entry once the loop has finished.
merge=merge+[second.keys()[0]]
final_merge=final_merge+[merge]
Once I have final_merge, which tells me which values to merge, it is easy to combine them. But is there a simpler way to do what the code above does? Also, after the loop I manually add the last dict, because in my situation the last one is always a different column; but what if it belongs to the same column?
This is what I would do:
first I would make some helper functions:
def merge(d1, d2):
    """Combine two single-entry dictionaries into one single-entry dictionary.

    The new key joins both keys with a comma, the texts are joined with a
    space, 'start' comes from *d1* and 'end' comes from *d2*.
    """
    left_key, left = list(d1.items())[0]
    right_key, right = list(d2.items())[0]
    merged_key = ",".join([left_key, right_key])
    merged_value = {
        'text': " ".join([left['text'], right['text']]),
        'start': left['start'],
        'end': right['end'],
    }
    return {merged_key: merged_value}
def should_merge(d1, d2, max_gap=20):
    """Return True when *d1* and *d2* are close enough to be merged.

    Both arguments are single-entry dictionaries (or None). They qualify when
    the absolute distance between the 'end' of *d1* and the 'start' of *d2*
    is strictly less than *max_gap*. A None argument never qualifies.
    """
    if d1 is None or d2 is None:
        return False
    end_of_first = list(d1.values())[0]['end']
    start_of_second = list(d2.values())[0]['start']
    return abs(end_of_first - start_of_second) < max_gap
The first function merges two dictionaries
The second returns True if two dictionaries should merge.
All that's left is the actual merge function:
from itertools import zip_longest
def merged_dicts(x):
    """Merge runs of neighbouring entries of *x*, print and return the result.

    *x* is a list of single-entry dictionaries. Consecutive entries for which
    should_merge() is true are folded together with merge(). The resulting
    list is printed and also returned so callers can use it.
    """
    actual_merge = []
    last_merged = False
    # Pair every entry with its successor; the last entry is paired with None.
    for d1, d2 in zip_longest(x, x[1:], fillvalue=None):
        if should_merge(d1, d2):
            # Extend the group already on the stack, or start one from d1.
            left = actual_merge.pop() if last_merged else d1
            actual_merge.append(merge(left, d2))
            last_merged = True
        elif last_merged:
            # d1 was absorbed into the previous group; it just closes the run.
            last_merged = False
        else:
            actual_merge.append(d1)
    print(actual_merge)
    return actual_merge
That is a little more readable though it doesn't use any "fancy" itertool functions.
I would also consider changing the id of the dict to be inside the inner dict:
d= {'id': '385',
'text': 'next_column',
'start': 160,
'end': 169
}
That is a little less complicated and cleaner.
I would create a class for these objects you use:
class my_dict:
    """One text span, identified by *id* and covering start..end."""

    def __init__(self, id, text, start, end):
        self.id = id
        self.text = text
        self.start = start
        self.end = end

    def merge(self, other):
        """Absorb *other* into this span in place.

        Ids are joined with a comma, texts with a space, and this span's
        end is extended to other's end; start is left unchanged.
        """
        self.id = "{},{}".format(self.id, other.id)
        self.text = "{} {}".format(self.text, other.text)
        self.end = other.end
And then the main code loop will be:
# Fold each object into the previous group unless the gap reaches 20.
# Slicing (instead of x[0]) keeps this working for an empty list.
x_new = x[:1]
for obj in x[1:]:
    last = x_new[-1]
    # Merging is required only for gaps strictly below 20, so a gap of
    # exactly 20 starts a new group.
    if obj.start - last.end >= 20:
        x_new.append(obj)
    else:
        # merge() updates `last` in place, i.e. the object also held by x.
        last.merge(obj)
Try this:
x = [{'218': {'text': 'profit', 'start': 0, 'end': 21}},
     {'312': {'text': 'for', 'start': 30, 'end': 60}},
     {'350': {'text': 'year', 'start': 70, 'end': 85}},
     {'370': {'text': 'next column', 'start': 120, 'end': 130}},
     {'385': {'text': 'next_column', 'start': 160, 'end': 169}}]


def merge_dict(d_keys, x, i, first_start_value, current_index_dict_key):
    """Build the merged entry for the group of ids collected in *d_keys*.

    The group ends at x[i], whose id is *current_index_dict_key*; that entry
    supplies the 'end'. *first_start_value* is the 'start' of the group's
    first member. The texts of all members are joined with spaces. Ids are
    de-duplicated and ordered numerically, so they must be numeric strings.
    The entries of *x* are not modified.
    """
    ordered_keys = sorted(set(d_keys), key=int)
    # Index every entry by id so each member's text can be looked up.
    entries = {key: value for item in x for key, value in item.items()}
    # Copy the closing entry so the caller's input stays untouched.
    merged = dict(x[i][current_index_dict_key])
    merged['text'] = ' '.join(entries[key]['text'] for key in ordered_keys)
    merged['start'] = first_start_value
    return {','.join(ordered_keys): merged}


x_new = []
d_keys = []
first_start_value = 0
for i, item in enumerate(x):
    current_index_dict_key = list(item)[0]
    current = item[current_index_dict_key]
    if i + 1 < len(x):
        next_index_dict_key = list(x[i + 1])[0]
        # Gap between the end of this entry and the start of the next one.
        diff = x[i + 1][next_index_dict_key]['start'] - current['end']
        if diff < 20:
            if not d_keys:
                # A new group starts here: remember where it begins.
                first_start_value = current['start']
            d_keys.append(current_index_dict_key)
            d_keys.append(next_index_dict_key)
            continue
    # The current entry closes a group, or stands on its own.
    if d_keys:
        x_new.append(merge_dict(d_keys, x, i, first_start_value, current_index_dict_key))
        d_keys = []
    else:
        x_new.append({current_index_dict_key: current})
print(x_new)
# O/P:
# [{'218,312,350': {'text': 'profit for year', 'start': 0, 'end': 85}},
#  {'370': {'text': 'next column', 'start': 120, 'end': 130}},
#  {'385': {'text': 'next_column', 'start': 160, 'end': 169}}]
If I have a python dictionary like the following:
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm':{
'active':'yes',
'fixed':{
'#dev':{
'active':'yes',
'something':'else'
}
}
}
},
'cpu': {
'alarm': {
'active':'no',
'highcpu': {
'active':'yes'
}
}
}
}
how can I filter only the paths that end in 'active':'yes' and not show any other info.
In addition, for parent items that show up as active: no, I would want to disregard whatever comes after those.
conf = {
'memory': {
'alarm': {
'active': 'yes'
}
},
'disk' : {
'alarm':{
'active':'yes',
'fixed': {
'#dev': {
'active':'yes'
}
}
}
}
}
I don't have any working code for this yet as I'm not sure where to start. all I have at the moment is the starting dictionary.
Using recursion :
def keep_active_only(my_dict):
    """Return a pruned copy of *my_dict* holding only the active branches.

    A dictionary whose own "active" value is "no" yields an empty dict, and
    so do all its children. Otherwise the result keeps "active": "yes" plus
    every nested dictionary that is itself non-empty after pruning; all other
    scalar values are dropped.
    """
    # An inactive node discards everything beneath it.
    if my_dict.get("active") == "no":
        return {}
    kept = {}
    for key, value in my_dict.items():
        if isinstance(value, dict):
            # Prune the child first and keep it only if something is left.
            child = keep_active_only(value)
            if child:
                kept[key] = child
        elif key == "active" and value == "yes":
            kept[key] = value
    return kept
Output :
>>> keep_active_only(conf)
{
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes'
}
}
}
},
'memory': {
'alarm': {
'active': 'yes'
}
}
}
You can use recursion:
# Report whether the mapping `d` counts as active.
# The items of `d` are scanned in iteration order:
#   - the first 'active' key records whether its value equals 'yes' and sets
#     _flag, which stops any further checks;
#   - dict values met before that point are checked recursively.
# The result is all() of the recorded checks, so a mapping with no 'active'
# key and no dict values yields True.
# NOTE(review): dict values that come after the 'active' key are never
# inspected, so the outcome depends on key order. Confirm this is intended.
def active(d):
_r, _flag = [], False
for a, b in d.items():
if a == 'active' and not _flag:
_r.append(b == 'yes')
_flag = True
if not _flag and isinstance(b, dict):
_r.append(active(b))
return all(_r)
# Build a filtered copy of `d`.
# An item is kept when one of these holds:
#   - its value is not a dict and `flag` is false;
#   - its key is 'active';
#   - its value is a dict for which active() returns true.
# Kept dict values are rebuilt recursively, with `flag` set to whether that
# child contains an 'active' key; the effect is that inside such a child only
# 'active' and qualifying sub-dicts survive. Non-dict values are reused as-is.
def build(d, flag = False):
return {a:b if not isinstance(b, dict) else build(b, 'active' in b)
for a, b in d.items() if ((not isinstance(b, dict) and not flag) or a == 'active') or (isinstance(b, dict) and active(b))}
import json
print(json.dumps(build(conf), indent=4))
Output:
{
"memory": {
"alarm": {
"active": "yes"
}
},
"disk": {
"alarm": {
"active": "yes",
"fixed": {
"#dev": {
"active": "yes"
}
}
}
}
}
Not sure if I understand correctly, but here is a function that discards all data from the dict that does not take you to a particular key and value:
def filter_dict(d, key, value):
    """Return only the parts of *d* that lead to a *key*: *value* pair.

    Matching pairs are kept, nested dictionaries are filtered recursively and
    kept only when the filtered result is non-empty, and everything else is
    discarded.
    """
    kept = {}
    for name, item in d.items():
        if name == key and item == value:
            kept[name] = item
            continue
        if not isinstance(item, dict):
            continue
        subtree = filter_dict(item, key, value)
        if subtree:
            kept[name] = subtree
    return kept
Here is how you would use it in your example:
from pprint import pprint
conf = {
'memory': {
'alarm': {
'active': 'yes',
'pagefile_error': {
'active':'no'
}
}
},
'disk': {
'alarm': {
'active': 'yes',
'fixed': {
'#dev': {
'active': 'yes',
'something': 'else'
}
}
}
}
}
pprint(filter_dict(conf, 'active', 'yes'))
# {'disk': {'alarm': {'active': 'yes', 'fixed': {'#dev': {'active': 'yes'}}}},
# 'memory': {'alarm': {'active': 'yes'}}}
Let's say I have a dict like
{
"key_a": "value",
"key_b": {
"key_b_a": "value",
"key_b_b": {
"key_b_b_a": "value"
}
}
}
What I want is to create a method to delete the given key or change its value.
def del_key(key):
my_dict = <dictionary described above>
keys = key.split(':')
if len(keys) == 1:
del my_dict[keys[0]]
elif len(keys) == 2:
del my_dict[keys[0]][keys[1]]
elif len(keys) == 3:
del my_dict[keys[0]][keys[1]][keys[2]]
. . .
del_key('key_b:key_b_b:key_b_b_a')
del_key('key_b:key_b_b')
del_key('key_a')
How can I do this gracefully?
This assumes the input is a valid key path; otherwise you have to check for missing keys yourself.
data = {
    "key_a": "value",
    "key_b": {
        "key_b_a": "value",
        "key_b_b": {
            "key_b_b_a": "value"
        }
    }
}


def del_key(key):
    """Delete the entry addressed by the colon-separated path *key* from data.

    All path components except the last are followed through the nested
    dictionaries, then the last component is removed from the dictionary
    reached. The module-level ``data`` is modified in place and returned.
    Raises KeyError when any component of the path does not exist.
    """
    *parents, last = key.split(':')
    node = data
    for part in parents:
        node = node[part]
    del node[last]
    return data


print(del_key('key_b:key_b_b:key_b_b_a'))
print(del_key('key_b:key_b_b'))
print(del_key('key_a'))
output:
{'key_a': 'value', 'key_b': {'key_b_a': 'value', 'key_b_b': {}}}
{'key_a': 'value', 'key_b': {'key_b_a': 'value'}}
{'key_b': {'key_b_a': 'value'}}