Change value of python nested dictionary with variable path? - python

How can I pass a dictionary path as an argument? I thought maybe it had something to do with *args or **kwargs but I didn't understand how to use them for this
dictionary = {
'key1': {
'attribute1': 'green',
},
'attribute2': 5
}
def SaveToFile(target, value):
with open('savefile.json', 'r') as savefile:
dictionary = json.load(savefile)
dictionary[target] = value
with open('savefile.json', 'w') as savefile:
json.dump(dictionary, savefile)
SaveToFile('["key1"]["attribute1"]', 'blue')
SaveToFile('["attribute2"]', 10)
print(dictionary)
desired output:
{
'key1': {
'attribute1': 'blue'
},
'attribute2': 10
}

use regex and recursion to solve this
dictionary = {
'key1': {
'attribute1': 'green',
},
'attribute2': 5
}
import re
def update_dict(d_, val, *keys):
    """Set an existing nested entry of ``d_`` to ``val``, in place.

    ``keys`` is the path to the entry, outermost key first.  The value is
    only replaced when the whole path already exists: if any key is missing,
    or an intermediate value is not a dict, ``d_`` is left untouched.

    Returns ``d_`` itself (also when ``keys`` is empty).
    """
    if not keys:
        return d_

    *parents, leaf = keys
    node = d_
    for key in parents:
        child = node.get(key)
        # Stop as soon as the path cannot be followed; never overwrite a
        # value that merely sits somewhere along the requested path.
        if not isinstance(child, dict):
            return d_
        node = child

    # Only existing entries are updated; no new keys are created.
    if leaf in node:
        node[leaf] = val
    return d_
def ChangeValue(target, value):
    """Update the module-level ``dictionary`` at the path written in ``target``.

    ``target`` is a string such as ``'["key1"]["attribute1"]'``; every
    ``["..."]`` segment is one key of the path, outermost first.
    """
    # Raw string avoids invalid-escape warnings; findall returns exactly the
    # captured key names, with no empty separators to filter out.
    keys = re.findall(r'\["(.*?)"\]', target)
    update_dict(dictionary, value, *keys)
ChangeValue('["key1"]["attribute1"]', 'blue')
ChangeValue('["attribute2"]', 10)
dictionary
# output {'key1': {'attribute1': 'blue'}, 'attribute2': 10}

Related

Python Dictionary comprehension with condition

Suppose that I have a dict named data like below:
{
001: {
'data': {
'fruit': 'apple',
'vegetable': 'spinach'
},
'text': 'lorem ipsum',
'status': 10
},
002: {
.
.
.
}
}
I want to flatten(?) the data key and convert it to this:
{
001: {
'fruit': 'apple',
'vegetable': 'spinach',
'text': 'lorem ipsum',
'status': 10
},
002: {
.
.
.
}
}
I am trying to achieve this using dict comprehensions. Below implementation is with for loops:
mydict = {}
for id, values in data.items():
mydict[id] = {}
for label, value in values.items():
if label == 'data':
for x, y in value.items():
mydict[id][x] = y
else:
mydict[id][label] = value
I tried below comprehension but it gives syntax error:
mydict = {
id: {x: y} for x, y in value.items() if label == 'data' else {label: value}
for id, values in data.items() for label, value in values.items()}
Is there a way to achieve this using comprehensions only?
With dict expansions:
mydict = {i:{**v['data'], **{k:u for k, u in v.items() if k != "data"}} for i, v in data.items()}
The if clause in a comprehension (dict, list, set, generator) applies to the iteration itself, it can not be used for the production. For that you need conditionals in the production.
Generally speaking, comprehensions are really a reorganisation of a specific kind of (possibly nested) iterations:
a bunch of iterations and conditions, possibly nested
a single append/set
So
for a in b:
if c:
for d in e:
for f in g:
if h:
thing.append(i)
can be comprehension-ified, just move the production (i) to the head and put the other bits in a flat sequence:
thing = [
i
for a in b
if c
for d in e
for f in g
if h
]
Now your comprehension makes no sense, because it starts with iterating value, and there's no else in comprehension filter, and even if we add parens {x: y} for x, y in value.items() is not a value. Comprehensions also do not "merge" items, so with:
mydict = {
id: {label: value}
for id, values in data.items() for label, value in values.items()
}
Well you'll get only the last {label: value} for each id, because that's how dicts work.
Here if you consider the production loop, it's this:
for id, values in data.items():
mydict[id] = {}
This means that this is your dict comprehension:
mydict = {
id: {}
for id, values in data.items()
}
the rest of the iteration is filling the value, so it needs to be a separate iteration inside the production:
mydict = {
id: {
label: value ???
for label, value in values.items()
}
for id, values in data.items()
}
In which case you hit the issue that this doesn't quite work, because you can't "conditionally iterate" in comprehensions, it's all or nothing.
Except you can: the right side of in is a normal expression, so you can do whatever you want with it, meaning you can unfold-or-refold:
mydict = {
id: {
x: y
for label, value in values.items()
for x, y in (value.items() if label == 'data' else [(label, value)])
}
for id, values in data.items()
}
This is a touch more expensive in the non-data case as you need to re-wrap the key and value in a tuple and list, but that's unlikely to be a huge deal.
Another alternative, instead of using a conditional comprehension, is to use splatting to merge the two dicts (one of which you create via a comprehension):
mydict = {
id: {
**values['data'],
**{label: value for label, value in values.items() if label != 'data'}
}
for id, values in data.items()
}
This can also be applied to the original to simplify it:
mydict = {}
for id, values in data.items():
mydict[id] = {}
for label, value in values.items():
if label == 'data':
mydict[id].update(value)
else:
mydict[id][label] = value
let me simplify;
sample_data = {
"001": {
"data": {
"fruit": 'apple',
"vegetable": 'spinach'
},
"text": 'lorem ipsum',
"status": 10
},
"002": {
"data": {
"fruit": 'apple',
"vegetable": 'spinach'
},
"text": 'lorem ipsum',
"status": 10
}
}
for key, row in sample_data.items():
if 'data' in row.keys():
info = sample_data[key].pop('data')
sample_data[key] = {**row, **info}
print(sample_data)

Building dictionary dynamically from json config

I have an input list from users and a standard config. Only user_input can change. based on the user_input , would need to select only required data in a dictionary. ie Most of the config would remain as it is, just fruits are filtered based on user_input.
user_input = ['Apple','Grapes','Watermelon']
superset_config = """
[
{
"input":"source_1",
"operation":"add",
"fruits": {
"Apple":"Red",
"Grapes": ["Red","Yellow"],
"Orange": "Yellow"
},
"output":"target_1"
},
{
"input":"source_2",
"fruits": { "Watermelon":"green"},
"output":"target_2"
}
]
"""
Desired results: just remove 'Orange' from fruits, as Orange is not part of user input.rest everything is same.
[
{
"input":"source_1",
"operation":"add",
"fruits": {
"Apple":"Red",
"Grapes": ["Red","Yellow"]
},
"output":"target_1"
},
{
"input":"source_2",
"fruits": { "Watermelon":"green"},
"output":"target_2"
}
]
Transform:
import json
superset_definitions = json.loads(superset_config)
superset_definitions
filtered_common_defintion = []
for each_input in user_input:
for each_node in superset_definitions:
if each_input in each_node['fruits'].keys():
temp_dictionary = {}
temp_dictionary[each_input] = each_node['fruits'][each_input]
filtered_common_defintion.append(temp_dictionary)
filtered_common_defintion
The above code performs filter on fruits, but I am not sure how to capture remaining elements of the config. Can someone please guide?
You can use json.loads to convert the JSON string into Python objects (here, a list of dictionaries), then iterate over that list and build a temporary dictionary for each entry: if the key is fruits, keep only the fruits whose names are in user_input (with their corresponding values); otherwise, copy the value unchanged. Finally, append each temporary dictionary to the result list:
result = []
for d in json.loads(superset_config):
temp = {}
for k in d:
if k=='fruits':
fruits = {key:value for key,value in d[k].items() if key in user_input}
temp[k] = fruits
else:
temp[k] = d[k ]
result.append(temp)
OUTPUT:
[{'input': 'source_1',
'operation': 'add',
'fruits': {'Apple': 'Red',
'Grapes': ['Red', 'Yellow']
},
'output': 'target_1'},
{'input': 'source_2',
'fruits': {'Watermelon': 'green'
},
'output': 'target_2'}]
You can use Python's Dictionary Comprehension
for each in superset_definitions:
each['fruits'] = {k: v for k, v in each['fruits'].items() if k in user_input}
Here is another approach for the same:
import json
import copy
user_input = ['Apple','Grapes','Watermelon']
superset_config = """
[
{
"input":"source_1",
"operation":"add",
"fruits": {
"Apple":"Red",
"Grapes": ["Red","Yellow"],
"Orange": "Yellow"
},
"output":"target_1"
},
{
"input":"source_2",
"fruits": { "Watermelon":"green"},
"output":"target_2"
}
]
"""
config = json.loads(superset_config)
for item in config:
for fruit_name, fruit_value in list(item["fruits"].items()):
if fruit_name not in user_input:
del item["fruits"][fruit_name]
print (config)
Output:
[{'input': 'source_1', 'operation': 'add', 'fruits': {'Apple': 'Red', 'Grapes': ['Red', 'Yellow']}, 'output': 'target_1'}, {'input': 'source_2', 'fruits': {'Watermelon': 'green'}, 'output': 'target_2'}]

python itertools groupby return tuple

I need to parse the flatten structure and create nested structure using the list of keys provided. I have solved the problem but I am looking for an improvement and I would like to learn what I can change in my code. Can somebody review it and refactor using better knowledge?
src_data = [
{
"key1": "XX",
"key2": "X111",
"key3": "1aa",
"key4": 1
},
{
"key1": "YY",
"key2": "Y111",
"key3": "1bb",
"key4": 11
},
{
"key1": "ZZ",
"key2": "Z111",
"key3": "1cc",
"key4": 2.4
},
{
"key1": "AA",
"key2": "A111",
"key3": "1cc",
"key4": 33333.2122
},
{
"key1": "BB",
"key2": "B111",
"key3": "1bb",
"key4": 2
},
]
this is my code I developed so far creating the final result.
def plant_tree(ll):
    """Build a tree of nested dicts from an iterable of key paths.

    Every path in ``ll`` becomes a chain of nested keys; paths sharing a
    prefix share the corresponding branch.  The end of each path is an empty
    dict.
    """
    root = {}
    for path in ll:
        node = root
        for key in path:
            # Reuse the branch if it exists, otherwise start a new one.
            node = node.setdefault(key, {})
    return root
def make_nested_object(tt, var):
    """Wrap ``var`` in nested single-key dicts following the key path ``tt``.

    Returns a dict with the path under ``'n_path'`` and the nested structure
    (``var`` at the innermost level) under ``'n_structure'``.
    """
    nested = var
    # Wrap from the innermost key outwards.
    for key in tt[::-1]:
        nested = {key: nested}
    return {'n_path': tt, 'n_structure': nested}
def getFromDict(dataDict, mapList):
    """Return the value reached by indexing ``dataDict`` with each key of ``mapList`` in turn."""
    node = dataDict
    for key in mapList:
        node = node[key]
    return node
def set_nested_item(dataDict, mapList, val):
    """Set item in nested dictionary.

    Walks ``dataDict`` along every key of ``mapList`` except the last, then
    assigns ``val`` under the last key.  All intermediate keys must already
    exist (``KeyError`` otherwise) and ``mapList`` must not be empty
    (``IndexError`` otherwise).  Returns ``dataDict`` itself.
    """
    node = dataDict
    # A plain loop needs neither ``reduce`` nor ``getitem`` in scope.
    for key in mapList[:-1]:
        node = node[key]
    node[mapList[-1]] = val
    return dataDict
def update_tree(data_tree):
    """Fill the leaves of ``data_tree`` with the grouped rows from ``res_out``.

    ``res_out`` is read from module scope; each of its keys is used as the
    path into ``data_tree`` and its value is stored at that path.
    Returns the updated tree.
    """
    # Building a nested dict per leaf and reading the value back out again
    # yields the value itself, so store it directly.
    for leaf_path, leaf_value in res_out.items():
        data_tree = set_nested_item(data_tree, leaf_path, leaf_value)
    return data_tree
This is the customized itemgetter function from this question:
def customed_itemgetter(*args):
    """Return an ``itemgetter`` whose result is always a tuple.

    ``itemgetter`` yields a bare value for a single key and a tuple for two
    or more keys; this wrapper puts the single-key result into a 1-tuple so
    callers always get a tuple back.
    """
    f = itemgetter(*args)
    # Two or more keys already produce a tuple; only one key needs wrapping.
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
define the nesting level
nesting_keys = ['key1', 'key3', 'key2']
grouper = customed_itemgetter(*nesting_keys)
ii = groupby(sorted(src_data, key=grouper), grouper)
res_out = {key: [{k:v for k,v in i.items() if k not in nesting_keys} for i in group] for key,group in ii}
#
ll = ([dd[x] for x in nesting_keys] for dd in src_data)
data_tree = plant_tree(ll)
get results
result = update_tree(data_tree)
How can I improve my code?
If the itemgetter [Python-doc] is given a single element, it returns that single element, and does not wrap it in a singleton-tuple.
We can however construct a function for that, like:
from operator import itemgetter
def itemgetter2(*args):
    """Like ``itemgetter``, but the returned getter always yields a tuple.

    With a single key the plain ``itemgetter`` returns the bare element, so
    that case is wrapped in a singleton tuple; with several keys the
    ``itemgetter`` result is already a tuple and is returned unchanged.
    """
    f = itemgetter(*args)
    if len(args) > 1:
        return f
    return lambda obj: (f(obj),)
then we can thus use the new itemgetter2, like:
# Group on the key names (nesting_keys from the question), not on the
# per-row value lists held in ``ll``.
grouper = itemgetter2(*nesting_keys)
ii = groupby(sorted(src_data, key=grouper), grouper)
EDIT: Based on your question however, you want to perform multilevel grouping, we can make a function for that, like:
def multigroup(groups, iterable, index=0):
    """Group ``iterable`` into nested dicts, one level per key in ``groups``.

    ``index`` is the position in ``groups`` handled by this call.  Once all
    keys are consumed the remaining items are returned as a list.
    """
    if index >= len(groups):
        return list(iterable)

    key_func = itemgetter(groups[index])
    # groupby only merges adjacent items, so sort on the same key first.
    ordered = sorted(iterable, key=key_func)
    nested = {}
    for key, members in groupby(ordered, key_func):
        nested[key] = multigroup(groups, members, index=index + 1)
    return nested
For the src_data originally given in the question, this then generates:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'a': 1, 'b': 2, 'z': 3}]}, 2: {3: [{'a': 2, 'b': 3, 'e': 2}]}, 4: {3: [{'a': 4, 'x': 3, 'b': 3}]}}
You can post-process the values in the list(..) call however. We can for example generate dictionaries without the elements in the grouping columns:
def multigroup(groups, iterable):
    """Group ``iterable`` into nested dicts keyed by each name in ``groups``.

    The innermost level is a list of the grouped items with the grouping
    keys themselves removed.
    """
    excluded = set(groups)
    getters = [itemgetter(name) for name in groups]
    depth = len(groups)

    def descend(items, index=0):
        # Past the last grouping key: emit the items without those keys.
        if index >= depth:
            return [
                {k: v for k, v in item.items() if k not in excluded}
                for item in items
            ]
        getter = getters[index]
        ordered = sorted(items, key=getter)
        return {
            key: descend(members, index=index + 1)
            for key, members in groupby(ordered, getter)
        }

    return descend(iterable)
For the given sample input, we get:
>>> multigroup(['a', 'b'], src_data)
{1: {2: [{'z': 3}]}, 2: {3: [{'e': 2}]}, 4: {3: [{'x': 3}]}}
or for the new sample data:
>>> pprint(multigroup(['key1', 'key3', 'key2'], src_data))
{'AA': {'1cc': {'A111': [{'key4': 33333.2122}]}},
'BB': {'1bb': {'B111': [{'key4': 2}]}},
'XX': {'1aa': {'X111': [{'key4': 1}]}},
'YY': {'1bb': {'Y111': [{'key4': 11}]}},
'ZZ': {'1cc': {'Z111': [{'key4': 2.4}]}}}

generating config styled file from list of dictionaries in most pythonic way

I have a list of dictionaries as,
[{'section_id': 1,
'parent_sec_id': 0,
'sec_name': 'apple',
'key1': 'val1'},
{'section_id': 2,
'parent_sec_id': 0,
'sec_name': 'banana',
'key2': 'val2'},
{'section_id': 3,
'parent_sec_id': 1,
'sec_name': 'orange',
'key3': 'val3'},
{'section_id': 4,
'parent_sec_id': 2,
'sec_name': 'guava',
'key4': 'val4'},
{'section_id': 5,
'parent_sec_id': 3,
'sec_name': 'grape',
'key5': 'val5'}]
Each dictionary is identified by its 'section_id' and also has a 'parent_sec_id' key which tells whether it is a child of another dictionary. So basically, if parent_sec_id is set to 0 (zero) then it's a top-level (parent) dictionary; otherwise it's a child of the dictionary with that section id.
Now from the above list of dictionaries, I was asked to achieve the following format (yes i was asked, part of interview):
apple
{
'key1': 'val1'
orange
{
'key3': 'val3'
grape
{
'key5': 'val5'
}
}
}
banana
{
'key2': 'val2'
guava
{
'key4': 'val4'
}
}
I was told this is the format used to write config files for any program.
I'm just curious as to what could have been the best possible way to generate a file from this list of dictionaries.
You can recursively output sections whose parent_sec_id matches the given parent ID, with the output from the children indented:
def transform(sections, parent=0):
    """Render the sections whose parent is ``parent`` as a list of text lines.

    Each matching section becomes its name, an opening brace, its own
    key/value pairs and the recursively rendered child sections (both
    indented by four spaces), and a closing brace.
    """
    meta_keys = ('section_id', 'parent_sec_id', 'sec_name')
    pad = ' ' * 4
    lines = []
    for section in sections:
        if section['parent_sec_id'] != parent:
            continue
        lines.append(section['sec_name'])
        lines.append('{')
        lines.extend(
            "%s'%s': '%s'" % (pad, key, value)
            for key, value in section.items()
            if key not in meta_keys
        )
        # Children are rendered on their own, then shifted one level right.
        for child_line in transform(sections, section['section_id']):
            lines.append(pad + child_line)
        lines.append('}')
    return lines
Assuming your sample list of dicts is stored as variable sections, then '\n'.join(transform(sections)) would return:
apple
{
'key1': 'val1'
orange
{
'key3': 'val3'
grape
{
'key5': 'val5'
}
}
}
banana
{
'key2': 'val2'
guava
{
'key4': 'val4'
}
}
Not very elegant, but you can collect your items in a collections.defaultdict(), then output your dictionary paths to a new file.
The basic idea is to first collect the root dictionaries (those whose parent id is 0), then add the subsequent child dictionaries to these roots. The last item in each list is the most recently added one, so a child is attached to a list when its parent id matches that item's section id.
Demo:
from collections import defaultdict
def group_sections(data, parent_id, section_id, root_id=0):
    """Group sections into a dictionary of lists, connecting on parent keys.

    ``parent_id`` and ``section_id`` are the names of the keys holding each
    entry's parent id and own id.  Entries whose parent id equals
    ``root_id`` start a list keyed by their section id; every other entry is
    appended to the list whose most recently added item is its parent.
    The returned dicts have the parent-id and section-id keys removed.
    """
    chains = defaultdict(list)
    pending = []

    # Roots open a chain immediately; the remaining entries are attached later.
    for entry in data:
        if entry[parent_id] == root_id:
            chains[entry[section_id]].append(entry)
        else:
            pending.append(entry)

    for entry in pending:
        for chain in chains.values():
            # Attach only below the most recently added item of a chain.
            if entry[parent_id] == chain[-1][section_id]:
                chain.append(entry)

    id_keys = (parent_id, section_id)
    result = {}
    for root_key, chain in chains.items():
        result[root_key] = [
            {name: value for name, value in entry.items() if name not in id_keys}
            for entry in chain
        ]
    return result
def write_config_file(filename, data, name_key):
    """Write config file, using dictionary of lists.

    For every list in ``data`` the dicts are written in order: the value
    stored under ``name_key`` opens a new brace-delimited section one tab
    deeper, every other key is written as a quoted key/value line at the
    current depth.  After each list, all opened sections are closed.
    """
    with open(filename, mode="w") as config_file:
        for group in data.values():
            depth = 0
            for dic in group:
                for key, value in dic.items():
                    pad = "\t" * depth
                    if key == name_key:
                        # Section header: name line plus opening brace.
                        config_file.write("%s%s\n%s{\n" % (pad, value, pad))
                        depth += 1
                    else:
                        config_file.write("%s'%s': '%s'\n" % (pad, key, value))
            # Close the opened sections from the innermost outwards.
            for level in reversed(range(depth)):
                config_file.write("%s}\n" % ("\t" * level))
if __name__ == "__main__":
list_dicts = [
{"section_id": 1, "parent_sec_id": 0, "sec_name": "apple", "key1": "val1"},
{"section_id": 2, "parent_sec_id": 0, "sec_name": "banana", "key2": "val2"},
{"section_id": 3, "parent_sec_id": 1, "sec_name": "orange", "key3": "val3"},
{"section_id": 4, "parent_sec_id": 2, "sec_name": "guava", "key4": "val4"},
{"section_id": 5, "parent_sec_id": 3, "sec_name": "grape", "key5": "val5"},
]
data = group_sections(data=list_dicts, parent_id="parent_sec_id", section_id="section_id")
write_config_file(filename='config', data=data, name_key='sec_name')
config file:
apple
{
'key1': 'val1'
orange
{
'key3': 'val3'
grape
{
'key5': 'val5'
}
}
}
banana
{
'key2': 'val2'
guava
{
'key4': 'val4'
}
}
Note: This is an iterative solution, not a recursive one.

Create dict from list of list

I have a text file which I read in. This is a log file so it follows a particular pattern. I need to create a JSON ultimately, but from researching this problem, once it is in a dict it will be a matter of using json.loads() or json.dumps().
A sample of the text file is below.
INFO:20180606_141527:submit:is_test=False
INFO:20180606_141527:submit:username=Mary
INFO:20180606_141527:env:sys.platform=linux2
INFO:20180606_141527:env:os.name=ubuntu
The dict structure which I am ultimately looking for is
{
"INFO": {
"submit": {
"is_test": false,
"username": "Mary"
},
"env": {
"sys.platform": "linux2",
"os.name": "ubuntu"
}
}
}
I am ignoring the timestamp information in each list for now.
This is a snippet of the code I am using,
import csv
tree_dict = {}
with open('file.log') as file:
for row in file:
for key in reversed(row.split(":")):
tree_dict = {key: tree_dict}
Which results in an undesired output,
{'INFO': {'20180606_141527': {'submit': {'os.name=posix\n': {'INFO': {'20180606_141527': {'submit': {'sys.platform=linux2\n': {'INFO': {'20180606_141527': {'submit': {'username=a227874\n': {'INFO': {'20180606_141527': {'submit': {'is_test=False\n': {}}}}}}}}}}}}}}}}}
I need to dynamically populate the dict because I don't know the actual field/key names.
with open('demo.txt') as f:
lines = f.readlines()
dct = {}
for line in lines:
# param1 == INFO
# param2 == submit or env
# params3 == is_test=False etc.
param1, _, param2, params3 = line.strip().split(':')
# create dct[param1] = {} if it is not created
dct.setdefault(param1, {})
# create dct[param1][param2] = {} if it is no created
dct[param1].setdefault(param2, {})
# for example params3 == is_test=False
# split it by '=' and now we unpack it
# k == is_test
# v == False
k, v = params3.split('=')
# and update our `dict` with the new values
dct[param1][param2].update({k: v})
print(dct)
Output
{
'INFO': {
'submit': {
'is_test': 'False', 'username': 'Mary'
},
'env': {
'sys.platform': 'linux2', 'os.name': 'ubuntu'
}
}
}
This is one of the rare cases where recursion in Python seems to be appropriate and helpful. The following function adds a value to the hierarchical dictionary d specified by the list of keys:
def add_to_dict(d, keys, value):
    """Store ``value`` in the nested dict ``d`` under the path ``keys``.

    Missing intermediate levels are created as empty dicts; the last key of
    ``keys`` receives ``value``.  Works for paths of arbitrary depth.
    """
    node = d
    for key in keys[:-1]:
        if key not in node:
            node[key] = {}  # Create a new subdict
        node = node[key]
    node[keys[-1]] = value  # The last key
The function works with the dictionaries of arbitrary depth. The rest is just the matter of calling the function:
d = {}
for line in file:
keys, value = line.split("=")
keys = keys.split(":")
add_to_dict(d, keys, value.strip())
Result:
{'INFO': {'20180606_141527': {
'submit': {'is_test': 'False',
'username': 'Mary'},
'env': {'sys.platform': 'linux2',
'os.name': 'ubuntu'}}}}
You can modify the code to exclude certain levels (like the timestamp).
You could use a nested collections.defaultdict() here:
from collections import defaultdict
from pprint import pprint
d = defaultdict(lambda: defaultdict(dict))
with open('sample.txt') as in_file:
for line in in_file:
info, _, category, pair = line.strip().split(':')
props, value = pair.split('=')
d[info][category][props] = value
pprint(d)
Which gives the following:
defaultdict(<function <lambda> at 0x7ff8a341aea0>,
{'INFO': defaultdict(<class 'dict'>,
{'env': {'os.name': 'ubuntu',
'sys.platform': 'linux2'},
'submit': {'is_test': 'False',
'username': 'Mary'}})})
Note: defaultdict() is a subclass of the builtin dict, so there is no reason to convert it to dict in the end result. Additionally, defaultdict() can also be serialized to JSON with json.dumps().
You can use itertools.groupby:
import itertools, re
# Split every line on ':' and '='.  The raw-string pattern avoids an invalid
# escape sequence, and the with-block closes the file once it has been read.
with open('filename.txt') as log_file:
    content = [re.split(r'[=:]', line.strip('\n')) for line in log_file]
# Drop the second field (the timestamp) of every row.
new_content = [[a, *c] for a, _, *c in content]
def group_vals(d):
    """Turn rows of ``[key, ..., key, value]`` into nested dicts.

    Rows are grouped on their first element; the remainder of each row is
    grouped recursively.  A group whose remainders are all single values is
    a leaf and maps directly to that value (the last one wins when the same
    path occurs more than once).
    """
    grouped = {}
    # groupby only merges adjacent rows, so sort on the grouping element.
    rows = sorted(d, key=lambda x: x[0])
    for head, members in itertools.groupby(rows, key=lambda x: x[0]):
        tails = [rest for _, *rest in members]
        # Decide "leaf" from the shape of the remainders, not from how many
        # rows share the key: a branch holding a single entry is still a
        # branch and must keep its value.
        if all(len(tail) == 1 for tail in tails):
            grouped[head] = tails[-1][0]
        else:
            grouped[head] = group_vals(tails)
    return grouped
import json
print(json.dumps(group_vals(new_content), indent=4))
Output:
{
"INFO": {
"env": {
"os.name": "ubuntu",
"sys.platform": "linux2"
},
"submit": {
"is_test": "False",
"username": "Mary"
}
}
}
Check for the presence of keys:
import csv
import json
tree_dict = {}
with open('file.log') as file:
    for row in file:
        # Drop the trailing newline so the last field compares cleanly
        # against "True"/"False"; skip blank lines.
        row = row.strip()
        if not row:
            continue
        keys = row.split(":")
        # Split on the first '=' only, so values may themselves contain '='.
        key, value = keys[-1].split("=", 1)
        if value == "False":
            value = False
        elif value == "True":
            value = True
        # First field is the top level, second-to-last field the category.
        tree_dict.setdefault(keys[0], {}).setdefault(keys[-2], {})[key] = value
dumped = json.dumps(tree_dict)
import re
from functools import reduce
with open('file.txt') as f:
lines = f.readlines()
def rec_merge(d1, d2):
    """Return a new dict that recursively merges ``d2`` into ``d1``.

    Keys present in both inputs are merged recursively when both values are
    dicts; otherwise the value from ``d2`` wins.  Neither input is modified.
    """
    merged = d1.copy()
    for key, value in d2.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            merged[key] = rec_merge(current, value)
        else:
            # Non-dict values (or a key new to d1) are simply taken from d2.
            merged[key] = value
    return merged
# re.findall needs a single string; ``lines`` is the list returned by
# readlines(), so join it back into one text before matching.
lst_of_tup = re.findall(r'^([^:]*):[\d_]+:([^:]*):([^=]*)=(.*)$', ''.join(lines), re.MULTILINE)
lst_of_dct = [reduce(lambda x,y: {y:x}, reversed(t)) for t in lst_of_tup]
dct = reduce(rec_merge, lst_of_dct)
pprint(dct)
# {'INFO': {'env': {'os.name': 'ubuntu', 'sys.platform': 'linux2'},
# 'submit': {'is_test': 'False', 'username': 'Mary'}}}
Source :
import os
with open('file.log') as file:
tree_dict = {}
is_test = False
username = ""
sysplatform = ""
osname = ""
for row in file:
row = row.rstrip('\n')
for key in reversed(row.split(":")):
if not key.find('is_test'):
is_test = key.split('=')[1]
elif not key.find('username'):
username =key.split('=')[1]
elif not key.find('sys.platform'):
sysplatform = key.split('=')[1]
elif not key.find('os.name'):
osname = key.split('=')[1]
tree_dict = {
"INFO": {
"submit": {
"is_test": is_test,
"username": username
},
"env": {
"sys.platform": sysplatform,
"os.name": osname
}
}
}
print(tree_dict)
Result :
{'INFO': {'submit': {'is_test': 'False', 'username': 'Mary'}, 'env': {'sys.platform': 'linux2', 'os.name': 'ubuntu'}}}

Categories