How to set dictionary element with key as array? - python

If I wanted to use an array to get a value from a dictionary, I would do something like this:
def get_dict_with_arr(d, arr):
    """Return the value reached by following the keys in *arr* through *d*.

    Args:
        d: The (possibly nested) mapping to read from.
        arr: Sequence of keys, outermost first. An empty sequence returns
            *d* itself.

    Raises:
        KeyError: If a key along the path is missing.
    """
    accumulator = d
    for elem in arr:
        # Descend one level per key.
        accumulator = accumulator[elem]
    return accumulator
and use it like this:
test_dict = {
'this': {
'is': {
'it': 'test'
}
}
}
get_dict_with_arr(test_dict, ['this', 'is', 'it']) # returns 'test'
My question is, how may I write a function that sets the value instead of getting it? Basically I want to write a set_dict_with_arr(d, arr, value) function.

Try:
def set_dict_with_arr(d, arr, value):
    """Set the entry addressed by the key path *arr* inside *d* to *value*.

    Intermediate dictionaries that do not exist yet are created on the way.

    Args:
        d: The nested dictionary to modify in place.
        arr: Non-empty sequence of keys, outermost first.
        value: The value to store under the last key.

    Returns:
        The same dictionary *d*, to allow chaining.

    Raises:
        IndexError: If *arr* is empty (there is no key to assign to).
    """
    if not arr:
        raise IndexError("arr must contain at least one key")
    cur_d = d
    for key in arr[:-1]:
        # setdefault returns the existing or newly created child, so one
        # lookup both creates the level and descends into it.
        cur_d = cur_d.setdefault(key, {})
    cur_d[arr[-1]] = value
    return d
test_dict = {"this": {"is": {"it": "test"}}}
test_dict = set_dict_with_arr(test_dict, ["this", "is", "it"], "new value")
print(test_dict)
Prints:
{"this": {"is": {"it": "new value"}}}

Related

Python: recursively append dictionary to another

I've searched and found this Append a dictionary to a dictionary but that clobbers keys from b if they exist in a..
I'd like to essentially recursively append 1 dictionary to another, where:
keys are unique (obviously, it's a dictionary), but each dictionary is fully represented in the result such that a.keys() and b.keys() are both subsets of c.keys()
if the same key is in both dictionaries, the resulting key contains a list of values from both, such that a[key] and b[key] are in c[key]
the values could be another dictionary, (but nothing deeper than 1 level), in which case the same logic should apply (append values) such that a[key1][key2] and b[key1][key2] are in c[key][key2]
The basic example is where 2 dictionary have keys that don't overlap, and I can accomplish that in multiple ways.. c = {**a, **b} for example, so I haven't covered that below
A trickier case:
a = {
"key1": "value_a1"
"key2": "value_a2"
}
b = {
"key1": "value_b1"
"key3": "value_b3"
}
c = combine(a, b)
c >> {
"key1": ["value_a1", "value_b1"],
"key2": "value_a2",
"key3": "value_b3"
}
An even trickier case
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
c >> {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2", "sub_value_b1"], #sub_value_a1 is not duplicated
"sub_key_2": ["sub_value_a3", "sub_value_b3"]
},
"key2": "value_a2",
"key3": "value_b3" # ["value_b3"] this would be okay, following from the code comment above
}
Caveats:
Python 3.6
The examples show lists being created as_needed, but I'm okay with every non-dict value being a list, as mentioned in the code comments
The values within the lists will always be strings
I tried to explain as best I could but can elaborate more if needed. Been working on this for a few days and keep getting stuck on the sub key part
There is no simple built-in way of doing this, but you can recreate the logic in python.
def combine_lists(a: list, b: list) -> list:
    """Return the items of *a* followed by the items of *b* not seen yet.

    Order is preserved and neither input list is modified.
    """
    merged = list(a)
    for item in b:
        if item not in merged:
            merged.append(item)
    return merged


def combine_strs(a: str, b: str):
    """Combine two strings.

    Returns:
        *a* itself when both strings are equal, otherwise the two-element
        list ``[a, b]``.
    """
    if a == b:
        return a
    return [a, b]


class EMPTY:
    "A sentinel representing an empty value."


def combine_dicts(a: dict, b: dict) -> dict:
    """Merge *a* and *b* into a new dictionary without losing values.

    Keys present in only one input are copied over unchanged. For keys
    present in both:

    * two lists are concatenated without duplicates,
    * two strings become ``[a, b]`` (or stay a single string when equal),
    * a string paired with a list is treated as a one-element list,
    * two dictionaries are merged recursively.

    Raises:
        RuntimeError: If the two values for a key cannot be combined
            (for example a dictionary on one side and a string on the other).
    """
    output = {}
    # Keys of `a` first, then the keys only `b` has, to keep a stable order.
    keys = list(a) + [k for k in b if k not in a]
    for key in keys:
        aval = a.get(key, EMPTY)
        bval = b.get(key, EMPTY)
        if bval is EMPTY:
            output[key] = aval
        elif aval is EMPTY:
            output[key] = bval
        elif isinstance(aval, list) and isinstance(bval, list):
            output[key] = combine_lists(aval, bval)
        elif isinstance(aval, str) and isinstance(bval, str):
            output[key] = combine_strs(aval, bval)
        elif isinstance(aval, dict) and isinstance(bval, dict):
            output[key] = combine_dicts(aval, bval)
        elif isinstance(aval, (str, list)) and isinstance(bval, (str, list)):
            # One side is a bare string, the other a list: promote the
            # string to a one-element list so the values can be merged.
            alist = aval if isinstance(aval, list) else [aval]
            blist = bval if isinstance(bval, list) else [bval]
            output[key] = combine_lists(alist, blist)
        else:
            raise RuntimeError(
                f"Cannot combine types: {type(aval)} and {type(bval)}"
            )
    return output
Sounds like you want a specialised version of dict. So, you could subclass it to give you the behaviour you want. Being a bit of a Python noob, I started with the answer here : Subclassing Python dictionary to override __setitem__
Then I added the behaviour in your couple of examples.
I also added a MultiValue class which is a subclass of list. This makes it easy to tell if a value in the dict already has multiple values. Also it removes duplicates, as it looks like you don't want them.
class MultiValue(list):
    """List holding the distinct values collected for one dictionary key."""

    def append(self, value):
        """Add *value*, skipping anything that is already present.

        A list (including another ``MultiValue``) is treated as several
        values and merged item by item; any other object is one value.
        """
        incoming = value if isinstance(value, list) else [value]
        for item in incoming:
            if item not in self:
                super().append(item)


class MultiValueDict(dict):
    """dict that collects values instead of overwriting them.

    Assigning to a key that already exists merges the new value with the
    stored one: dictionaries are merged recursively, everything else is
    gathered in a duplicate-free ``MultiValue``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Route the initial content through update() so that it goes
        # through the merging __setitem__ like every later assignment.
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """Store *value* under *key*, merging with any existing value."""
        if key in self:
            existing_value = self[key]
            if isinstance(existing_value, dict) and isinstance(value, dict):
                if not isinstance(existing_value, MultiValueDict):
                    # Promote a plain nested dict to a merging dict; the
                    # copy also keeps the caller's original dict untouched.
                    existing_value = MultiValueDict(existing_value)
                    super().__setitem__(key, existing_value)
                existing_value.update(value)
                return
            if isinstance(existing_value, MultiValue):
                existing_value.append(value)
                value = existing_value
            else:
                # Build through append() so equal values are stored once
                # and plain lists are merged rather than nested.
                merged = MultiValue()
                merged.append(existing_value)
                merged.append(value)
                value = merged
        super().__setitem__(key, value)

    def update(self, *args, **kwargs):
        """Merge another mapping (or key/value pairs) and keyword items."""
        if args:
            if len(args) > 1:
                raise TypeError("update expected at most 1 arguments, "
                                "got %d" % len(args))
            for key, value in dict(args[0]).items():
                self[key] = value
        for key, value in kwargs.items():
            self[key] = value

    def setdefault(self, key, value=None):
        """Return ``self[key]``, storing *value* first if *key* is missing."""
        if key not in self:
            self[key] = value
        return self[key]
Example 1:
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
# combine by creating a MultiValueDict then using update to add b to it.
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': ['value_a1', 'value_b1'], 'key2': 'value_a2', 'key3': 'value_b3'}
Example 2: The value for key1 is created as a MultiValueDict and the value for sub_key_1 is a MultiValue, so this may not fit what you're trying to do. It depends on how you're building your data set.
a = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_a2"]),
"sub_key_2": "sub_value_a3"
}),
"key2": "value_a2"
}
b = {
"key1": MultiValueDict({
"sub_key_1": MultiValue(["sub_value_a1", "sub_value_b1"]),
"sub_key_2": "sub_value_b3"
}),
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = MultiValueDict(a)
c.update(b)
print(c)
# gives {'key1': {'sub_key_1': ['sub_value_a1', 'sub_value_a2', 'sub_value_b1'], 'sub_key_2': ['sub_value_a3', 'sub_value_b3']}, 'key2': 'value_a2', 'key3': 'value_b3'}
a = {
"key1": "value_a1",
"key2": "value_a2"
}
b = {
"key1": "value_b1",
"key3": "value_b3"
}
def appendValues(ax, cx):
    """Return a new list with the values of *cx* followed by those of *ax*.

    Either argument may be a list or a single value. Duplicates are
    dropped, first-seen order is kept, and neither input is modified.
    """
    merged = []
    for source in (cx, ax):
        items = source if isinstance(source, list) else [source]
        for item in items:
            if item not in merged:
                merged.append(item)
    return merged


def combine(a, b):
    """Merge dictionaries *a* and *b* into a new dictionary.

    Keys found in only one input are copied over unchanged. When a key is
    in both, nested dictionaries are combined recursively and all other
    values are gathered in a duplicate-free list, values of *a* first.

    Raises:
        TypeError: If a key maps to a dictionary in one input and to a
            non-dictionary in the other.
    """
    c = {}
    for key in list(a) + [k for k in b if k not in a]:
        if key not in b:
            c[key] = a[key]
        elif key not in a:
            c[key] = b[key]
        else:
            a_val, b_val = a[key], b[key]
            a_is_dict = isinstance(a_val, dict)
            b_is_dict = isinstance(b_val, dict)
            if a_is_dict and b_is_dict:
                c[key] = combine(a_val, b_val)
            elif a_is_dict or b_is_dict:
                raise TypeError(
                    "cannot combine a dictionary with a non-dictionary "
                    "value for key %r" % (key,)
                )
            else:
                # appendValues lists its second argument first, so pass
                # a's value there to keep a's values ahead of b's.
                c[key] = appendValues(b_val, a_val)
    return c
c = combine(a, b)
print(c)
print("==========================")
a = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_a2"],
"sub_key_2": "sub_value_a3"
},
"key2": "value_a2"
}
b = {
"key1": {
"sub_key_1": ["sub_value_a1", "sub_value_b1"],
"sub_key_2": "sub_value_b3"
},
"key3": "value_b3" # I'm okay with converting this to a list even if it's not one
}
c = combine(a, b)
print(c)

Call a dictionary recursively to find the last value

How can I call a dictionary recursively to find the last value, assuming dictionaries may have different depths?
a = {
'b': {
'c':'d'
}
}
m = {
'b': {
'c':{
'd':'e'
}
}
}
It's just two examples, I'm trying to get the last value, no matter how deep it's located.
The function doesn't work. How should I pass the final value when it get to the string type?
# Asker's non-working attempt, kept as posted to illustrate the problem.
def get_value(x):
# BUG: a dict is passed back to get_value unchanged, so the call recurses on
# the same object until Python raises RecursionError.
if isinstance(x, dict):
return get_value(x)
else:
# BUG: this branch only runs for non-dict values, yet it calls the dict
# methods .get() and .keys() on them (a str, for example, has neither).
return x.get(list(x.keys())[0])
Expected outputs are:
get_value(a) == 'd'
get_value(m) == 'e'
This seems to work:
a = {
'b': {
'c':'d'
}
}
m = {
'b': {
'c':{
'd':'e'
}
}
}
def get_value(x):
    """Return the innermost value found by following the first key each time.

    Args:
        x: A nested dictionary, or any other object.

    Returns:
        The first non-dictionary value reached by repeatedly taking the
        first entry of each nested dictionary. A non-dictionary argument is
        returned unchanged, and an empty dictionary is returned as it is
        because it has no entry to descend into.
    """
    # Iterating instead of recursing avoids building a key list per level
    # and cannot hit the recursion limit on deeply nested input.
    while isinstance(x, dict) and x:
        x = next(iter(x.values()))
    return x
print (get_value(a))
print (get_value(m))
Output
d
e
def get_value(x):
    """Return the innermost value found by following the first key each time.

    Args:
        x: A (possibly nested) dictionary.

    Returns:
        The first non-dictionary value reached by descending through the
        first entry of each level, or ``None`` when a level is empty.
    """
    if not x:
        return None
    # Only the first entry is ever inspected, so take it directly instead
    # of looping over the keys and returning on the first iteration.
    first = next(iter(x.values()))
    if isinstance(first, dict):
        return get_value(first)
    return first
The key had to be passed when recursive function was called again.

Flattening a dictionary of dictionaries that contain lists

I have a dictionary of dictionaries that looks like this:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
I need to flatten this dict into a dataframe. I tried to use json_normalize() to flatten the dictionary and the output I got looked like this:
My desired output is something like the one given below.
This can be done in R by using as.data.frame(unlist(data)) but I want to do the same flattening in Python. I am a novice in python so I dont have much idea about doing this.
I have made an attempt to normalize your json object by writing a recursive function as follows:
data={'data': 'input',
'test':
{
'and':
{
'range': {'month': [{'start': 'Jan','end': 'July'}]},
'Student': {'Name': ['ABC'], 'Class': ['10']}
}
}
}
# Retained for backward compatibility with code that reads these module-level
# names; `sequence` is no longer used while flattening.
sequence = ""
subDicts = []


def _iter_leaves(value, path):
    """Yield ``[dotted_path, leaf]`` pairs for every leaf below *value*."""
    if isinstance(value, dict):
        prefix = path + "." if path else ""
        for key, child in value.items():
            yield from _iter_leaves(child, prefix + str(key))
    elif isinstance(value, list) and len(value) == 1:
        # A one-element list is transparent: its item keeps the list's path.
        yield from _iter_leaves(value[0], path)
    elif isinstance(value, list) and value:
        # Several items: append the position so that no item is lost.
        for index, child in enumerate(value):
            yield from _iter_leaves(child, f"{path}.{index}")
    else:
        # Scalars of any type (and empty lists) are leaves.
        yield [path, value]


def findAllSubDicts(data):
    """Flatten the nested dictionary *data* into ``[path, value]`` pairs.

    Nested keys are joined with ``"."``. One-element lists are unwrapped,
    longer lists get the item position appended to the path, and every
    scalar value (not only strings) is reported.

    Args:
        data: The nested dictionary to flatten.

    Returns:
        A new list of ``[dotted_path, value]`` pairs in traversal order. The
        module-level ``subDicts`` is rebound to this list, so repeated calls
        no longer accumulate results.
    """
    global subDicts
    subDicts = list(_iter_leaves(data, ""))
    return subDicts
outDict = findAllSubDicts(data)
for i in outDict:
print(i[0].ljust(40," "), end=" ")
print(i[1])
Printing the results will give you:
data input
test.and.range.month.start Jan
test.and.range.month.end July
test.and.Student.Name ABC
test.and.Student.Class 10
Notify me if you need any clarification or any modification in my code.

Make nested dictionary, unflatten

I have a list of lists containing key and value like so:
[
['mounts:device', '/dev/sda3'],
['mounts:fstype:[0]', 'ext1'],
['mounts:fstype:[1]', 'ext3']
]
Well I can easily change the list to this
(Lists aren't separated by ':')
[
['mounts:device', '/dev/sda3'],
['mounts:fstype[0]', 'ext1'],
['mounts:fstype[1]', 'ext3']
]
Whatever suits better for this problem:
Problem is to create a dictionary:
{
'mounts': {
'device': '/dev/sda3',
'fstype': [
'ext1',
'ext3'
]
}
It should also be possible to have lists in lists for example:
['mounts:test:lala:fstype[0][0]', 'abc']
or
['mounts:test:lala:fstype:[0]:[0]', 'abc']
This is what I have so far:
def unflatten(pair_list):
    """Build a nested dictionary from ``[colon_separated_path, value]`` pairs.

    Every path segment is used as a plain dictionary key; list indexes such
    as ``[0]`` are not interpreted by this version.

    Args:
        pair_list: Iterable of two-item sequences ``[path, value]``.

    Returns:
        The nested dictionary described by the pairs.
    """
    root = {}
    for pair in pair_list:
        context = root
        key_list = pair[0].split(':')
        key_list_last_item = key_list.pop()
        for key in key_list:
            # Create the intermediate level on first use, then descend.
            context = context.setdefault(key, {})
        context[key_list_last_item] = pair[1]
    return root
Based on this answer https://stackoverflow.com/a/18648007/5413035, but as requested I need recursiveness and lists in the mix
Thanks in advance
Here is a solution using a tree of dict:
import collections
def tree():
    """Return a defaultdict whose missing keys become nested trees."""
    return collections.defaultdict(tree)


def _parse_part(part):
    """Turn a path segment into a key: ``"[3]"`` becomes ``3``, else as is."""
    if part.startswith("[") and part.endswith("]"):
        return int(part[1:-1])
    return part


def unflatten(pair_list):
    """Build a tree of dictionaries from ``[colon_separated_path, value]`` pairs.

    Segments written as ``[n]`` become integer keys so that the result can
    later be converted to lists; all other segments are used as string keys.

    Args:
        pair_list: Iterable of ``(path, value)`` pairs.

    Returns:
        The root ``defaultdict`` of the tree.

    Raises:
        ValueError: If a bracketed segment does not contain an integer.
    """
    root = tree()
    for mount, path in pair_list:
        # str.split always yields at least one segment, so `leaf` is bound.
        *parents, leaf = (_parse_part(part) for part in mount.split(":"))
        curr = root
        for index in parents:
            curr = curr[index]
        curr[leaf] = path
    return root
With the following input:
pair_list = [
['mounts:device', '/dev/sda3'],
['mounts:fstype:[0]', 'ext1'],
['mounts:fstype:[1]', 'ext3'],
['mounts:test:lala:fstype:[0]:[0]', 'abc']
]
You'll get:
{
"mounts": {
"fstype": {
"0": "ext1",
"1": "ext3"
},
"test": {
"lala": {
"fstype": {
"0": {
"0": "abc"
}
}
}
},
"device": "/dev/sda3"
}
}
Then you can use the recursive function make_list below to turn the dictionaries with integer keys into lists.
def make_list(root):
    """Recursively replace dictionaries keyed only by integers with lists.

    Args:
        root: A nested dictionary, or a leaf value of any type.

    Returns:
        A copy of the structure in which every non-empty dictionary whose
        keys are all integers is a list indexed by those keys (positions
        without a key hold ``None``). Other dictionaries stay dictionaries
        and leaves are returned unchanged.
    """
    if not isinstance(root, dict):
        # Any non-dict value is a leaf, not only strings.
        return root
    # `root and` keeps an empty dict a dict instead of calling max() on
    # an empty sequence.
    if root and all(isinstance(k, int) for k in root):
        values = [None] * (max(root) + 1)
        for index, child in root.items():
            values[index] = make_list(child)
        return values
    return {k: make_list(v) for k, v in root.items()}
Here is the result with the pair_list:
flat = unflatten(pair_list)
flat = make_list(flat)
You'll get:
{'mounts': {'device': '/dev/sda3',
'fstype': ['ext1', 'ext3'],
'test': {'lala': {'fstype': [['abc']]}}}}
Is it fine?
input1 = [
    ['mounts:device', '/dev/sda3'],
    ['mounts:fstype:[0]', 'ext1'],
    ['mounts:fstype:[1]', 'ext3'],
]
# Maps each value to the second segment of its path. Nothing below reads it;
# it is kept from the original answer.
input2 = {x[1]: x[0].split(':')[1] for x in input1}
# NOTE(review): these two lists are hard-coded rather than derived from
# input1, so the snippet only demonstrates the grouping step.
input3 = ['ext3', 'ext1', '/dev/sda3']
input4 = ['fstype', 'fstype', 'device']
# Group the values by key.
res = {}
for x, y in zip(input3, input4):
    res.setdefault(y, []).append(x)
# Unwrap single-item groups key by key. Zipping the keys with a separately
# reordered value list paired keys with the wrong values whenever a
# multi-valued key came before a single-valued one.
result = {
    key: values[0] if len(values) == 1 else values
    for key, values in res.items()
}
print(result)
Output :
{'device': '/dev/sda3', 'fstype': ['ext3', 'ext1']}

Better way to filter the rows with python

import pprint
full_key_list = set(["F1", "F2", "F3", "F4", "F5"])  # all expected fields
filt_key_list = set(["F2", "F5"])  # fields that should not be included
cont_list = []  # stores all filtered documents
read_in_cont1 = {"F1": 1, "F2": True, "F3": 'abc', "F4": 130, "F5": 'X1Z'}  # document1
read_in_cont2 = {"F1": 2, "F2": False, "F3": 'efg', "F4": 100, "F5": 'X4Z'}  # document2
read_in_cont3 = {"F1": 3, "F2": True, "F3": 'acd', "F4": 400, "F5": 'X2Z'}  # document3
# assume that read_in_conts contains the list of documents
read_in_conts = [read_in_cont1, read_in_cont2, read_in_cont3]
for one_item in read_in_conts:  # for each document in the list
    cont_dict = {}
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for key, value in one_item.items():
        if key not in filt_key_list:  # if the field should be included
            cont_dict[key] = value  # add this field to the temporary document
    cont_list.append(cont_dict)
pprint.pprint(cont_list)
Output:
[{'F1': 1, 'F3': 'abc', 'F4': 130},
{'F1': 2, 'F3': 'efg', 'F4': 100},
{'F1': 3, 'F3': 'acd', 'F4': 400}]
Here is what I want to achieve:
Given an original raw collection of documents (i.e. read_in_conts for simulation),
I need to filter the fields so that they are not included in further process. Above
is my implementation in Python. However, I think it is too heavy and expect to see
a clean solution for this task.
Thank you
# One filtered copy per document; items() works on Python 2 and 3 alike,
# whereas iteritems() exists only on Python 2.
cont_list = [dict((k, v) for k, v in d.items() if k not in filt_key_list)
             for d in read_in_conts]
or if you want a slightly more factored version:
def filter_out_keys(d, x):
    """Return a copy of dictionary *d* without the keys contained in *x*."""
    # items() works on Python 2 and 3 alike; iteritems() is Python 2 only.
    return dict((k, v) for k, v in d.items() if k not in x)
cont_list = [filter_out_keys(d, filt_key_list) for d in read_in_conts]
P.S. I'd suggest making filt_key_list a set() instead - it will make in checks faster.
def filter_dict(d, keys):
    """Return a copy of *d* without the entries whose key is in *keys*.

    Args:
        d: The dictionary to filter; it is not modified.
        keys: Container of keys to leave out (a set gives the fastest
            membership test).
    """
    # Filter on the `keys` argument rather than a module-level name, and use
    # items(), which exists on both Python 2 and 3.
    return dict((key, value) for key, value in d.items() if key not in keys)
cont_list = [filter_dict(d, filt_key_list) for d in read_in_conts]
Your code is fine. You can make it slightly shorter:
# sets can be faster if `ignored_keys` is actually much longer
ignored_keys = set(["F2", "F5"])
# the inline version of your loop:
# a dict comprehension inside a list comprehension
# (items() works on Python 2 and 3 alike; iteritems() is Python 2 only)
filtered = [{k: v for k, v in row.items() if k not in ignored_keys}
            for row in read_in_conts]

Categories