Flatten nested dictionary and overwrite values - python

I have a nested dictionary which I want to flatten while overwriting values of duplicate keys. Example input looks like this:
{
'abc': 1,
'foo': 2,
'cba': {'abc': 3, 'baz': {
'foo': 4
}}
}
The goal is to overwrite the value of each key in the top-level dict with the value of the same key from a lower-level dict; when a key occurs at several levels, the value from the deepest level wins.
The output needs to be this:
{
'abc': 3,
'foo': 4,
'cba': {'abc': 3, 'baz': {
'foo': 4
}}
}
I was trying to find a solution on SO but couldn't find one... Hopefully someone can help me out :)

Not sure how robust this is, but I guess this is what you are looking for (credit to https://stackoverflow.com/a/6027615/5417511):
import collections
d = {
'abc': 1,
'foo': 2,
'cba': {'abc': 3, 'baz': {
'foo': 4
}}
}
def flatten(d):
    """Collapse a nested mapping into one flat dict of its leaf values.

    Every non-mapping value found at any nesting depth is collected under
    its own key. When the same key occurs more than once, the value from
    the deepest level wins; among occurrences at the same depth, the one
    visited last wins.

    Args:
        d: A mapping whose values may themselves be mutable mappings.

    Returns:
        A new dict mapping each leaf key to its winning value. Keys whose
        values are mappings do not appear in the result.
    """
    # collections.MutableMapping was removed in Python 3.10; the ABC lives
    # in collections.abc.
    from collections.abc import MutableMapping

    winners = {}  # key -> (depth, value) of the best occurrence so far

    def walk(mapping, depth):
        for k, v in mapping.items():
            if isinstance(v, MutableMapping):
                walk(v, depth + 1)
            elif k not in winners or depth >= winners[k][0]:
                # Compare depths explicitly so that a shallow key which is
                # iterated after a nested one cannot overwrite it.
                winners[k] = (depth, v)

    walk(d, 0)
    return {k: v for k, (_, v) in winners.items()}
d.update(flatten(d))
print(d)
{'abc': 3, 'foo': 4, 'cba': {'abc': 3, 'baz': {'foo': 4}}}

The code below finds the maximum depth value for each key and then recursively updates the original input:
from collections import defaultdict
data = {'abc': 1, 'foo': 2, 'cba': {'abc': 3, 'baz': {'foo': 4}}}
def levels(d, c = 0):
    """Yield a (depth, key, value) triple for every non-dict value in d.

    c is the depth reported for d's own keys; each nested dict is walked
    recursively with the depth increased by one.
    """
    for key, value in d.items():
        if isinstance(value, dict):
            yield from levels(value, c + 1)
        else:
            yield (c, key, value)
l = defaultdict(dict)
for _l, a, b in levels(data):
l[a][_l] = b
def new_d(d):
    """Rebuild d with every leaf replaced by its deepest recorded value.

    Nested dicts are rebuilt recursively. For a non-dict value, the
    module-level table l (key -> {depth: value}) is consulted and the value
    stored under the largest depth is used.
    """
    rebuilt = {}
    for key, value in d.items():
        if isinstance(value, dict):
            rebuilt[key] = new_d(value)
        else:
            by_depth = l[key]
            rebuilt[key] = by_depth[max(by_depth)]
    return rebuilt
result = new_d(data)
Output:
{'abc': 3, 'foo': 4, 'cba': {'abc': 3, 'baz': {'foo': 4}}}

You are probably looking for something like this:
ndir = {
'abc': 1,
'foo': 2,
'cba': {'abc': 3, 'baz': { 'foo': 4 }}
}
print(ndir)
res = {}
def GetContent(ndir):
    """Copy every non-dict value nested anywhere in ndir into the global res.

    Nested dicts are walked recursively in iteration order; a key that is
    seen more than once keeps the value written last.
    """
    for key in ndir:
        value = ndir[key]
        if not isinstance(value, dict):
            res[key] = value
            continue
        GetContent(value)
GetContent(ndir)
print(res)

Related

Add a dictionary as a value to another dictionary with the same key

I have a dictionary like the following -
parent_key : {child : 1, child :2}
I have another dictionary with the same format -
parent_key : {child : 3, child :2}
Both have the same parent key and the same child keys. I need to get output like the following -
parent_key : {{child : 1, child :2},{child : 3, child :2}}
If I use update() method, it simply updates the keys with the latest value but I need in the format I specified. Kindly help!
Your output represents a set of dictionaries:
parent_key : {{child : 1, child :2},{child : 3, child :2}}
which is invalid. I'm also assuming those are not actually duplicate keys and you just replaced every key with child. Otherwise, the final result is even more invalid, since dictionaries can't have duplicate keys.
Instead, I suggest creating this structure:
{parent_key : [{child1 : 1, child2 :2}, {child1 : 3, child2 :2}]}
Which collects each inner child dictionary into a list, which seems to be closest to what you were trying to achieve.
Demo:
from collections import defaultdict
d1 = {"a": {"b": 1, "c": 2}}
d2 = {"a": {"b": 3, "c": 2}}
# Collect the inner dictionaries of both inputs under their shared parent key.
final_d = defaultdict(list)
for d in (d1, d2):
    for k, v in d.items():
        final_d[k].append(v)
print(final_d)
# defaultdict(<class 'list'>, {'a': [{'b': 1, 'c': 2}, {'b': 3, 'c': 2}]})
# Convert the aggregate itself to a plain dict; `d` is only the loop variable
# and still refers to the last input dictionary at this point.
print(dict(final_d))
# {'a': [{'b': 1, 'c': 2}, {'b': 3, 'c': 2}]}
The above uses a collections.defaultdict of lists to aggregate the dictionaries into a list.
You could also achieve a nested dictionary result like this:
{parent_key : {child1: [1, 3], child2: [2, 2]}}
Demo:
from collections import defaultdict
d1 = {"a": {"b": 1, "c": 2}}
d2 = {"a": {"b": 3, "c": 2}}
# Two-level aggregate: parent key -> child key -> list of values seen.
final_d = defaultdict(lambda: defaultdict(list))
for d in (d1, d2):
    for k1, children in d.items():
        per_child = final_d[k1]
        for k2, v2 in children.items():
            per_child[k2].append(v2)
print(final_d)
# defaultdict(<function <lambda> at 0x...>, {'a': defaultdict(<class 'list'>, {'b': [1, 3], 'c': [2, 2]})})
print({k: dict(v) for k, v in final_d.items()})
# {'a': {'b': [1, 3], 'c': [2, 2]}}
Note: defaultdict is a subclass of dict, so it acts as a normal dictionary. I've just printed two versions with defaultdict and without for convenience.
You can do like this:
dic_arr = [{'a' : 2, 'b' : 4, 'c' : 6},
           {'a' : 5, 'b' : 7, 'c' : 8},
           {'d' : 10}]
# Group the values of all dictionaries by key, in order of appearance.
res = {}
for sub in dic_arr:
    for key, val in sub.items():
        if key not in res:
            res[key] = []
        res[key].append(val)
print(res)

adding value in nested dictionaries in python

I am trying to add value to inside dictionary.
aDict = { "id" :
{"name": None },
"id2" :
{"foo": None},
...
}
for k, v in aDict.items():
temp = [1,2,3,4]
aDict[k][v] = temp
then I got error, TypeError: unhashable type: 'dict'
How can I add value in nested dictionary?
---edit ---
My expected output is,
aDict = { "id" :
{"name": [1,2,3,4] },
"id2" :
{"foo": [1,2,3,4] },
...
}
When you do aDict[k], you already have the inner dictionary (the value), so iterate over its keys and assign temp to each key of that inner dictionary.
aDict = {
    'id': {'name': None},
    'id2': {'foo': None},
}
# Each inner dictionary gets its own fresh list, assigned to every one of
# its keys.
for inner in aDict.values():
    temp = [1,2,3,4]
    for keys in inner:
        inner[keys] = temp
Output
{'id': {'name': [1, 2, 3, 4]}, 'id2': {'foo': [1, 2, 3, 4]}}
For any arbitrary dictionary of dictionaries (no matter how deep it is), this works:
def go_deeper(aDict, fill=None):
    """Replace every None value in an arbitrarily nested dict, in place.

    Args:
        aDict: Dict to update. Values that are dicts are descended into;
            values that are neither None nor a dict are left untouched.
        fill: Object stored in place of each None. When omitted, the
            module-level name ``temp`` is used, which keeps the original
            one-argument call working.

    Returns:
        aDict itself, mutated in place. Every replaced entry references the
        same fill object (no copy is made).

    Raises:
        NameError: If fill is omitted and no module-level ``temp`` exists.
    """
    if fill is None:
        fill = temp
    for k, v in aDict.items():
        if v is None:
            aDict[k] = fill
        elif isinstance(v, dict):
            # Only dicts can be descended into; other leaves (ints, strings,
            # lists, ...) used to raise AttributeError here.
            go_deeper(v, fill)
    return aDict
Usage:
>>> temp = [1,2,3,4]
>>> go_deeper(aDict)
For example, for input:
aDict = { 'id' :
{'name': None },
"id2" :
{"foo":
{'bar': None }
}
}
the above code returns:
{'id': {'name': [1, 2, 3, 4]}, 'id2': {'foo': {'bar': [1, 2, 3, 4]}}}
Try this :
temp = [1,2,3,4]
# Assign the same list to every key of every inner dictionary.
for inner in aDict.values():
    for j in inner:
        inner[j] = temp
OUTPUT :
{'id': {'name': [1, 2, 3, 4]}, 'id2': {'foo': [1, 2, 3, 4]}}
You can get all the keys of each inner dictionary with .keys() and then assign temp to each of them.
aDict = {
    "id": {"name": None},
    "id2": {"foo": None},
    "id3": {"bar": None, "boo": None},
}
temp = [1, 2, 3, 4]
# dict.fromkeys maps every existing inner key to the one shared temp list.
for inner in aDict.values():
    inner.update(dict.fromkeys(inner, temp))
Result:
{'id': {'name': [1, 2, 3, 4]},
'id2': {'foo': [1, 2, 3, 4]},
'id3': {'bar': [1, 2, 3, 4], 'boo': [1, 2, 3, 4]}}
I would avoid the use of nested loops. And hence to get your desired output, I would do the following:
aDict = {
    "id": {"name": None},
    "id2": {"foo": None},
}
for value in aDict.values():
    temp = [1, 2, 3, 4]
    # dict.fromkeys builds {key: temp} for every existing inner key, so the
    # inner dictionary is updated in a single call without a nested loop.
    # Unlike indexing list(value.keys())[0], this covers all inner keys and
    # does not raise IndexError when an inner dictionary is empty.
    value.update(dict.fromkeys(value, temp))
When I run this it gives your desired output
user#Kareems-MBP:Desktop$ python aDict.py
{'id': {'name': [1, 2, 3, 4]}, 'id2': {'foo': [1, 2, 3, 4]}}
Lastly, you were getting TypeError: unhashable type: 'dict' because you were trying to reference an item in the dictionary using a dictionary as the key. Items in a dictionary can be referenced only using their keys. E.g. if we have the following dictionary:
myDict = {
'firstname': 'John',
'lastname': 'White'
}
and we want to reference the first item, we can do that only using myDict['firstname'], we cannot do that even with indices myDict[0]. You can imagine that you were doing something like this myDict[{'firstname': 'John'}].
I hope this has been helpful!

Extracting key from the nested dictionary [duplicate]

So I have this block of code
dictionary = {
'key1': {'a': 1, 'b': 2, 'c': 10},
'key2': {'d': 1, 'e': 1, 'c': 11},
'key3': {'d': 2, 'b': 1, 'g': 12}}
and
list1 = (a,b,c)
What I want to do is run a loop that finds the maximums of all the items in the list and returns the key. So for example, the maximum of 'c' would return 'key2', the maximum of 'b' would return 'key1', etc.
So far I have
for value in list1:
m = max(dictionary, key=lambda v: dictionary[v][value])
print(m + "\n")
But this only works if the same subkey exists in all keys in the dictionary. Any ideas on what to do?
Use float('-inf') when the key is missing:
m = max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf')))
Negative infinity is guaranteed to be smaller than any existing value in the dictionaries, ensuring that nested dictionaries with the specific key missing are ignored.
Demo:
>>> dictionary = {
... 'key1': {'a': 1, 'b': 2, 'c': 10},
... 'key2': {'d': 1, 'e': 1, 'c': 11},
... 'key3': {'d': 2, 'b': 1, 'g': 12}}
>>> list1 = ('a', 'b', 'c')
>>> for value in list1:
... print(value, max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf'))))
...
a key1
b key1
c key2
However, it'll be more efficient if you looped over all your dictionary values just once instead:
# Best (outer key, value) pair seen so far for each sub-key of interest.
maximi = dict.fromkeys(list1, (None, float('-inf')))
for key, nested in dictionary.items():
    for k in nested.keys() & maximi:  # only the sub-keys being tracked
        best_key, best_value = maximi[k]
        if best_key is None or best_value < nested[k]:
            maximi[k] = (key, nested[k])
for value in list1:
    print(value, maximi[value][0])
That's presuming you are using Python 3; in Python 2, replace .items() with .iteritems() and .keys() with .viewkeys().
Demo:
>>> maximi = dict.fromkeys(list1, (None, float('-inf')))
>>> for key, nested in dictionary.items():
... for k in nested.keys() & maximi: # intersection of keys
... if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
... maximi[k] = (key, nested[k])
...
>>> maximi
{'a': ('key1', 1), 'b': ('key1', 2), 'c': ('key2', 11)}
>>> for value in list1:
... print(value, maximi[value][0])
...
a key1
b key1
c key2

Find Maximum Value in Nested Dictionary and return Key

So I have this block of code
dictionary = {
'key1': {'a': 1, 'b': 2, 'c': 10},
'key2': {'d': 1, 'e': 1, 'c': 11},
'key3': {'d': 2, 'b': 1, 'g': 12}}
and
list1 = (a,b,c)
What I want to do is run a loop that finds the maximums of all the items in the list and returns the key. So for example, the maximum of 'c' would return 'key2', the maximum of 'b' would return 'key1', etc.
So far I have
for value in list1:
m = max(dictionary, key=lambda v: dictionary[v][value])
print(m + "\n")
But this only works if the same subkey exists in all keys in the dictionary. Any ideas on what to do?
Use float('-inf') when the key is missing:
m = max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf')))
Negative infinity is guaranteed to be smaller than any existing value in the dictionaries, ensuring that nested dictionaries with the specific key missing are ignored.
Demo:
>>> dictionary = {
... 'key1': {'a': 1, 'b': 2, 'c': 10},
... 'key2': {'d': 1, 'e': 1, 'c': 11},
... 'key3': {'d': 2, 'b': 1, 'g': 12}}
>>> list1 = ('a', 'b', 'c')
>>> for value in list1:
... print(value, max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf'))))
...
a key1
b key1
c key2
However, it'll be more efficient if you looped over all your dictionary values just once instead:
# Best (outer key, value) pair seen so far for each sub-key of interest.
maximi = dict.fromkeys(list1, (None, float('-inf')))
for key, nested in dictionary.items():
    for k in nested.keys() & maximi:  # only the sub-keys being tracked
        best_key, best_value = maximi[k]
        if best_key is None or best_value < nested[k]:
            maximi[k] = (key, nested[k])
for value in list1:
    print(value, maximi[value][0])
That's presuming you are using Python 3; in Python 2, replace .items() with .iteritems() and .keys() with .viewkeys().
Demo:
>>> maximi = dict.fromkeys(list1, (None, float('-inf')))
>>> for key, nested in dictionary.items():
... for k in nested.keys() & maximi: # intersection of keys
... if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
... maximi[k] = (key, nested[k])
...
>>> maximi
{'a': ('key1', 1), 'b': ('key1', 2), 'c': ('key2', 11)}
>>> for value in list1:
... print(value, maximi[value][0])
...
a key1
b key1
c key2

Split a dictionary in half?

What is the best way to split a dictionary in half?
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
I'm looking to do this:
d1 = {'key1': 1, 'key2': 2, 'key3': 3}
d2 = {'key4': 4, 'key5': 5}
It does not matter which keys/values go into each dictionary. I am simply looking for the simplest way to divide a dictionary into two.
This would work, although I didn't test edge-cases:
>>> d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
>>> d1 = dict(d.items()[len(d)/2:])
>>> d2 = dict(d.items()[:len(d)/2])
>>> print d1
{'key1': 1, 'key5': 5, 'key4': 4}
>>> print d2
{'key3': 3, 'key2': 2}
In python3:
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
# The first len(d) // 2 items go to d2; all remaining items go to d1.
d1 = {k: v for i, (k, v) in enumerate(d.items()) if i >= len(d) // 2}
d2 = {k: v for i, (k, v) in enumerate(d.items()) if i < len(d) // 2}
Also note that the order of items is not guaranteed.
Here's a way to do it using an iterator over the items in the dictionary and itertools.islice:
import itertools
def splitDict(d):
    """Split d into two dicts and return them as (smaller_half, rest).

    The first len(d) // 2 items, in iteration order, form the first dict;
    all remaining items form the second.
    """
    pairs = list(d.items())
    cut = len(d) // 2
    return dict(pairs[:cut]), dict(pairs[cut:])
# Alternate the items between the two halves: even positions go to d1 and
# odd positions go to d2. dict.viewitems() exists only in Python 2, whereas
# .items() yields the same pairs on both Python 2 and Python 3.
d1 = {key: value for i, (key, value) in enumerate(d.items()) if i % 2 == 0}
d2 = {key: value for i, (key, value) in enumerate(d.items()) if i % 2 == 1}
If you use Python 3.3+ and want your split dictionaries to be the same across different Python invocations, do not use .items(), since the hash values of the keys, which determine the order of .items(), change between Python invocations.
See Hash randomization
The answer by jone did not work for me. I had to cast to a list before I could index the result of the .items() call. (I am running Python 3.6 in the example)
d = {'one':1, 'two':2, 'three':3, 'four':4, 'five':5}
split_idx = 3
# Items at positions before split_idx go to d1, and the rest go to d2.
d1 = {k: v for pos, (k, v) in enumerate(d.items()) if pos < split_idx}
d2 = {k: v for pos, (k, v) in enumerate(d.items()) if pos >= split_idx}
"""
output:
d1
{'one': 1, 'three': 3, 'two': 2}
d2
{'five': 5, 'four': 4}
"""
Note the dicts are not necessarily stored in the order of creation so the indexes may be mixed up.
Here is a function which can be used to split a dictionary into any number of parts.
import math
def linch_dict_divider(raw_dict, num):
    """Split raw_dict into a list of smaller dicts of near-equal size.

    Args:
        raw_dict: Dictionary to divide. It is not modified.
        num: Requested number of parts.

    Returns:
        A list of dicts. When raw_dict has more than num items, the list
        has num dicts, and the first ``len(raw_dict) % num`` of them hold
        one extra item. Otherwise each item is returned in a dict of its
        own, so the list has len(raw_dict) entries.

    Raises:
        ZeroDivisionError: If num is 0 and raw_dict is not empty.
    """
    # Work on a snapshot of the items: dict views cannot be sliced on
    # Python 3, and slicing a list avoids deleting keys from the caller's
    # dictionary while handing them out.
    items = list(raw_dict.items())
    total = len(items)
    if total <= num:
        return [{key: value} for key, value in items]

    # Integer division keeps the part sizes usable as slice bounds.
    base_num, addr_num = divmod(total, num)
    list_result = []
    start = 0
    for i in range(num):
        # The first addr_num parts absorb the remainder, one item each.
        size = base_num + 1 if i < addr_num else base_num
        list_result.append(dict(items[start:start + size]))
        start += size
    return list_result
myDict = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
# print is called as a function so that the demo also runs on Python 3.
print(myDict)
myList = linch_dict_divider(myDict, 2)
print(myList)
We can do this efficiently with itertools.zip_longest() (note this is itertools.izip_longest() in 2.x):
from itertools import zip_longest
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
# Pull consecutive items two at a time from one shared iterator; with an odd
# number of items the missing partner is padded with None.
items1, items2 = zip(*zip_longest(*[iter(d.items())]*2))
# filter(None, ...) drops the None padding; real (key, value) tuples are
# always truthy.
d1 = dict(filter(None, items1))
d2 = dict(filter(None, items2))
Which gives us:
>>> d1
{'key3': 3, 'key1': 1, 'key4': 4}
>>> d2
{'key2': 2, 'key5': 5}
Here's a function that I use in Python 3.8 that can split a dict into a list containing the desired number of parts. If you specify more parts than elements, you'll get some empty dicts in the resulting list.
def split_dict(input_dict: dict, num_parts: int) -> list:
    """Split input_dict into num_parts consecutive dicts.

    Part i holds the items at positions ``i * n // num_parts`` up to (but
    not including) ``(i + 1) * n // num_parts``, where n is the number of
    items, so the parts differ in size by at most one.

    Args:
        input_dict: Dictionary to split; it is not modified.
        num_parts: Number of dicts to return.

    Returns:
        A list of num_parts dicts in item order. Some are empty when
        num_parts exceeds the number of items. The list is empty when
        num_parts is zero or negative.
    """
    # Materialise the items once instead of once per part.
    items = list(input_dict.items())
    list_len: int = len(items)
    return [dict(items[i * list_len // num_parts:(i + 1) * list_len // num_parts])
            for i in range(num_parts)]
Output:
>>> d = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
>>> split_dict(d, 2)
[{'a': 1, 'b': 2}, {'c': 3, 'd': 4, 'e': 5}]
>>> split_dict(d, 3)
[{'a': 1}, {'b': 2, 'c': 3}, {'d': 4, 'e': 5}]
>>> split_dict(d, 7)
[{}, {'a': 1}, {'b': 2}, {}, {'c': 3}, {'d': 4}, {'e': 5}]
If you used numpy, then you could do this :
def divide_dict(dictionary, chunk_size):
    '''
    Divide one dictionary into several dictionaries.

    chunk_size + 1 evenly spaced integer boundaries are laid over the item
    count, and a new group is closed each time the running item count
    reaches one of them.
    Return a list in which each item is a defaultdict whose missing keys
    default to None.
    '''
    import numpy, collections
    boundaries = set(
        numpy.linspace(0, len(dictionary), chunk_size+1, dtype= int).tolist()
    )
    group_lst = []
    current = collections.defaultdict(lambda : None)
    # position counts the items consumed so far, starting at 1.
    for position, (key, value) in enumerate(dictionary.items(), start=1):
        current[key] = value
        if position in boundaries:
            group_lst.append(current)
            current = collections.defaultdict(lambda : None)
    return group_lst

Categories