Split a dictionary in half? - python

What is the best way to split a dictionary in half?
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
I'm looking to do this:
d1 = {'key1': 1, 'key2': 2, 'key3': 3}
d2 = {'key4': 4, 'key5': 5}
It does not matter which keys/values go into each dictionary. I am simply looking for the simplest way to divide a dictionary into two.

This would work, although I didn't test edge-cases:
>>> d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
>>> d1 = dict(d.items()[len(d)/2:])
>>> d2 = dict(d.items()[:len(d)/2])
>>> print d1
{'key1': 1, 'key5': 5, 'key4': 4}
>>> print d2
{'key3': 3, 'key2': 2}
In python3:
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
d1 = dict(list(d.items())[len(d)//2:])
d2 = dict(list(d.items())[:len(d)//2])
Also note that order of items is not guaranteed

Here's a way to do it using an iterator over the items in the dictionary and itertools.islice:
import itertools
def splitDict(d):
n = len(d) // 2 # length of smaller half
i = iter(d.items()) # alternatively, i = d.iteritems() works in Python 2
d1 = dict(itertools.islice(i, n)) # grab first n items
d2 = dict(i) # grab the rest
return d1, d2

d1 = {key: value for i, (key, value) in enumerate(d.viewitems()) if i % 2 == 0}
d2 = {key: value for i, (key, value) in enumerate(d.viewitems()) if i % 2 == 1}

If you use python +3.3, and want your splitted dictionaries to be the same across different python invocations, do not use .items, since the hash-values of the keys, which determines the order of .items() will change between python invocations.
See Hash randomization

The answer by jone did not work for me. I had to cast to a list before I could index the result of the .items() call. (I am running Python 3.6 in the example)
d = {'one':1, 'two':2, 'three':3, 'four':4, 'five':5}
split_idx = 3
d1 = dict(list(d.items())[:split_idx])
d2 = dict(list(d.items())[split_idx:])
"""
output:
d1
{'one': 1, 'three': 3, 'two': 2}
d2
{'five': 5, 'four': 4}
"""
Note the dicts are not necessarily stored in the order of creation so the indexes may be mixed up.

Here is the function which can be used to split a dictionary to any divisions.
import math
def linch_dict_divider(raw_dict, num):
list_result = []
len_raw_dict = len(raw_dict)
if len_raw_dict > num:
base_num = len_raw_dict / num
addr_num = len_raw_dict % num
for i in range(num):
this_dict = dict()
keys = list()
if addr_num > 0:
keys = raw_dict.keys()[:base_num + 1]
addr_num -= 1
else:
keys = raw_dict.keys()[:base_num]
for key in keys:
this_dict[key] = raw_dict[key]
del raw_dict[key]
list_result.append(this_dict)
else:
for d in raw_dict:
this_dict = dict()
this_dict[d] = raw_dict[d]
list_result.append(this_dict)
return list_result
myDict = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
print myDict
myList = linch_dict_divider(myDict, 2)
print myList

We can do this efficiently with itertools.zip_longest() (note this is itertools.izip_longest() in 2.x):
from itertools import zip_longest
d = {'key1': 1, 'key2': 2, 'key3': 3, 'key4': 4, 'key5': 5}
items1, items2 = zip(*zip_longest(*[iter(d.items())]*2))
d1 = dict(item for item in items1 if item is not None)
d2 = dict(item for item in items2 if item is not None)
Which gives us:
>>> d1
{'key3': 3, 'key1': 1, 'key4': 4}
>>> d2
{'key2': 2, 'key5': 5}

Here's a function that I use in Python 3.8 that can split a dict into a list containing the desired number of parts. If you specify more parts than elements, you'll get some empty dicts in the resulting list.
def split_dict(input_dict: dict, num_parts: int) -> list:
list_len: int = len(input_dict)
return [dict(list(input_dict.items())[i * list_len // num_parts:(i + 1) * list_len // num_parts])
for i in range(num_parts)]
Output:
>>> d = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
>>> split_dict(d, 2)
[{'a': 1, 'b': 2}, {'c': 3, 'd': 4, 'e': 5}]
>>> split_dict(d, 3)
[{'a': 1}, {'b': 2, 'c': 3}, {'d': 4, 'e': 5}]
>>> split_dict(d, 7)
[{}, {'a': 1}, {'b': 2}, {}, {'c': 3}, {'d': 4}, {'e': 5}]

If you used numpy, then you could do this :
def divide_dict(dictionary, chunk_size):
'''
Divide one dictionary into several dictionaries
Return a list, each item is a dictionary
'''
import numpy, collections
count_ar = numpy.linspace(0, len(dictionary), chunk_size+1, dtype= int)
group_lst = []
temp_dict = collections.defaultdict(lambda : None)
i = 1
for key, value in dictionary.items():
temp_dict[key] = value
if i in count_ar:
group_lst.append(temp_dict)
temp_dict = collections.defaultdict(lambda : None)
i += 1
return group_lst

Related

Adjust / norm python dictionary values to certain size / length

I have a question regarding how to limit the value length of a dictionary:
real_vertrices_copy = {
'key': {1,3,8,4,2},
'key1': {9,4,2,4},
'key2': {6,4,2},
'key3': {9,3,5,3,5}
}
The dictionary consists of key value pairs, with the values varying in length.
I want to limit the length of the values to 3, so if the length of the values is > 3, I want to pop random elements as many times necessary to reduce it to len=3.
I tried it like this:
for x in real_vertrices_copy.values():
if len(x) > 20:
real_vertrices_copy.popitem()
but this just removes random dictionary entries (I did not expect this to work, I have to admit)
Does anyone have an idea how to solve this?
Thank you and BR
You could just pop three times from each set with more than three elements to get three arbitrary elements:
d = {
'key': {1,3,8,4,2},
'key1': {9,4,2,4},
'key2': {6,4,2},
'key3': {9,3,5,3,5}
}
d = {
k: {v.pop() for _ in range(3)} if len(v) > 3 else v
for k, v in d.items()
}
This assumes that there are no other references to the sets in d because the mutation via pop will be seen across all references.
You can use a dictionary comprehension with random sampling (random.sample):
d = {'key': {1,3,8,4,2},
'key1': {9,4,2,4},
'key2': {6,4,2},
'key3': {9,3,5,3,5}
}
import random
out = {k: set(random.sample(list(v), 3)) for k,v in d.items()}
or, if it is possible that the input has less than 3 elements:
N = 3
out = {k: set(random.sample(list(v), N)) if len(v)>=N else v
for k,v in d.items()}
example output:
{'key': {1, 3, 8}, 'key1': {2, 4, 9}, 'key2': {2, 4, 6}, 'key3': {3, 5, 9}}
classical loop
If you need a classical loop and want to get both kept and discarded items, take advantage of set operations:
N = 3
keep = {}
discard = {}
for k,v in d.items():
keep[k] = set(random.sample(list(v), min(len(v), N)))
discard[k] = v.difference(keep[k])
print(keep)
# {'key': {1, 2, 4}, 'key1': {9, 2, 4}, 'key2': {2, 4, 6}, 'key3': {9, 3, 5}}
print(discard)
# {'key': {8, 3}, 'key1': set(), 'key2': set(), 'key3': set()}
Inplace update of real_vertrices_copy:
import random
real_vertrices_copy = {'key': {1,3,8,4,2},
'key1': {9,4,2,4},
'key2': {6,4,2},
'key3': {9,3,5,3,5}
}
for k, v in real_vertrices_copy.items():
while len(v) > 3:
v.remove(random.choice(tuple(v)))

Group all keys with the same value in a dictionary of sets

I am trying to transform a dictionary of sets as the values with duplication to a dictionary with the unique sets as the value and at the same time join the keys together.
dic = {'a': {1, 2, 3}, 'b': {1, 2}, 'c': {1, 3, 2}, 'd': {1, 2, 3}}
Should be changed to
{'a-c-d': {1, 2, 3}, 'b': {1, 2}}
My try is as below, but I think there has to be a better way.
def transform_dictionary(dic: dict) -> dict:
dic = {k: frozenset(v) for k, v in dic.items()}
key_list = list(dic.keys())
value_list = list(dic.values())
dict_transformed = {}
for v_uinque in set(value_list):
sub_key_list = []
for i, v in enumerate(value_list):
if v == v_uinque:
sub_key_list.append(str(key_list[i]))
dict_transformed['-'.join(sub_key_list)] = set(v_uinque)
return dict_transformed
print(transform_dictionary(dic))
You can "invert" the input dictionary into a dictionary mapping frozensets into a set of keys.
import collections
dic = {'a': {1, 2, 3}, 'b': {1, 2}, 'c': {1, 3, 2}, 'd': {1, 2, 3}}
keys_per_set = collections.defaultdict(list)
for key, value in dic.items():
keys_per_set[frozenset(value)].append(key)
Then invert that dictionary mapping back into the desired form:
{'-'.join(keys): value for (value, keys) in keys_per_set.items()}
Output:
{'a-c-d': frozenset({1, 2, 3}), 'b': frozenset({1, 2})}
This will turn the values into a frozenset, but you could "thaw" them with a set(value) in the last list comprehension.
from itertools import groupby
dic_output = {'-'.join(v):g for g,v in groupby(sorted(dic_input,
key=dic_input.get),
key=lambda x: dic_input[x])}
Output
{'b': {1, 2}, 'a-c-d': {1, 2, 3}}

Join multiple dicts to create new list with value as list of values of original dict

I'm on Python 2.7 and have looked at several solutions here which works if you know how many dictionaries you are merging, but I could have anything between 2 to 5.
I have a loop which generates a dict with the same keys but different values. I want to add the new values to the previous.
Such as:
for num in numbers:
dict = (function which outputs a dictionary)
[merge with dictionary from previous run of the loop]
So if:
dict (from loop one) = {'key1': 1,
'key2': 2,
'key3': 3}
and
dict (from loop two) = {'key1': 4,
'key2': 5,
'key3': 6}
The resultant dict would be:
dict = {'key1': [1,4]
'key2': [2,5],
'key3': [3,6]}
Use a defaultdict:
In [18]: def gen_dictionaries():
...: yield {'key1': 1, 'key2': 2, 'key3': 3}
...: yield {'key1': 4, 'key2': 5, 'key3': 6}
...:
In [19]: from collections import defaultdict
In [20]: final = defaultdict(list)
In [21]: for d in gen_dictionaries():
...: for k, v in d.iteritems():
...: final[k].append(v)
...:
In [22]: final
Out[22]: defaultdict(list, {'key1': [1, 4], 'key2': [2, 5], 'key3': [3, 6]})
One way to achieve this in a generic way is via using set to find the union of keys of both the dicts and then use a dictionary comprehension to get the desired dict as:
>>> dict_list = [d1, d2] # list of all the dictionaries which are to be joined
# set of the keys present in all the dicts;
>>> common_keys = set(dict_list[0]).union(*dict_list[1:])
>>> {k: [d[k] for d in dict_list if k in d] for k in common_keys}
{'key3': [3, 6], 'key2': [2, 5], 'key1': [1, 4]}
where d1 and d2 are:
d1 = {'key1': 1,
'key2': 2,
'key3': 3}
d2 = {'key1': 4,
'key2': 5,
'key3': 6}
Explanation: Here, dict_list is the list of all the dict objects you want to combine. Then I am creating the common_keys set of the keys in all the dict object. Finally I am creating a new dictionary via using dictionary comprehension (with nested list comprehension with filter).
Based on comment from OP, since all the dicts hold the same keys, we can skip the usage of set. Hence the code could be written as:
>>> dict_list = [d1, d2]
>>> {k: [d[k] for d in dict_list] for k in dict_list[0]}
{'key3': [3, 6], 'key2': [2, 5], 'key1': [1, 4]}
dict1 = {'m': 2, 'n':4, 'o':7, 'p':90}
dict2 = {'m': 1, 'n': 3}
dict3 = {}
for key,value in dict1.iteritems():
if key not in dict3:
dict3[key] = list()
dict3[key].append(value)
for key,value in dict2.iteritems():
if key not in dict3:
dict3[key] = list()
dict3[key].append(value)
print(dict3)
The output looks like this :
{'p': [90], 'm': [2, 1], 'o': [7], 'n': [4, 3]}

Extracting key from the nested dictionary [duplicate]

So I have this block of code
dictionary = {
'key1': {'a': 1, 'b': 2, 'c': 10},
'key2': {'d': 1, 'e': 1, 'c': 11},
'key3': {'d': 2, 'b': 1, 'g': 12}}
and
list1 = (a,b,c)
What I want to do is run a loop that finds the maximums of all the items in the list and returns the key. So for example, the maximum of 'c' would return 'key2', the maximum of 'b' would return 'key1', etc.
So far I have
for value in list1:
m = max(dictionary, key=lambda v: dictionary[v][value])
print(m + "\n")
But this only works if the same subkey exists in all keys in the dictionary. Any ideas on what to do?
Use float('-inf') when the key is missing:
m = max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf')))
Negative infinity is guaranteed to be smaller than any existing value in the dictionaries, ensuring that nested dictionaries with the specific key missing are ignored.
Demo:
>>> dictionary = {
... 'key1': {'a': 1, 'b': 2, 'c': 10},
... 'key2': {'d': 1, 'e': 1, 'c': 11},
... 'key3': {'d': 2, 'b': 1, 'g': 12}}
>>> list1 = ('a', 'b', 'c')
>>> for value in list1:
... print(value, max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf'))))
...
a key1
b key1
c key2
However, it'll be more efficient if you looped over all your dictionary values just once instead:
maximi = dict.fromkeys(list1, (None, float('-inf')))
for key, nested in dictionary.items():
for k in nested.keys() & maximi: # intersection of keys
if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
maximi[k] = (key, nested[k])
for value in list1:
print(value, maximi[value][0])
That's presuming you are using Python 3; in Python 2, replace .items() with .iteritems() and .keys() with .viewkeys().
Demo:
>>> maximi = dict.fromkeys(list1, (None, float('-inf')))
>>> for key, nested in dictionary.items():
... for k in nested.keys() & maximi: # intersection of keys
... if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
... maximi[k] = (key, nested[k])
...
>>> maximi
{'a': ('key1', 1), 'b': ('key1', 2), 'c': ('key2', 11)}
>>> for value in list1:
... print(value, maximi[value][0])
...
a key1
b key1
c key2

Find Maximum Value in Nested Dictionary and return Key

So I have this block of code
dictionary = {
'key1': {'a': 1, 'b': 2, 'c': 10},
'key2': {'d': 1, 'e': 1, 'c': 11},
'key3': {'d': 2, 'b': 1, 'g': 12}}
and
list1 = (a,b,c)
What I want to do is run a loop that finds the maximums of all the items in the list and returns the key. So for example, the maximum of 'c' would return 'key2', the maximum of 'b' would return 'key1', etc.
So far I have
for value in list1:
m = max(dictionary, key=lambda v: dictionary[v][value])
print(m + "\n")
But this only works if the same subkey exists in all keys in the dictionary. Any ideas on what to do?
Use float('-inf') when the key is missing:
m = max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf')))
Negative infinity is guaranteed to be smaller than any existing value in the dictionaries, ensuring that nested dictionaries with the specific key missing are ignored.
Demo:
>>> dictionary = {
... 'key1': {'a': 1, 'b': 2, 'c': 10},
... 'key2': {'d': 1, 'e': 1, 'c': 11},
... 'key3': {'d': 2, 'b': 1, 'g': 12}}
>>> list1 = ('a', 'b', 'c')
>>> for value in list1:
... print(value, max(dictionary, key=lambda v: dictionary[v].get(value, float('-inf'))))
...
a key1
b key1
c key2
However, it'll be more efficient if you looped over all your dictionary values just once instead:
maximi = dict.fromkeys(list1, (None, float('-inf')))
for key, nested in dictionary.items():
for k in nested.keys() & maximi: # intersection of keys
if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
maximi[k] = (key, nested[k])
for value in list1:
print(value, maximi[value][0])
That's presuming you are using Python 3; in Python 2, replace .items() with .iteritems() and .keys() with .viewkeys().
Demo:
>>> maximi = dict.fromkeys(list1, (None, float('-inf')))
>>> for key, nested in dictionary.items():
... for k in nested.keys() & maximi: # intersection of keys
... if maximi[k][0] is None or dictionary[maximi[k][0]][k] < nested[k]:
... maximi[k] = (key, nested[k])
...
>>> maximi
{'a': ('key1', 1), 'b': ('key1', 2), 'c': ('key2', 11)}
>>> for value in list1:
... print(value, maximi[value][0])
...
a key1
b key1
c key2

Categories