I have a dictionary with almost 100,000 (key, value) pairs and the majority of the keys map to the same values. For example:
mydict = {'a': 1, 'c': 2, 'b': 1, 'e': 2, 'd': 3, 'h': 1, 'j': 3}
What I want to do, is to reverse the dictionary so that each value in mydict is going to be a key at the reverse_dict and is going to map to a list of all the mydict.keys() that used to map to that value in mydict. So based on the example above I would get:
reversed_dict = {1: ['a', 'b', 'h'], 2: ['c', 'e'] , 3: ['d', 'j']}
I came up with a solution that is very expensive and I want to hear any ideas for doing this more efficiently than this:
reversed_dict = {}
for value in mydict.values():
reversed_dict[value] = []
for key in mydict.keys():
if mydict[key] == value:
if key not in reversed_dict[value]:
reversed_dict[value].append(key)
Using collections.defaultdict:
from collections import defaultdict
reversed_dict = defaultdict(list)
for key, value in mydict.items():
reversed_dict[value].append(key)
reversed_dict = {}
for key, value in mydict.items():
reversed_dict.setdefault(value, [])
reversed_dict[value].append(key)
for k,v in dict.iteritems():
try:
reversed_dict[v].append(k)
except KeyError:
reversed_dict[v]=[k]
I think you're wasting a few cycles by replacing a key with the same key again and again...
reversed_dict = {}
for value in mydict.values():
if value not in reversed_dict.keys(): #checking to be sure it hasn't been done.
reversed_dict[value] = []
for key in mydict.keys():
if mydict[key] == value:
if key not in reversed_dict[value]: reversed_dict[value].append(key)
Using itertools.groupby:
from operator import itemgetter
from itertools import groupby
snd = itemgetter(1)
def sort_and_group(itr, f):
return groupby(sorted(itr, key=f), f)
mydict = {'a': 1, 'c': 2, 'b': 1, 'e': 2, 'd': 3, 'h': 1, 'j': 3}
reversed_dict = {number: [char for char,_ in v]
for number, v in sort_and_group(mydict.items(), snd)}
reversed_dict = collections.defaultdict(list)
for key, value in dict_.iteritems():
reversed_dict[value].append(key)
def reverse_dict(mydict):
v={}
for x,y in mydict.items():
if y not in v:
v[y]=[x]
else:
v[y].append(x)
return v
print(reverse_dict(mydict))
Related
For example, in dict1 the keys 1, 2, 3 all have the same value 'a', but the keys 3 and 5 have different values, 'b' and 'd'. What I want is:
If N keys have the same value and N >=3, then I want to remove all other elements from the dict and only keep those N key values, which means 'b' & 'd' have to be removed from the dict.
The following code works, but it seems very verbose. Is there a better way to do this?
from collections import defaultdict
dict1 = {1:'a', 2:'a', '3':'b', '4': 'a', '5':'d'}
l1 = [1, 2, 3, 4, 5]
dict2 = defaultdict(list)
for k, v in dict1.items():
dict2[v].append(k)
to_be_removed = []
is_to_be_removed = False
for k, values in dict2.items():
majority = len(values)
if majority>=3:
is_to_be_removed = True
else:
to_be_removed.extend(values)
if is_to_be_removed:
for d in to_be_removed:
del dict1[d]
print(f'New dict: {dict1}')
You can use collections.Counter to get the frequency of every value, then use a dictionary comprehension to retain only the keys that have the desired corresponding value:
from collections import Counter
dict1 = {1:'a', 2:'a', '3':'b', '4': 'a', '5':'d'}
ctr = Counter(dict1.values())
result = {key: value for key, value in dict1.items() if ctr[value] >= 3}
print(result)
This outputs:
{1: 'a', 2: 'a', '4': 'a'}
Let's say I have a dictionary:
data = {'a':1, 'b':2, 'c': 3, 'd': 3}
I want to get the maximum value(s) in the dictionary. So far, I have been just doing:
max(zip(data.values(), data.keys()))[1]
but I'm aware that I could be missing another max value. What would be the most efficient way to approach this?
Based on your example, it seems like you're looking for the key(s) which map to the maximum value. You could use a list comprehension:
[k for k, v in data.items() if v == max(data.values())]
# ['c', 'd']
If you have a large dictionary, break this into two lines to avoid calculating max for as many items as you have:
mx = max(data.values())
[k for k, v in data.items() if v == mx]
In Python 2.x you will need .iteritems().
You could try collecting reverse value -> key pairs in a defaultdict, then output the values with the highest key:
from collections import defaultdict
def get_max_value(data):
d = defaultdict(list)
for key, value in data.items():
d[value].append(key)
return max(d.items())[1]
Which Outputs:
>>> get_max_value({'a':1, 'b':2, 'c': 3, 'd': 3})
['c', 'd']
>>> get_max_value({'a': 10, 'b': 10, 'c': 4, 'd': 5})
['a', 'b']
First of all, find what is the max value that occurs in the dictionary. If you are trying to create a list of all the max value(s), then try something like this:
data = {'a':1, 'b':2, 'c': 3, 'd': 3}
max_value = data.get(max(data))
list_num_max_value = []
for letter in data:
if data.get(letter) == max_value:
list_num_max_value.append(max_value)
print (list_num_max_value)
Please let me know if that's not what you are trying to do and I will guide you through the right process.
I have a dictionary, what:
what = {'a': ['b'], 'c': ['d\n', 'e\n', 'f'], 'd': ['f', 'g']}
and need to get the items with '/n' separate and without '/n' (order is important, need to sort the values.):
[[{'a': ['b'], 'c': ['f'], 'd': ['f', 'g']}], [{'c': ['d\n', 'e\n']}]]
This is what I tried:
def lst(profiles_file: TextIO):
solve_lst = []
new_lst = fix_files(profiles_file)
for k, v in new_lst.items():
for i in v:
if i.find('\n') != -1:
get_index = [v.index(i)]
solve_lst.append(get_index)
return solve_lst
How can i get this without doing anything to complicated?
Here's a solution using defaultdict
from collections import defaultdict
def lst(profiles_file: TextIO):
initial_dict = fix_files(profiles_file)
with_n = defaultdict(list)
without_n = defaultdict(list)
for k, v in initial_dict.items():
for item in v:
if '\n' in item:
with_n[k].append(item)
else:
without_n[k].append(item)
return [without_n, with_n]
I have a Dictionary here:
dic = {'A':1, 'B':6, 'C':42, 'D':1, 'E':12}
and a list here:
lis = ['C', 'D', 'C', 'C', 'F']
What I'm trying to do is (also a requirement of the homework) to check whether the values in the lis matches the key in dic, if so then it increment by 1 (for example there's 3 'C's in the lis then in the output of dic 'C' should be 45). If not, then we create a new item in the dic and set the value to 1.
So the example output should be look like this:
dic = {'A':1, 'B':6, 'C':45, 'D':2, 'E':12, 'F':1}
Here's what my code is:
def addToInventory(dic, lis):
for k,v in dic.items():
for i in lis:
if i == k:
dic[k] += 1
else:
dic[i] = 1
return dic
and execute by this code:
dic = addToInventory(dic,lis)
It compiles without error but the output is strange, it added the missing F into the dic but didn't update the values correctly.
dic = {'A':1, 'B':6, 'C':1, 'D':1, 'E':12, 'F':1}
What am I missing here?
There's no need to iterate over a dictionary when it supports random lookup. You can use if x in dict to do this. Furthermore, you'd need your return statement outside the loop.
Try, instead:
def addToInventory(dic, lis):
for i in lis:
if i in dic:
dic[i] += 1
else:
dic[i] = 1
return dic
out = addToInventory(dic, lis)
print(out)
{'A': 1, 'B': 6, 'C': 45, 'D': 2, 'E': 12, 'F': 1}
As Harvey suggested, you can shorten the function a little by making use of dict.get.
def addToInventory(dic, lis):
for i in lis:
dic[i] = dic.get(i, 0) + 1
return dic
The dic.get function takes two parameters - the key, and a default value to be passed if the value associated with that key does not already exist.
If your professor allows the use of libraries, you can use the collections.Counter data structure, it's meant precisely for keeping counts.
from collections import Counter
c = Counter(dic)
for i in lis:
c[i] += 1
print(dict(c))
{'A': 1, 'B': 6, 'C': 45, 'D': 2, 'E': 12, 'F': 1}
I have a dictionary composed of {key: value}.
I select a set of keys from this dictionary.
I'd like to build a new dictionary with {keyA: set of all keys wich have the same value as keyA}.
I already have a solution: Is there a faster way to do it?
It seems very slow to me, and I imagine I'm not the only one in this case!
for key1 in selectedkeys:
if key1 not in seen:
seen.add(key1)
equal[key1] = set([key1])#egual to itself
for key2 in selectedkeys:
if key2 not in seen and dico[key1] == dico[key2]:
equal[key1].add(key2)
seen.update(equal[key1])
Try this
>>> a = {1:1, 2:1, 3:2, 4:2}
>>> ret_val = {}
>>> for k, v in a.iteritems():
... ret_val.setdefault(v, []).append(k)
...
>>> ret_val
{1: [1, 2], 2: [3, 4]}
def convert(d):
result = {}
for k, v in d.items(): # or d.iteritems() if using python 2
if v not in result:
result[v] = set()
result[v].add(k)
return result
or just use collections.defaultdict(set) if you are careful enough not to access any non key later :-)
So you want to create a dictionary that maps key to "the set of all keys which have the same value as key" for each selected key in a given source dictionary.
Thus, if the source dictionary is:
{'a': 1, 'b': 2, 'c': 1, 'd': 2, 'e': 3, 'f': 1, 'g': 3)
and the selected keys are a, b, and e, the result should be:
{'a': {'a', 'c', 'f'}, 'e': {'g', 'e'}, 'b': {'b', 'd'}}
One way to achieve this would be to use a defaultdict to build a value to key table, and then use that to build the required result from the specified keys:
from collections import defaultdict
def value_map(source, keys):
table = defaultdict(set)
for key, value in source.items():
table[value].add(key)
return {key: table[source[key]] for key in keys}
source = {'a': 1, 'b': 2, 'c': 1, 'd': 2, 'e': 3, 'f': 1, 'g': 3)
print(value_map(source, ['a', 'b', 'e']))
Output:
{'a': {'a', 'c', 'f'}, 'e': {'g', 'e'}, 'b': {'b', 'd'}}
Since you select a set of keys from the original dictionary. We can modify #Nilesh solution for your purpose.
a = {1:1, 2:1, 3:2, 4:2}
keys = [1, 3] # lets say this is the list of keys
ret_val = {}
for i in keys:
for k,v in a.items():
if a[i]==v:
ret_val.setdefault(i, []).append(k)
print (ret_val)
{1: [1, 2], 3: [3, 4]}
This was sort of stated in the comments by #Patrick Haugh:
d=your dictionary
s=set(d.values())
d2={i:[] for i in s}
for k in d:
d2[d[k]].append(k)