How to find which element occurs the most in a python dictionary - python

If you have a dictionary in Python, how would you find which element occurs in it the greatest number of times? For example, in the following dictionary the name Bob occurs the most (3 times: once as a key and twice as a value). How would you find the name that occurs the most?
Also, I would prefer not to import anything (as I am a beginner)
dict = {'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'], 'Paul': ['Will', 'Zach'], 'Bob': ['Sarah', 'Don'], 'Tim': ['Bob', 'Carol']}

You can count the keys using a Counter, and update it with the count of the values.
You can then use the most_common method of the Counter to get the most common name:
from collections import Counter
from itertools import chain

d = {
    'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'],
    'Paul': ['Will', 'Zach'],
    'Bob': ['Sarah', 'Don'],
    'Tim': ['Bob', 'Carol'],
}

# Flatten the lists of values into a single stream of names.
all_values = chain.from_iterable(d.values())

# Start with one occurrence per key, then add the occurrences among the values.
count = Counter(d.keys())
count.update(all_values)

# most_common(1) is a one-item list holding the (name, occurrences) pair.
winner = count.most_common(1)
print(winner)
# [('Bob', 3)]
print(winner[0][0])
# Bob

I guess what you mean is how to find which element is the most common among all the lists that appear in your dictionary as values. If that's the case, the following should do the trick:
from collections import Counter
from itertools import chain
dict = {
'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'],
'Paul': ['Will', 'Zach'],
'Bob': ['Sarah', 'Don'],
'Tim': ['Bob', 'Carol']
}
counter = Counter(chain.from_iterable(list(dict.values())))
counter.most_common()
[('Bob', 2), ('Carol', 2), ('Will', 2), ('Paul', 1), ('Leanne', 1), ('Zach', 1), ('Sarah', 1), ('Don', 1)]
If you also need to take keys into account, then:
counter = Counter(chain.from_iterable(list(dict.values()) + [dict.keys()]))
counter.most_common()
[('Bob', 3), ('Paul', 2), ('Carol', 2), ('Will', 2), ('Leanne', 1), ('Zach', 1), ('Sarah', 1), ('Don', 1), ('Mark', 1), ('Tim', 1)]
If you don't want to import anything:
l = list(dict.keys()) + sum(list(dict.values()), []) # flatten list of lists
max(l, key=l.count)
>>> 'Bob'

Here is a way to do this without imports. It defines a check function, iterates through dic once to generate a dic_count, then uses another for-loop to get the max_count and the most_common_name.
Sidenote: Never name variables or functions after built-in Python functions or objects. This is why I renamed dict to dic.
dic = {'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'], 'Paul': ['Will', 'Zach'], 'Bob': ['Sarah', 'Don'], 'Tim': ['Bob', 'Carol']}
dic_count = {}


def check(string):
    """Record one more occurrence of ``string`` in ``dic_count``.

    Adds ``string`` with a count of 1 if it is not in ``dic_count`` yet,
    otherwise increments its count.
    """
    if string in dic_count:
        dic_count[string] += 1
    else:
        dic_count[string] = 1


for key, value in dic.items():
    # Calls the check function for the key and for every name in its list
    check(key)
    for name in value:
        check(name)

max_num = 0
most_common_name = ""
for key, value in dic_count.items():
    # Only a strictly greater count replaces the current leader, so on a
    # tie the name that was counted first is kept
    if value > max_num:
        max_num = value
        most_common_name = key
print(most_common_name)
# Prints Bob
If you would like to get multiple names, change the last part to
max_num = 0
# Every name that shares the highest count, in the order it was counted
most_common_names = []
for key, value in dic_count.items():
    if value > max_num:
        # New highest count: start the list of winners over
        max_num = value
        most_common_names = [key]
    elif value == max_num:
        # Tie with the current highest count: keep this name as well
        most_common_names.append(key)
print(" ".join(most_common_names))
# Prints Bob Will after adding an extra
# 'Will' to the dictionary
Alternatively, if you would like to avoid defining a function, simply replace the top part with:
for key, value in dic.items():
    # dict.get returns 0 for a name that has not been counted yet, so the
    # same line both adds new names and increments existing ones
    dic_count[key] = dic_count.get(key, 0) + 1
    for name in value:
        dic_count[name] = dic_count.get(name, 0) + 1

You can create a list with all items (keys + values) of the dict and use collections.Counter. Here d is your dictionary (dict, the name you used, is not a good variable name in Python because it is already used for the built-in type).
from collections import Counter
l=[i for i in d.keys()]+[i for k in d.values() for i in k]
res=Counter(l)
>>> print(res)
Counter({'Bob': 3, 'Paul': 2, 'Carol': 2, 'Will': 2, 'Mark': 1, 'Tim': 1, 'Leanne': 1, 'Zach': 1, 'Sarah': 1, 'Don': 1})

By any chance are you looking for something like this
dic = {'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'], 'Paul': ['Will', 'Zach'], 'Bob': ['Sarah', 'Don'], 'Tim': ['Bob', 'Carol']}
# getting all the keys
keyslist = dic.keys()
# getting all the values of dic as a list of lists
valuelist = list(dic.values())
# list with all elements from dict: the keys first, then every value
test_list = []
test_list.extend(keyslist)
for x in valuelist:
    test_list.extend(x)
print(test_list)

# count every element in one pass instead of calling test_list.count()
# once per element
freq_by_name = {}
for i in test_list:
    freq_by_name[i] = freq_by_name.get(i, 0) + 1

# get most frequent element; on a tie the element seen first wins.
# max_freq avoids shadowing the built-in max(), and res stays None when
# the dictionary is empty
max_freq = 0
res = None
for i, freq in freq_by_name.items():
    if freq > max_freq:
        max_freq = freq
        res = i
# printing result
print("Most frequent element is : " + str(res) + " Frequency :" + str(max_freq))
Output:
Most frequent element is : Bob Frequency :3
I know this is not the best way. If anybody has any suggestions, please leave them in the comments and I will edit my answer to include them.

Please check the comments in the code
Use chain to combine keys and values
Use defaultdict which is a special case of dict
where key is appended if not present
Code:
from itertools import chain
from collections import defaultdict

# do not use dict - no shadowing built-in dict
my_dict = {'Mark': ['Paul', 'Bob', 'Carol', 'Leanne', 'Will'], 'Paul': ['Will', 'Zach'], 'Bob': ['Sarah', 'Don'], 'Tim': ['Bob', 'Carol']}

# searching for one specific name occurrence:
# chain() walks the keys and then every list of values as one sequence
name_to_search = 'Bob'
name_ctr = sum(1 for ele in chain(my_dict, *my_dict.values()) if ele == name_to_search)
print(f'{name_to_search} occurs {name_ctr} times')

# searching for max occurring name in a dictionary:
# defaultdict(int) starts every unseen name at 0
my_dict_name_ctr = defaultdict(int)
for name in chain(my_dict, *my_dict.values()):
    my_dict_name_ctr[name] += 1
# default=0 keeps max() from raising when my_dict is empty
max_occuring_val = max(my_dict_name_ctr.values(), default=0)
# every name that reaches the highest count, so ties are all reported
most_occuring_names = [name for name, val in my_dict_name_ctr.items() if val == max_occuring_val]
print(most_occuring_names, 'occurs', max_occuring_val, 'times')
Output:
Bob occurs 3 times
['Bob'] occurs 3 times

Related

how to add/merge values together in a python dictionary

I have two list
alist = [1,2,5,3,7,3,21,7,2]
blist = [mary, tom, ken, mary, tom, peter, joseph, mary, ken]
in the end I would like to have a python dictionary:
{"mary": 11, "tom": 9, "ken": 7, "peter": 3, "joseph": 21}
adding all their marks together according to their names.
I tried something like this:
for (marks, name) in zip(alist,blist):
dict[name] += marks
I have this solution:
# Running total of marks per name
d = {}
for marks, name in zip(alist, blist):
    if name in d:
        # name seen before: add to its running total
        d[name] += marks
    else:
        # first mark for this name
        d[name] = marks
Maybe something more efficient could be written but I think this works.
You can use collections.Counter, it is probably the most efficient way of doing this.
from collections import Counter

# A Counter returns 0 for missing keys, so += works on the first mark too
c = Counter()
for mark, name in zip(alist, blist):
    c[name] += mark
print(c)
Output:
Counter({'joseph': 21, 'mary': 11, 'tom': 9, 'ken': 7, 'peter': 3})
Counter works just like a dictionary, but extends it with some additional methods like:
print(c.most_common(3))
Output:
[('joseph', 21), ('mary', 11), ('tom', 9)]
zip(*iterables)
Make an iterator that aggregates elements from each of the iterables.
https://docs.python.org/3.3/library/functions.html#zip
from collections import defaultdict

alist = [1, 2, 5, 3, 7, 3, 21, 7, 2]
blist = ['mary', 'tom', 'ken', 'mary', 'tom', 'peter', 'joseph', 'mary', 'ken']

# defaultdict(int) starts every new name at 0, so the marks can be added
# without checking whether the name is already present
data_dict = defaultdict(int)
for name, mark in zip(blist, alist):
    data_dict[name] += mark
You can use this solution alternatively
alist = [1, 2, 5, 3, 7, 3, 21, 7, 2]
blist = ['mary', 'tom', 'ken', 'mary', 'tom', 'peter', 'joseph', 'mary', 'ken']

my_dict = {}
for key, value in zip(blist, alist):
    # get(key, 0) supplies 0 the first time a name is seen
    my_dict[key] = my_dict.get(key, 0) + value
dict(zip(alist, blist))
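This may help; it is taken from this article: https://www.geeksforgeeks.org/python-convert-two-lists-into-a-dictionary/amp/ Note, however, that dict(zip(alist, blist)) only pairs each mark with a name (a later duplicate mark overwrites an earlier one); it does not add the marks together per name.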

sort a list of dictionaries based on explicitly defined order of keys

I have a list of dictionary for ex:
names = [{'Mark':'Volvo'}, {'John':'BMW'}, {'Eliza':'Merci'}, {'Calen':'Audi'}]
I would like to set the explicit ordering by the key names of the dictionary.
For example if I give this order:
['John','Mark','Calen','Eliza']
The expected output would be:
[{'John':'BMW'},{'Mark':'Volvo'},{'Calen':'Audi'},{'Eliza':'Merci'}]
I want to add a custom logic order. To be displayed through the template by their names, based on how I defined the order of their names.
Similar to Is there a way to sort a list of string by a “predicate” list?: Since names is a list of dictionaries with just one key-value pair each, use the index of the person's name in the order list as the key for the sort:
>>> names = [{'Mark': 'Volvo'}, {'John': 'BMW'}, {'Eliza': 'Merci'}, {'Calen' :'Audi'}]
>>> order = ['John', 'Mark', 'Calen', 'Eliza']
>>>
>>> # with `sorted()`
>>> sorted(names, key=lambda d: order.index(list(d.keys())[0]))
[{'John': 'BMW'}, {'Mark': 'Volvo'}, {'Calen': 'Audi'}, {'Eliza': 'Merci'}]
>>>
>>> # or with `list.sort()`
>>> names.sort(key=lambda d: order.index(list(d.keys())[0]))
>>> names
[{'John': 'BMW'}, {'Mark': 'Volvo'}, {'Calen': 'Audi'}, {'Eliza': 'Merci'}]
dict.keys() is not subscriptable, so dict.keys()[0] doesn't work. So first convert it to a list and then use its one-and-only key: list(dict.keys())[0]. That gives 'Mark', 'John', etc. Then get the index of that person's name in the order list. Note: it will fail if a person is not listed in order.
Even if names is a list of dictionaries with more than one key-value pair each, as long as the person's name is the first key, it will still work as of Python 3.7/3.6. See the note below this item:
Changed in version 3.7: Dictionary order is guaranteed to be insertion order. This behavior was an implementation detail of CPython from 3.6.
>>> names = [{'Mark': 'Volvo', 'age': 30},
... {'John': 'BMW', 'age': 40},
... {'Eliza': 'Merci', 'age': 50},
... {'Calen': 'Audi', 'age': 60}]
>>> sorted(names, key=lambda d: order.index(list(d.keys())[0]))
[{'John': 'BMW', 'age': 40}, {'Mark': 'Volvo', 'age': 30}, {'Calen': 'Audi', 'age': 60}, {'Eliza': 'Merci', 'age': 50}]
>>>
First, if your dictionaries only have one entry, tuples seem to be a better choice for this data:
>>> names = [('Mark', 'Volvo'), ('John', 'BMW'), ('Eliza', 'Merci'), ('Calen', 'Audi')]
Now, given this order:
>>> order = ['John', 'Mark', 'Calen', 'Eliza']
you can create a dict that maps the names to the indices:
>>> order_map = { k: v for v, k in enumerate(order) }
>>> order_map
{'John': 0, 'Mark': 1, 'Calen': 2, 'Eliza': 3}
and use it in a key function for sort:
>>> names.sort(key=lambda v: order_map[v[0]])
>>> names
[('John', 'BMW'), ('Mark', 'Volvo'), ('Calen', 'Audi'), ('Eliza', 'Merci')]
names = [
    {'Mark': 'Volvo'},
    {'John': 'BMW'},
    {'Eliza': 'Merci'},
    {'Calen': 'Audi'},
]
ordered_keys = ['John', 'Mark', 'Calen', 'Eliza']

# Walk the wanted keys in order and collect each dict that contains the key.
sorted_names = []
for key in ordered_keys:
    for name in names:
        if key in name:
            sorted_names.append(name)
It iterates over the ordered_keys in order and extracts any name in the list of dict that has that key.

Appending a list in a dictionary with a value of index of a list

I have a dictionary and a list:
results = {"Alice":[], "Bob":[], "Clare":[], "Dennis":[], "Eva":[]}
list_of_names = ['Bob', 'Alice', 'Clare', 'Eva', 'Dennis']
and I want to fill those lists with a value of index+1 accordingly.
So that if we have the list above the dictionary would look like this
results = {"Alice":[2], "Bob":[1], "Clare":[3], "Dennis":[5], "Eva":[4]}
this is my current code
Aindex = list_of_names.index("Alice")
Bindex = list_of_names.index("Bob")
Cindex = list_of_names.index("Clare")
Dindex = list_of_names.index("Dennis")
Eindex = list_of_names.index("Eva")
Aindex = Aindex + 1
Bindex = Bindex + 1
Cindex = Cindex + 1
Dindex = Dindex + 1
Eindex = Eindex + 1
results["Alice"].append(Aindex)
results["Bob"].append(Bindex)
results["Clare"].append(Cindex)
results["Dennis"].append(Dindex)
results["Eva"].append(Eindex)
Is there any way to shorten this code and make it work for any amount of dictionary/list entries?
yes. One line with dictionary comprehension and enumerate starting at 1:
list_of_names = ['Bob', 'Alice', 'Clare', 'Eva', 'Dennis']
results = {name:[i] for i,name in enumerate(list_of_names,1)}
>>> results
{'Alice': [2], 'Bob': [1], 'Clare': [3], 'Dennis': [5], 'Eva': [4]}
If you want to use the existing list inside the dict
results = {"Alice": [], "Bob": [], "Clare": [], "Dennis": [], "Eva": []}
list_of_names = ['Bob', 'Alice', 'Clare', 'Eva', 'Dennis']
# enumerate(..., 1) numbers the names from 1, i.e. index + 1
for position, name in enumerate(list_of_names, 1):
    results[name].append(position)

count how many a combination occurs in a list

I created a list by doing this:
list3= [zip(Indiener1, Indiener2)]
Both elements are long lists of names.
But as a third element in the small combined list I want the number of times the combination of names occurs in the whole list3 as I have to do calculations with that number.
I tried list3.count() but that function only wanted to take one item.
How can I do this?
from collections import Counter

list1 = ["a", "b", "d", "b"]
list2 = ["5", "u", "55", "u"]
# zip() returns a one-shot iterator in Python 3; turning it into a list
# keeps the pairs available after they have been counted
list3 = list(zip(list1, list2))
print(Counter(list3))
it outputs:
Counter({('b', 'u'): 2, ('d', '55'): 1, ('a', '5'): 1})
Use a counter and reverse the pairings to get ("foo","bar") == ("bar","foo"):
from collections import Counter

l1 = ["foo", "bar", "foobar"]
l2 = ["bar", "foo", "bar"]

c = Counter(zip(l1, l2))
for k, v in c.items():
    rev = tuple(reversed(k))
    # A pair such as ("x", "x") is its own reverse; adding c[rev] for it
    # would count the same occurrences twice
    total = v if rev == k else v + c.get(rev, 0)
    print("pairing {} appears {}".format(k, total))
To avoid getting double output ('foo', 'bar') and ('bar', 'foo') you can add rev to a set and check that it has not been seen already:
from collections import Counter

c = Counter(zip(l1, l2))
# Reversed forms of the pairs already reported, so each unordered pair
# is printed only once
seen = set()
for k, v in c.items():
    rev = tuple(reversed(k))
    if k not in seen:
        seen.add(rev)
        # A pair such as ("x", "x") is its own reverse; adding c[rev]
        # for it would count the same occurrences twice
        total = v if rev == k else v + c.get(rev, 0)
        print("pairing {} appears {} times".format(k, total))
pairing ('foo', 'bar') appears 2 times
pairing ('foobar', 'bar') appears 1 times
Since ("foo","bar") and ("bar","foo") are considered the same, you have to count on something like sets, where order doesn't matter:
>>> from collections import Counter
>>> l1 = ['John', 'Doe', 'Paul', 'Pablo', 'Paul', 'Doe']
>>> l2 = ['Doe', 'John', 'Doe', 'Doe', 'Doe', 'Paul']
>>> print Counter(frozenset(pair) for pair in zip(l1, l2))
Counter({
frozenset(['Paul', 'Doe']): 3,
frozenset(['John', 'Doe']): 2,
frozenset(['Doe', 'Pablo']): 1
})
You can also sort the pairs before counting, but a set makes the purpose more explicit.

finding top k largest keys in a dictionary python

Lets say I have a dictionary:
{key1:value1........... keyn:valuen}
So lets say I want to write a function
def return_top_k(dictionary, k):
return list_of_keys_sorted
What is the most efficient way (in terms of big O) to get the keys which have the top k values (maintaining the order i.e the highest value key is present in the beginning.. and so on.)
O(n log k):
import heapq
k_keys_sorted = heapq.nlargest(k, dictionary)
You could use key keyword parameter to specify what should be used as a sorting key e.g.:
k_keys_sorted_by_values = heapq.nlargest(k, dictionary, key=dictionary.get)
def return_top_k(dictionary, k):
    """Return the k keys of ``dictionary`` with the largest values.

    The keys are ordered from the highest value to the lowest. Fewer than
    k keys are returned when the dictionary is smaller than k, and an
    empty list when k is zero or negative.
    """
    if k <= 0:
        return []
    # Sort every key by its value, largest first, and keep the first k
    return sorted(dictionary, key=dictionary.get, reverse=True)[:k]
Should be at worst O(NlogN) (although heapq proposed by others is probably better) ...
It might also make sense to use a Counter instead of a regular dictionary. In that case, the most_common method will do (approximately) what you want (dictionary.most_common(10)), but only if it makes sense to use a Counter in your API.
# Stock holdings; each record carries the 'price' that both queries rank by.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]


def price_of(stock):
    """Return the 'price' field of one portfolio record."""
    return stock['price']


# The three lowest-priced and the three highest-priced records
cheap = heapq.nsmallest(3, portfolio, key=price_of)
expensive = heapq.nlargest(3, portfolio, key=price_of)
For top-3 step by step:
>>> from operator import itemgetter
>>> dct = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
>>> sorted(dct.items(), key=itemgetter(1), reverse=True)
[('e', 5), ('d', 4), ('c', 3), ('b', 2), ('a', 1)]
>>> map(itemgetter(0), sorted(dct.items(), key=itemgetter(1), reverse=True))
['e', 'd', 'c', 'b', 'a']
>>> map(itemgetter(0), sorted(dct.items(), key=itemgetter(1), reverse=True))[:3]
['e', 'd', 'c']
Or using heapq module
>>> import heapq
>>> from operator import itemgetter
>>> heapq.nlargest(3, dct.items(), key=itemgetter(1))
[('e', 5), ('d', 4), ('c', 3)]
>>> map(itemgetter(0), _)
['e', 'd', 'c']
In code
dct = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
k = 3
# Iterating a dict yields its keys; sort them largest first and keep k
print(sorted(dct, reverse=True)[:k])
If you also need values:
# (key, value) pairs compare by key first, so this is still ordered by key
print(sorted(dct.items(), reverse=True)[:k])
Or if you would want to use OrderedDict:
from collections import OrderedDict

d = OrderedDict(sorted(dct.items(), reverse=True))
# keys() is a view in Python 3 and cannot be sliced, so list the keys first
print(list(d)[:k])
So if you want the top k most frequent elements from the dictionary, you can use the heapq.nlargest function.
Here is the example for the same:
return heapq.nlargest(k,count.keys(), key = count.get)
Here, k is the number of elements to return: the result is the k keys with the highest counts, ordered from most to least frequent.
count.keys() : This gives the keys (the distinct elements) of count, the dictionary of occurrences built with collections.Counter.
key = count.get : This tells nlargest to rank each key by its count, i.e. by the value stored for it in count. If we skip this, the keys are compared with each other directly, so we get the k largest keys rather than the k most frequent ones.

Categories