Mode of a List - Python - python

I am trying to write my own function that finds the mode of a list but it sputters when there are multiple modes. Can someone help me in adding something to the function that deals with the case with multiple modes. Thanks in advance!
def ModeList(nums):
subscript = 0
while subscript < len(nums):
if nums.count(nums[subscript]) > nums.count(nums[subscript + 1]):
return "The mode is " + str( nums[subscript] ) + "."
else:
subscript += 1
print ModeList( [2,4,6,8,6,8] )

Easiest is to use a collections.Counter():
from collections import Counter
def ModeList(lst):
    """Return the single most common item of *lst*.

    Only one item is returned even when several share the highest count;
    which of the tied items wins is whatever ``Counter.most_common``
    reports first.

    Raises:
        IndexError: if *lst* is empty, because ``most_common(1)`` is ``[]``.
    """
    return Counter(lst).most_common(1)[0][0]
Demo:
>>> from collections import Counter
>>> def ModeList(lst):
... return Counter(lst).most_common(1)[0][0]
...
>>> ModeList( [2,4,6,8,6,8] )
8
Add in itertools.groupby() if you need all values:
from collections import Counter
from itertools import groupby
from operator import itemgetter
def ModeList(lst):
    """Return a list of every item of *lst* that has the highest count.

    ``most_common()`` orders items by descending count, so grouping that
    sequence by count puts all the modes in the first group.  An empty
    input returns an empty list.
    """
    counts = Counter(lst)
    grouped = groupby(counts.most_common(), itemgetter(1))
    # Default to an empty group so an empty input does not raise StopIteration.
    _, top_group = next(grouped, (None, ()))
    return [value for value, _ in top_group]
Demo:
>>> from collections import Counter
>>> from itertools import groupby
>>> from operator import itemgetter
>>>
>>> def ModeList(lst):
... counts = Counter(lst)
... grouped = groupby(counts.most_common(), itemgetter(1))
... return [i[0] for i in next(grouped)[1]]
...
>>> ModeList( [2,4,6,8,6,8] )
[8, 6]
Without importing, use a dictionary to track counts, then sort by value:
def ModeList(lst):
    """Return one most common item of *lst* without importing anything.

    When several items tie for the highest count, the one that appears
    first in *lst* is returned.

    Raises:
        IndexError: if *lst* is empty.
    """
    counts = {}
    for item in lst:
        counts[item] = counts.get(item, 0) + 1
    if not counts:
        # Same exception type the sort-and-take-first form raised.
        raise IndexError("list index out of range")
    # max() finds the top count in one pass; sorting every key just to
    # take the first element is unnecessary work.
    return max(counts, key=counts.get)
or for a list:
def ModeList(lst):
    """Return a list of every item of *lst* that has the highest count.

    Counts are tracked in a plain dict, the keys are sorted by ascending
    count, and items are popped from the high end for as long as they
    match the top count.  An empty input returns an empty list.
    """
    counts = {}
    for item in lst:
        counts[item] = counts.get(item, 0) + 1
    if not counts:
        return []
    bycount = sorted(counts, key=counts.get)
    result = [bycount.pop()]
    # Guard on bycount, the list being consumed: when every item ties for
    # the top count it empties out and bycount[-1] would raise IndexError.
    while bycount and counts[bycount[-1]] == counts[result[0]]:
        result.append(bycount.pop())
    return result

Related

Count occurrences of char in a single string

string = input(" ")
count = string.count()
print(string + str(count))
Need to use a for loop to get the output: ll2a1m1a1
Use groupby from itertools
>>> from itertools import groupby
>>> s = 'llama'
>>> [[k, len(list(g))] for k, g in groupby(s)]
[['l', 2], ['a', 1], ['m', 1], ['a', 1]]
If you want exactly the output you asked for, try the following and, as suggested by @DanielMesejo, use sum(1 for _ in g) instead of len(list(g)):
>>> from itertools import groupby
>>> s = 'llama'
>>> groups = [[k, sum(1 for _ in g)] for k, g in groupby(s)]
>>> ''.join(f'{a * b}{b}' for a, b in groups)
'll2a1m1a1'
This works for any word you want, let's say the word is 'happen', so
>>> from itertools import groupby
>>> s = 'happen'
>>> groups = [[k, sum(1 for _ in g)] for k, g in groupby(s)]
>>> ''.join(f'{a * b}{b}' for a, b in groups)
'h1a1pp2e1n1'
a more basic approach:
string = 'llama'
def get_count_str(s):
    """Yield each run of repeated characters in *s* followed by its length.

    For ``'llama'`` this yields ``'ll2'``, ``'a1'``, ``'m1'``, ``'a1'``.
    An empty string yields nothing.
    """
    if not s:
        return
    previous = s[0]
    for c in s[1:]:
        # Compare with the run's character, not the whole accumulated run:
        # a single character never equals a run of two or more, which
        # would split any run longer than two.
        if c != previous[0]:
            yield f'{previous}{len(previous)}'
            previous = c
        else:
            previous += c
    # yield last
    yield f'{previous}{len(previous)}'
print(*get_count_str(string ), sep='')
output:
ll2a1m1a1
Look bud, you gotta explain more; this loops through the word, counts how many times each letter occurs in the whole word, and prints each letter with its count.
greeting = 'llama'
for letter in greeting:
    # str.count() tallies every occurrence in the word, including the
    # current one, which matches starting a manual counter at 1 and adding
    # one for each matching letter at a different position.
    print(letter + str(greeting.count(letter)))

Filter a list of sets with specific criteria

I have a list of sets:
a = [{'foo','cpu','phone'},{'foo','mouse'}, {'dog','cat'}, {'cpu'}]
Expected outcome:
I want to look at each individual string, do a count and return everything x >= 2 in the original format:
a = [{'foo','cpu'}, {'foo'}, {'cpu'}]
Here's what I have so far but I'm stuck on the last part where I need to append the new list:
from collections import Counter
counter = Counter()
for a_set in a:
# Created a counter to count the occurrences a word
counter.update(a_set)
result = []
for a_set in a:
for word in a_set:
if counter[word] >= 2:
# Not sure how I should append my new set below.
result.append(a_set)
break
print(result)
You are just appending the original set. So you should create a new set with the words that occur at least twice.
result = []
for a_set in a:
    # Keep only the words that occur at least twice across all the sets.
    new_set = {word for word in a_set if counter[word] >= 2}
    if new_set:  # skip sets that the filter left empty
        result.append(new_set)
Instead, use the following short approach based on sets intersection:
from collections import Counter

a = [{'foo', 'cpu', 'phone'}, {'foo', 'mouse'}, {'dog', 'cat'}, {'cpu'}]
# Count how many sets each word appears in.
c = Counter(word for group in a for word in group)
# Words that appear in at least two sets.
valid_keys = {word for word in c if c[word] >= 2}
# Intersect each set with the valid words and keep the non-empty results.
res = [common for common in (group & valid_keys for group in a) if common]
print(res)  # [{'cpu', 'foo'}, {'foo'}, {'cpu'}]
Here's what I ended up doing:
Build a counter then iterate over the original list of sets and filter items with <2 counts, then filter any empty sets:
from itertools import chain
from collections import Counter

a = [{'foo', 'cpu', 'phone'}, {'foo', 'mouse'}, {'dog', 'cat'}, {'cpu'}]
# chain.from_iterable accepts the sets directly; copying each one to a
# list first is unnecessary.
c = Counter(chain.from_iterable(a))
# Drop words seen fewer than twice, then discard sets left empty.
res = list(filter(None, ({item for item in s if c[item] >= 2} for s in a)))
print(res)
Out: [{'foo', 'cpu'}, {'foo'}, {'cpu'}]

Improving index search speed in a large (million entry) list

I have a list which is a million items long of random, repeatable integers. I need to sort that list, and then find the index of the first iteration of every unique element in the list. When I do this, I am running into run time >5 minutes long. Can anyone give me any suggestions to speed up my code? An example of my process is shown below.
import random
a = []
for x in range(1000000):
a.append(random.randint(1,10000))
unique_a = set(a)
inds=[0]
inds = [a.index(i) for i in sorted(unique_a) if i not in inds]
inds = [a.index(i) for i in sorted(unique_a) if i not in inds] is implicitly quadratic because a.index(i) is linear. Use a dictionary to grab the indices in one pass over the sorted list:
a = sorted([0, 4, 3, 5, 21, 5, 6, 3, 1, 23, 4, 6, 1, 93, 34, 10])
unique_a = set(a)
# Map each value to the index of its first occurrence in the sorted list,
# in a single pass, instead of calling the linear a.index() per value.
first_inds = {}
for i, x in enumerate(a):
    if x not in first_inds:
        first_inds[x] = i
my_inds = [first_inds[x] for x in sorted(unique_a)]
Just store the first position for every unique element:
first_position = {}
for i, value in enumerate(a):
    # setdefault stores the index only the first time a value is seen,
    # so later duplicates leave the recorded position untouched.
    first_position.setdefault(value, i)
And then replace a.index(i) with first_position[i]
Or just use:
_, indices = zip(*sorted(first_position.items()))
You can use the bisect_left function from the standard library's bisect module to do this. On a sorted list, a bisection search is faster than searching through the list as index does.
>>> L = [random.randint(0, 10) for _ in range(100)]
>>> L.sort()
>>> L.index(9)
83
>>> bisect.bisect_left(L, 9)
83
>>> timeit.timeit(setup="from __main__ import L", stmt="L.index(9)")
2.1408978551626205
>>> timeit.timeit(setup="from __main__ import L;from bisect import bisect_left", stmt="bisect_left(L, 9)")
0.5187544231303036
On my machine, using bisect.bisect_left is faster than iterating over the list and accumulating indexes on the way:
>>> L = [random.randint(0, 100) for _ in range(10000)]
>>> L.sort()
>>> def iterative_approach(list_):
... unique = set(list_)
... first_inds = {}
... for i, x in enumerate(list_):
... if x not in first_inds:
... first_inds[x] = i
... return [first_inds[x] for x in sorted(unique)]
...
>>> ia = iterative_approach(L)
>>> bisect_left = bisect.bisect_left
>>> def bisect_approach(list_):
... unique = set(list_)
... out = {}
... for x in unique:
... out[x] = bisect_left(list_, x)
... return [out[x] for x in sorted(unique)]
...
>>> ba = bisect_approach(L)
>>> ia == ba
True
>>> timeit.timeit(setup="from __main__ import L, iterative_approach", stmt="iterative_approach(L)")
1488.956467495067
>>> timeit.timeit(setup="from __main__ import L, bisect_approach", stmt="bisect_approach(L)")
407.6803469741717

Max index of repeating elements divisible by 'n' in a List in Python

I have a sorted list of numbers like:
a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
I need to find the max index of each values which is divisible by 100.
Output should be like: 4,10,15
My Code:
a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
idx = 1
for i in (a):
if i%100 == 0:
print idx
idx = idx+1
Output of above code:
4
9
10
13
14
15
In case people are curious, I benchmarked the dict comprehension technique against the backward iteration technique. Dict comprehension is about twice the speed. Changing to OrderedDict resulted in MASSIVE slowdown. About 15x slower than the dict comprehension.
def test1():
    """Benchmark case: backward iteration.

    Walks the list from the end and records, for every distinct item, the
    0-based index of its last occurrence.  Returns the item -> index dict.
    """
    a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
    max_index = {}
    for i, item in enumerate(a[::-1]):
        if item not in max_index:
            # i counts from the end; convert it back to a forward index.
            max_index[item] = len(a) - (i + 1)
    return max_index
def test2():
    """Benchmark case: dict comprehension.

    Later duplicates overwrite earlier ones, so each item ends up mapped to
    the 1-based index of its last occurrence.
    """
    a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
    return {item: index for index, item in enumerate(a, 1)}
def test3():
    """Benchmark case: OrderedDict built from (item, index) pairs.

    Maps each item to the 1-based index of its last occurrence while
    keeping first-seen key order.  ``OrderedDict`` must be importable from
    this module's globals.
    """
    a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
    # Return the mapping, as the other benchmark cases do, instead of
    # discarding it.
    return OrderedDict((item, index) for index, item in enumerate(a, 1))
# test3 looks OrderedDict up in this module's globals.  Importing it only
# inside timeit's setup string binds it in timeit's private namespace, which
# test3 cannot see, so the name has to be imported at module level.
from collections import OrderedDict

if __name__ == "__main__":
    import timeit
    print(timeit.timeit("test1()", setup="from __main__ import test1"))
    print(timeit.timeit("test2()", setup="from __main__ import test2"))
    print(timeit.timeit("test3()", setup="from __main__ import test3"))
3.40622282028
1.97545695305
26.347012043
Use a simple dict-comprehension or OrderedDict with divisible items as the keys, old values will be replaced by newest values automatically.
>>> {item: index for index, item in enumerate(lst, 1) if not item % 100}.values()
dict_values([4, 10, 15])
# if order matters
>>> from collections import OrderedDict
>>> OrderedDict((item, index) for index, item in enumerate(lst, 1) if not item % 100).values()
odict_values([4, 10, 15])
Another way will be to loop over reversed list and use a set to keep track of items seen so far(lst[::-1] may be slightly faster than reversed(lst) for tiny lists).
>>> seen = set()
>>> [len(lst) - index for index, item in enumerate(reversed(lst))
if not item % 100 and item not in seen and not seen.add(item)][::-1]
[4, 10, 15]
You can see the sort-of equivalent code of the above here.
You could use itertools.groupby since your data is sorted:
>>> a = [77,98,99,100,101,102,198,199,200,200,278,299,300,300,300]
>>> from itertools import groupby
>>> [list(g)[-1][0] for k,g in groupby(enumerate(a), lambda t: (t[1] % 100, t[1])) if k[0] == 0]
[3, 9, 14]
Although this is a little cryptic.
Here's a complicated approach using only a list-iterator and accumulating into a list:
>>> run, prev, idx = False, None, []
>>> for i, e in enumerate(a):
... if not (e % 100 == 0):
... if not run:
... prev = e
... continue
... idx.append(i - 1)
... run = False
... else:
... if prev != e and run:
... idx.append(i - 1)
... run = True
... prev = e
...
>>> if run:
... idx.append(i)
...
>>> idx
[3, 9, 14]
I think this is best dealt with using a dictionary approach like @AshwiniChaudhary's. It is more straightforward, and much faster:
>>> timeit.timeit("{item: index for index, item in enumerate(a, 1)}", "from __main__ import a")
1.842843743012054
>>> timeit.timeit("[list(g)[-1][0] for k,g in groupby(enumerate(a), lambda t: (t[1] % 100, t[1])) if k[0] == 0]", "from __main__ import a, groupby")
8.479677081981208
The groupby approach is pretty slow. Note that the complicated approach is faster, and not far off from the dict-comprehension approach:
>>> def complicated(a):
... run, prev, idx = False, None, []
... for i, e in enumerate(a):
... if not (e % 100 == 0):
... if not run:
... prev = e
... continue
... idx.append(i - 1)
... run = False
... else:
... if prev != e and run:
... idx.append(i - 1)
... run = True
... prev = e
... if run:
... idx.append(i)
... return idx
...
>>> timeit.timeit("complicated(a)", "from __main__ import a, complicated")
2.6667005629860796
Edit Note, the performance difference narrows if we call list on the dict-comprehension .values():
>>> timeit.timeit("list({item: index for index, item in enumerate(a, 1)}.values())", "from __main__ import a")
2.3839886570058297
>>> timeit.timeit("complicated(a)", "from __main__ import a, complicated")
2.708565960987471
it seemed like a good idea at the start, got a bit twisty, had to patch a couple of cases...
a = [0,77,98,99,100,101,102,198,199,200,200,278,299,300,300,300, 459, 700,700]
bz = [*zip(*((i, d//100) for i, d in enumerate(a) if d%100 == 0 and d != 0))]
[a for a, b, c in zip(*bz, bz[1][1:]) if c-b != 0] + [bz[0][-1]]
Out[78]: [4, 10, 15, 18]
enumerate, zip to create bz which mates 100's numerator(s) with indices
bz = [*zip(*((i, d//100) for i, d in enumerate(a) if d%100 == 0 and d != 0))]
print(*bz, sep='\n')
(4, 9, 10, 13, 14, 15, 17, 18)
(1, 2, 2, 3, 3, 3, 7, 7)
then zip again: zip(*bz, bz[1][1:]) lags the numerator tuple so that the lagged difference gives the selection logic, if c-b != 0, for the last index of each run except the final one
add the last 100's match because its always the end of the last run + [bz[0][-1]]

Count how many times a part of a key appears in a dictionary python

I have the following dictionary and i want to count how many times keys appear, dictionary is very big.
a = { (1,2):3, (1,3):5, (2,1):6 }
and I want this result
1: 3 times
2: 2 times
3: 1 time
Use itertools.chain and a collections.Counter:
collections.Counter(itertools.chain(*a.keys()))
Alternatively:
collections.Counter(itertools.chain.from_iterable(a.keys()))
>>> from collections import Counter
>>> a = { (1,2):3, (1,3):5, (2,1):6 }
>>>
>>> Counter(j for k in a for j in k)
Counter({1: 3, 2: 2, 3: 1})
Use itertools and collections.defaultdict
In [43]: a={(1,2):3,(1,3):5,(2,1):6}
In [44]: counts = collections.defaultdict(int)
In [45]: for k in itertools.chain.from_iterable(a.keys()):
....: counts[k] += 1
....:
In [46]: for k in counts:
print k, ": %d times" %counts[k]
....:
1 : 3 times
2 : 2 times
3 : 1 times
from collections import Counter
items = Counter(val[2] for val in dic.values())
Hope that sorts it.
Using python 3.2
from collections import Counter
from itertools import chain
res = Counter(list(chain(*a)))
In Python:
import collections

# Tally every element of every key tuple, whatever the tuple's length.
s = collections.defaultdict(int)
for key in a:
    for part in key:
        s[part] += 1
for x in s:
    # Format the line rather than concatenating: the keys and counts are
    # ints, and int + str raises TypeError.
    print("%s: %d times" % (x, s[x]))

Categories