Calculating element with most occurrences in list

Calculating element with most occurrences in list - python

I have
dict = {
0: ['9', '3', '3', '5', '1'],
1: ['9', '4', '1', '2'],
2: ['3', '4', '1', '5', '5'],
3: ['4', '4', '5', '5', '1'],
4: ['2', '8', '9', '5', '1']
}
take an example 0:['9', '3', '3', '5', '1'] here 3 has more number of occurance so iw ant to update enter list with only 3 so at index 0 becomes [3]
if every element has equal weight like 1: ['9', '4', '1', '2'] there will be no change
another ['4', '4', '5', '5', '1'] returns ['4','5']
i tried using collection.counter but donot know how to update the original dict with max repeated
for i,j in dictonary.items():
dictonary[i]=Counter(j)
expected output:
{0: ['3'], 1: ['9', '4', '1', '2'], 2: ['5'], 3: ['4'], 4: ['2', '8', '9', '5', '1']}
Edit: the list size may vary
[1,1,1,1,1,2,2,2,2,2,3,3,3]->[1,2]
[3,3,3,3,4,4,4,4,4,5,6,6,6,6,6]->[4,6]

from collections import *
d ={0: ['9', '3', '3', '5', '1'], 1: ['9', '4', '1', '2'], 2: ['3', '4', '1', '5', '5'], 3: ['4', '5', '0', '4', '3'], 4: ['2', '8', '9', '5', '1']}
for i,j in d.items():
c = Counter(j)
top = c.most_common(1)
if top[0][1] > 1:
d[i] = [ top[0][0] ]
print d
{0: ['3'], 1: ['9', '4', '1', '2'], 2: ['5'], 3: ['4'], 4: ['2', '8', '9', '5', '1']}
EDIT:
from collections import *
d = {
0: ['4', '4', '5', '5', '1'],
1: ['9', '4', '1', '2'],
2: ['3', '4', '1', '5', '5']
}
for i,j in d.items():
c = Counter(j)
result = []
for x in c:
if c[x] > 1:
result.append(x)
if result:
d[i] = result
print d
{0: ['5', '4'], 1: ['9', '4', '1', '2'], 2: ['5']}
EDIT:
from collections import *
d = {
0: ['4', '4', '5', '5', '1'],
1: [1,1,1,1,1,2,2,2,2,2,3,3,4],
2: ['3', '4', '1', '5', '5']
}
for i,j in d.items():
c = Counter(j)
longest = c.most_common(1)[0][1]
if longest > 1:
result = []
for x in c:
if c[x] == longest:
result.append(x)
d[i] = result
print d
{0: ['5', '4'], 1: [1, 2], 2: ['5']}

It appears you're looking for the mode of each element in your dictionary.
from collections import Counter
def mode(L):
result = []
for x in L:
if Counter(L)[x] == max(Counter(L).values()):
if x not in result: result.append(x)
return result
for item in dict:
dict[item] = mode(dict[item])

Related

How to add specific elements from list of lists?

Let's say I have the following list that contains other lists:
episodes = [
['1', '2', '3', '4', '5', '6', '7'],
['1', '2', '3', '4', '5', '1', '2', '3', '4', '5', '6'],
['1', '2', '3', '1', '2', '3', '4', '5', '6', '1', '2', '3', '4', '5', '6', '1', '2', '3', '4', '1', '2', '3']
]
Each list refer to episodes of TV-shows. For example, the first list has one season with 7 episodes, whereas the last list has five seasons with season one having 3 episodes, season 2 6 episodes and so on. I would like to save the total number of episodes for each TV-show by adding the total number of episodes for each season of the TV-show. In other words, first list is just taking the last element of that list, but the last list I have to add 3 & 6 & 6 & 4 & 3.
I hope you understand what I'm trying to do. I thought about using indexing and see where the element '1' lies in each list with more than one season so I can choose the element before (number of episodes of previous season) and so on. But it gets a bit tricky doing it for all other seasons as well.

if you just want the total number of episodes regardless of seasons just count the length of the each show list.
episodes = [
['1', '2', '3', '4', '5', '6', '7'],
['1', '2', '3', '4', '5', '1', '2', '3', '4', '5', '6'],
['1', '2', '3', '1', '2', '3', '4', '5', '6', '1', '2', '3', '4', '5', '6', '1', '2', '3', '4', '1', '2', '3']
]
print(*[len(show) for show in episodes], sep="\n")
OUTPUT
7
11
22

If I understand you correctly you can just sum up the length of each list, i.e. sum(map(len, episodes))?

I came up with something like this to check for number of episodes in each season:
episodes = [
['1', '2', '3', '4', '5', '6', '7', "8", "9", "10", "11"],
['1', '2', '3', '4', '5',
'1', '2', '3', '4', '5', '6'],
['1', '2', '3',
'1', '2', '3', '4', '5', '6',
'1', '2', '3', '4', '5', '6',
'1', '2', '3', '4',
'1', '2', '3']
]
def count_episodes(list_of_episodes):
all_episodes = [e for l in list_of_episodes for e in l]
previous_episode = 0
season = 1
seasons = {}
tmp_list = []
for i in all_episodes:
if previous_episode > int(i):
seasons["season "+str(season)] = tmp_list
tmp_list = []
season += 1
previous_episode = int(i)
tmp_list.append(i)
if len(tmp_list) > 0:
seasons["season "+str(season)] = tmp_list
for i in seasons:
print(i + " has {} episodes".format(len(seasons[i])))
count_episodes(episodes)
OUTPUT
season 1 has 11 episodes
season 2 has 5 episodes
season 3 has 6 episodes
season 4 has 3 episodes
season 5 has 6 episodes
season 6 has 6 episodes
season 7 has 4 episodes
season 8 has 3 episodes

simpler solution
At the end, it is sth as simple as:
list(map(len, episodes))
## [7, 11, 22]
If all episodes are listed always, then you can just count the total number of episodes - it will be always the same as if you determined the maximum numbers and summed them up.
slighlty more elaborate solution
from functools import reduce
def list_to_max_seasons(lst, first_element='1'):
return reduce(lambda l, x: l + [x] if x == first_element else l[:-1] + [x], lst, [])
def sum_max_episodes(lol):
return [(lambda l: sum([int(x) for x in l]))(list_to_max_seasons(il)) for il in lol]
Test by:
sum_max_episodes(episodes)
## [7, 11, 22]
more elaborate (recursive) solution
These solutions are not very pythonic, I admit.
But they are quite universal.
def list_to_seasons(l, acc=[], by='1', tmp=[]):
if l == []:
return acc+[tmp]
elif l[0] == by:
if tmp == []:
return list_to_seasons(l[1:], acc=[], by=by, tmp=[l[0]])
else:
return list_to_seasons(l[1:], acc=acc+[tmp], by=by, tmp=[l[0]])
else:
return list_to_seasons(l[1:], acc=acc, by=by, tmp=tmp+[l[0]])
list_to_seasons(['1','2','3','1','2','1','2','3','1','1'])
## [['1', '2', '3'], ['1', '2'], ['1', '2', '3'], ['1'], ['1']]
Or:
def list_to_max_seasons(l, acc=[], by='1', last=None):
if l == []:
return acc+[last]
elif l[0] == by:
if last is None:
return list_to_max_seasons(l[1:], acc=[], by=by, last=l[0])
else:
return list_to_max_seasons(l[1:], acc=acc+[last], by=by, last=l[0])
else:
return list_to_max_seasons(l[1:], acc=acc, by=by, last=l[0])
list_to_max_seasons(['1','2','3','1','2','1','2','3','1','1'])
## ['3', '2', '3', '1', '1']
Then:
def sum_max_episodes(lol):
return [(lambda l: sum([int(x) for x in l]))(list_to_max_seasons(il)) for il in lol]
Test by:
sum_max_episodes(episodes)
## [7, 11, 22]

Loop to gradually to remove elements to dictionary in Python

I have a dictionary that for example looks like this:
new_dict = {
0: ['1'],
1: ['1', '2'],
2: ['1', '2', '3'],
3: ['1', '2', '3', '4'],
4: ['1', '2', '3', '4', '5'],
5: ['1', '2', '3', '4', '5', '6']
}
the values are just gradually appended, meaning that the next value contains the elements of the previous value + its own value.
and my question is, how can I, starting from key=3 included, remove gradually all the starting values?
so for example, for key=3, after the code the new key=3 should look like this:
3: ['2', '3', '4'] #Should remove the value of key=0
then for key=4, the new key=4 should look like this:
4: ['3', '4', '5'] #Should remove the value of key=1
And so on til the end of new_dict.

Here is a relatively compact solution:
new_dict = {0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['1', '2', '3', '4'], 4: ['1', '2', '3', '4', '5'], 5: ['1', '2', '3', '4', '5', '6']}
up_dict={i:[item for item in new_dict[i] if item not in new_dict[i-3]] if i>2 else new_dict[i] for i in new_dict}
print(up_dict)
Output:
{0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['2', '3', '4'], 4: ['3', '4', '5'], 5: ['4', '5', '6']}

You can loop through the items and reassign the list to the last three elements:
In [1]: new_dict = {0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['1', '2', '3', '4'], 4: ['1', '2', '3', '4', '5'], 5: ['1', '2', '3', '4', '5', '6']}
In [2]: for k, v in new_dict.items():
...: if len(v) > 3:
...: new_dict[k] = v[-3:]
...:
In [3]: new_dict
Out[3]:
{0: ['1'],
1: ['1', '2'],
2: ['1', '2', '3'],
3: ['2', '3', '4'],
4: ['3', '4', '5'],
5: ['4', '5', '6']}
As a dict comprehension:
out_dict = {k: v[-3:] if len(v) > 3 else v for k, v in new_dict.items()}

You can use dictionary comprehension, and take the key value pair as it is if key<3 else take only the items that are not in the values for key at 0,1,2, and so on.
{k:new_dict[k] if idx<0 else [i
for i in new_dict[k] if i not in new_dict[idx]]
for idx,k in enumerate(new_dict, -3)}
OUTPUT:
{0: ['1'],
1: ['1', '2'],
2: ['1', '2', '3'],
3: ['2', '3', '4'],
4: ['3', '4', '5'],
5: ['4', '5', '6']}

This should do the job and be quite efficient as well, if that's important to you:
new_dict = {0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['1', '2', '3', '4'], 4: ['1', '2', '3', '4', '5'], 5: ['1', '2', '3', '4', '5', '6']}
print({key:(value,value[key-2:])[key//3] for key,value in new_dict.items()})
>>> {0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['2', '3', '4'], 4: ['3', '4', '5'], 5: ['4', '5', '6']}

Shorter comprehension-based solution:
new_dict = {0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['1', '2', '3', '4'], 4: ['1', '2', '3', '4', '5'], 5: ['1', '2', '3', '4', '5', '6']}
c = 0
r = {a:b if a < 3 else b[(c:=c+1):] for a, b in new_dict.items()}
Output:
{0: ['1'], 1: ['1', '2'], 2: ['1', '2', '3'], 3: ['2', '3', '4'], 4: ['3', '4', '5'], 5: ['4', '5', '6']}

How to count the number of separate regions in python array

I need to count the number of separate regions in an array.
An example array is below. I can count the number of unique strings but if there are two distinct islands like in the grid below where 64733 is in the upper left and bottom right my function won't properly count the number of regions.
Can anyone help me to find a way to count the number of regions if two of the regions are comprised of the same numbers? Im writing in python 2.x.
|64733|20996|92360|92360|04478|04478|04478|04478|04478|98101
|64733|92360|92360|92360|04478|04478|04478|04478|04478|04478
|64733|92360|29136|92360|04478|04478|04478|04478|04478|04478
|64733|92360|29136|92360|04478|04478|04478|04478|04478|04478
|92360|92360|92360|92360|04478|04478|04478|04478|04478|04478
|04478|04478|04478|04478|04478|04478|04478|04478|04478|04478
|04478|04478|04478|04478|04478|04478|04478|04478|04478|04478
|04478|04478|04478|04478|04478|04478|04478|04478|04478|04478
|04478|04478|04478|04478|04478|04478|04478|04478|04478|64773
|04478|04478|04478|04478|04478|04478|04478|04478|64773|64773
The input is a 3d array and looks like
[[['6', '4', '7', '3', '3'],
['2', '0', '9', '9', '6'],
['9', '2', '3', '6', '0'],
['9', '2', '3', '6', '0'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['9', '8', '1', '0', '1']],
[['6', '4', '7', '3', '3'],
['9', '2', '3', '6', '0'],
['9', '2', '3', '6', '0'],
['9', '2', '3', '6', '0'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8'],
['0', '4', '4', '7', '8']],
[['6', '4', '7', '3', '3'],
['9', '2', '3', '6', '0'],
This is not the complete input (because it was huge) but it gets the point across i think
So would be where any unique symbol string of numbers lies and the total number of adjacent strings that are identical (adjacent being up down left right, not diagonal).
enter code here

You didn't post your full array, so I made some assumptions about the original pip separated value you gave us. I've written this in Python 3, but I think it should work in Python 2 only modifying the print function calls to statements.
arr = [["64733","20996","92360","92360","04478","04478","04478","04478","04478","98101"],
["64733","92360","92360","92360","04478","04478","04478","04478","04478","04478"],
["64733","92360","29136","92360","04478","04478","04478","04478","04478","04478"],
["64733","92360","29136","92360","04478","04478","04478","04478","04478","04478"],
["92360","92360","92360","92360","04478","04478","04478","04478","04478","04478"],
["04478","04478","04478","04478","04478","04478","04478","04478","04478","04478"],
["04478","04478","04478","04478","04478","04478","04478","04478","04478","04478"],
["04478","04478","04478","04478","04478","04478","04478","04478","04478","04478"],
["04478","04478","04478","04478","04478","04478","04478","04478","04478","64773"],
["04478","04478","04478","04478","04478","04478","04478","04478","64773","64773"]]
x = len(arr)
y = len(arr[0])
#create a new array the same size as the original
regions = [[None for _ in range(y)] for _ in range(x)]
print(regions)
label = 0
queue = []
def check_neighbor(i, j, v):
if not regions[i][j] and arr[i][j] == v:
regions[i][j] = label
queue.insert(0, (i, j))
for i in range(x):
for j in range(y):
#don't check an already labelled region
if regions[i][j]: continue
label += 1 #new label
regions[i][j] = label
queue = [(i, j)]
v = arr[i][j]
#keep checking neighbours until we run out
while queue:
(X, Y) = queue.pop()
if X > 0:
check_neighbor(X-1, Y, v)
if X < x-1:
check_neighbor(X+1, Y, v)
if Y > 0:
check_neighbor(X, Y-1, v)
if Y < y-1:
check_neighbor(X, Y+1, v)
print(regions)
print(label) # this is the number of regions

Deleting one position to another position elements from list in python

I have a list like the one below and I would like to delete all entries between any word (inclusive) and the next '0' (exclusive).
So for example this list:
array = ['1', '1', '0', '3', '0', '2', 'Continue', '1', '5', '1', '4', '0', '7', 'test', '3', '6', '0']
should become:
['1', '1', '0', '3', '0', '2', '0', '7', '0']

You can also do it by using exclusively list comprehension:
array = ['1', '1', '0', '3', '0', '2', 'Continue', '1', '5', '1', '4', '0', '7', 'test', '3', '6', '0']
# Find indices of strings in list
alphaIndex = [i for i in range(len(array)) if any(k.isalpha() for k in array[i])]
# Find indices of first zero following each string
zeroIndex = [array.index('0',i) for i in alphaIndex]
# Create a list with indices to be `blacklisted`
zippedIndex = [k for i,j in zip(alphaIndex, zeroIndex) for k in range(i,j)]
# Filter the original list
array = [i for j,i in enumerate(array) if j not in zippedIndex]
print(array)
Output:
['1', '1', '0', '3', '0', '2', '0', '7', '0']

array = ['1', '1', '0', '3', '0', '2', 'Continue', '1', '5', '1', '4', '0', '7', 'test', '3', '6', '0']
res = []
skip = False #Flag to skip elements after a word
for i in array:
if not skip:
if i.isalpha(): #Check if element is alpha
skip = True
continue
else:
res.append(i)
else:
if i.isdigit(): #Check if element is digit
if i == '0':
res.append(i)
skip = False
print res
Output:
['1', '1', '0', '3', '0', '2', '0', '7', '0']

Kicking it old school -
array = ['1', '1', '0', '3', '0', '2', 'Continue', '1', '5', '1', '4', '0', '7', 'test', '3', '6', '0']
print(array)
array_op = []
i=0
while i < len(array):
if not array[i].isdigit():
i = array[i:].index('0')+i
continue
array_op.append(array[i])
i += 1
print(array_op)

Python - TypeError: unorderable types: str() < int()

I get this error when I try to run my code. Please help, and explain what it's trying to say!
This program is meant to find a persons ballot paper vote preferences (1st, 2nd, 3rd etc) for each candidate, and remove their first preference. That first preference is replaced by 99999, to make it easier for me.
Error:
ERROR CODE: minIndex = int(vote.index(min(vote)))
TypeError: unorderable types: str() < int()
Code:
def countVotes(voteList):
candidatePreferences = []
for vote in voteList:
minIndex = int(vote.index(min(vote)))
candidatePreferences.append(minIndex)
vote[minIndex] = 99999
return candidatePreferences
Value of vList (after calling countVotes function first time):
[[99999, '3', '4', '5', '2'], ['4', '2', '5', '3', 99999], [99999, '3', '2', '5', '4'], [99999, '2', '4', '3', '5'], [99999, '3', '4', '5', '2'], ['2', 99999, '3', '5', '4'], [99999, '3', '4', '5', '2'], ['3', '5', '2', '4', 99999], [99999, '4', '5', '2', '3'], ['5', 99999, '4', '3', '2'], ['3', '2', '5', '4', 99999], ['3', 99999, '2', '5', '4'], ['2', '5', 99999, '4', '3'], ['3', '2', 99999, '4', '5'], ['4', '5', '3', 99999, '2'], [99999, '5', '4', '3', '2'], [99999, '5', '3', '4', '2'], ['2', 99999, '4', '3', '5'], ['4', 99999, '2', '5', '3']]
Calling the function (second time):
cp = countVotes(vList)
minIndex = int(vote.index(min(vote)))
TypeError: unorderable types: str() < int()

Python cannot order str values and int values together.
In your example 99999 is an int value while all the other values are str values.

I think is this line minIndex = int(vote.index(min(vote)))
You are trying to find min on a string list. so what you can
def countVotes(voteList):
candidatePreferences = []
for vote in voteList:
vote = map(int, vote)
minIndex = int(vote.index(min(vote)))
candidatePreferences.append(minIndex)
vote[minIndex] = 99999
return candidatePreferences
After converting string list to Int list you can perform min operation.
Please check and let me know.

Try this code. it fixed the error:
def countVotes(voteList):
candidatePreferences = []
for vote in voteList:
minIndex = int(vote.index(min(vote)))
candidatePreferences.append(minIndex)
vote[minIndex] = '99999'
return candidatePreferences
vList = [['1', '3', '4', '5', '2'], ['4', '2', '5', '3', '1'], ['1', '3', '2', '5', '4'], ['1', '2', '4', '3', '5'], ['1', '3', '4', '5', '2'], ['2', '1', '3', '5', '4'], ['1', '3', '4', '5', '2'], ['3', '5', '2', '4', '1'], ['1', '4', '5', '2', '3'], ['5', '1', '4', '3', '2'], ['3', '2', '5', '4', '1'], ['3', '1', '2', '5', '4'], ['2', '5', '1', '4', '3'], ['3', '2', '1', '4', '5'], ['4', '5', '3', '1', '2'], ['1', '5', '4', '3', '2'], ['1', '5', '3', '4', '2'], ['2', '1', '4', '3', '5'], ['4', '1', '2', '5', '3']]
cp = countVotes(vList)
print(str(cp))
output:
[0, 4, 0, 0, 0, 1, 0, 4, 0, 1, 4, 1, 2, 2, 3, 0, 0, 1, 1]

Convert string elements to integer.You can use python map function to convert all the list elements to integer
def countVotes(voteList):
candidatePreferences = []
for vote in voteList:
vote = list(map(int, vote)) # add this line to the code to convert string elements to integer
minIndex = int(vote.index(min(vote)))
candidatePreferences.append(minIndex)
vote[minIndex] = 99999
return candidatePreferences

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Calculating element with most occurrences in list - python

It appears you're looking for the mode of each element in your dictionary. from collections import Counter def mode(L): result = [] for x in L: if Counter(L)[x] == max(Counter(L).values()): if x not in result: result.append(x) return result for item in dict: dict[item] = mode(dict[item])

Related

How to add specific elements from list of lists?

Loop to gradually to remove elements to dictionary in Python

How to count the number of separate regions in python array

Deleting one position to another position elements from list in python

Python - TypeError: unorderable types: str() < int()

Categories

Resources