Sort list by frequency - python

Is there a way in Python to sort a list by the frequency of its elements?
For example,
[1,2,3,4,3,3,3,6,7,1,1,9,3,2]
the above list would be sorted in the order of the frequency of its values to create the following list, where the item with the greatest frequency is placed at the front:
[3,3,3,3,3,1,1,1,2,2,4,6,7,9]

I think this would be a good job for a collections.Counter:
import collections

# Tally each value once so the sort key is a constant-time lookup.
counts = collections.Counter(lst)
# Negating the count makes the ascending sort put the most frequent values first.
new_list = sorted(lst, key=lambda x: -counts[x])
Alternatively, you could write the second line without a lambda:
counts = collections.Counter(lst)
# counts.get looks up each element's count; reverse=True puts the highest counts first.
new_list = sorted(lst, key=counts.get, reverse=True)
If you have multiple elements with the same frequency and you care that those remain grouped, we can do that by changing our sort key to include not only the counts, but also the value:
counts = collections.Counter(lst)
# Sorting on (count, value) keeps equal-count elements grouped by value;
# reverse=True yields the highest count first and, within a tie, the larger value first.
new_list = sorted(lst, key=lambda x: (counts[x], x), reverse=True)

from collections import Counter

l = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
# Count every value once up front; using l.count as the sort key would rescan
# the whole list for each element.
counts = Counter(l)
# print() as a function call works on Python 3 (the bare print statement is Python 2 only).
print(sorted(l, key=counts.get, reverse=True))
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]

You can use a Counter to get the count of each item, use its most_common method to get it in sorted order, then use a list comprehension to expand again
>>> lst = [1,2,3,4,3,3,3,6,7,1,1,9,3,2]
>>>
>>> from collections import Counter
>>> [n for n,count in Counter(lst).most_common() for i in range(count)]
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]

In case you want to use a double comparator.
For example: Sort the list by frequency in descending order and in case of a clash the smaller one comes first.
import collections
def frequency_sort(a):
    """Sort the list ``a`` in place by descending frequency and return it.

    Elements that occur equally often are ordered by ascending value.
    """
    f = collections.Counter(a)

    def rank(value):
        # Negative count first (most frequent wins), then the value itself.
        return (-f[value], value)

    a.sort(key=rank)
    return a

I was practising this one for fun. This solution has a lower time complexity.
from collections import defaultdict

lis = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]

# Tally the occurrences of every value in a single pass.
dic = defaultdict(int)
for value in lis:
    dic[value] = dic[value] + 1

# Distinct values, most frequent first (ties keep first-seen order).
s_list = sorted(dic, key=lambda value: dic[value], reverse=True)

# Expand each distinct value back out to its original multiplicity.
new_list = []
for value in s_list:
    new_list.extend([value] * dic[value])
print(new_list)

def orderByFrequency(list):
    """Return the elements of ``list`` ordered by descending frequency.

    Values that occur equally often are ordered by ascending value. The
    returned list holds the original Python objects, and the function needs
    only the standard library.

    Note: the parameter name shadows the ``list`` builtin; it is kept so
    existing callers are unaffected, and the builtin is not used in the body.
    """
    from collections import Counter

    # One pass to count, instead of one list.count() scan per unique value.
    counts = Counter(list)
    # Most frequent first; the value itself breaks ties (smaller value first).
    ordered = sorted(counts, key=lambda value: (-counts[value], value))
    listOrderedByFrequency = []
    for value in ordered:
        listOrderedByFrequency.extend([value] * counts[value])
    return listOrderedByFrequency
#tests:
print(orderByFrequency([1,2,3,4,3,3,3,6,7,1,1,9,3,2]))
print(orderByFrequency([1,2,2]))
print(orderByFrequency([1,2,1,2]))
print(orderByFrequency([2,1,2,1]))
print(orderByFrequency([3,3,3,4,4,4,4,1,5,5,5,5,5,2,2]))
print(orderByFrequency([3,3,3,6,6,6,4,4,4,4,1,6,6,5,5,5,5,5,2,2]))
print(orderByFrequency([10,20,30,30,30,40,40,50,50,50]))
results:
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
[2, 2, 1]
[1, 1, 2, 2]
[1, 1, 2, 2]
[5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[30, 30, 30, 50, 50, 50, 40, 40, 10, 20]

from collections import Counter

a = [2, 5, 2, 6, -1, 9999999, 5, 8, 8, 8]
count = Counter(a)
# A single most_common() call returns every (value, count) pair from most to
# least frequent, replacing the repeated most_common(1)-then-delete loop that
# rescanned the counter once per distinct value.
a = []
for value, occurrences in count.most_common():
    a.extend([value] * occurrences)
print(a)

You can use the methods below. They are written in plain Python.
def frequencyIdentification(numArray):
    """Return a dict mapping each element of ``numArray`` to how often it occurs.

    Keys appear in the order the elements are first seen.
    """
    frequency = {}
    for item in numArray:
        # A missing key counts as zero, so one expression covers both cases.
        frequency[item] = frequency.get(item, 0) + 1
    return frequency
def sortArrayBasedOnFrequency(numArray):
    """Return the elements of ``numArray`` ordered by descending frequency.

    Also prints "loop count" followed by the number of loop iterations
    performed: one per distinct value plus one per emitted element.
    """
    frequency = frequencyIdentification(numArray)
    # Ascending by frequency; walking it backwards yields the most frequent first.
    ascending = sorted(frequency, key=frequency.get)
    sortedNumArray = []
    loop = 0
    for num in reversed(ascending):
        loop += 1
        for _ in range(frequency[num]):
            loop += 1
            sortedNumArray.append(num)
    print("loop count")
    print(loop)
    return sortedNumArray


a = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
print(a)
print("sorted array based on frequency of the number")
print(sortArrayBasedOnFrequency(a))

Related

How can I use different separators in join() in Python

E.g., given a list nums = [1, 2, 3, 4, 5, 6, 7, 8] I want to join each pair by a semicolon and each number within that pair by a comma to receive the following string: result = 1,2;3,4;5,6;7,8.
How can I do it without for loop (e.g., using a join)?
Using slicing
Ex:
nums = [1, 2, 3, 4, 5, 6, 7, 8]
# join() needs strings, so convert every number first.
nums = list(map(str, nums))
# Comma-join each consecutive pair, then semicolon-join the pairs.
pairs = (",".join(nums[start:start + 2]) for start in range(0, len(nums), 2))
print(";".join(pairs))
Output:
1,2;3,4;5,6;7,8
You could also cycle through the separators:
from itertools import cycle, chain
nums = [1, 2, 3, 4, 5, 6, 7, 8]
nums = [str(n) for n in nums]
# zip pairs each number with the next separator from the endless ',;' cycle,
# chain.from_iterable flattens those pairs, and [:-1] drops the trailing separator.
''.join(chain.from_iterable(zip(nums, cycle(',;'))))[:-1]
# '1,2;3,4;5,6;7,8'
Try this :
nums = [1, 2, 3, 4, 5, 6, 7, 8]
# Step through the list two items at a time so each slice is a distinct pair;
# stepping by one produced overlapping pairs and dropped the final pair.
result = ";".join(",".join(str(i) for i in nums[k:k + 2]) for k in range(0, len(nums), 2))
# OUTPUT :
# '1,2;3,4;5,6;7,8'

How can I find n smallest numbers without changing the order of the first list?

I intend to get the n smallest numbers in a list but keep the numbers in the same order they appear in the list. For example:
This is my list:
A = [1, 3, 4, 6, 7, 6, 8, 7, 2, 6, 8, 7, 0]
I like to get the first three lowest numbers as it has been ordered in the first list:
[1, 2, 0]
I do not want to sort the result as:
[0, 1, 2]
I have tried:
heapq.nsmallest(3,A)
but I wonder if it is possible to retain this list as: [1, 2, 0]
By the way, I'm not a Python coder so thanks for the help in advance.
You can try this:
A = [1, 3, 4, 6, 7, 6, 8, 7, 2, 6, 8, 7, 0]
# dict.fromkeys drops repeated values while keeping first-seen order.
new_a = list(dict.fromkeys(A))
# Compute the three smallest distinct values once, rather than re-sorting
# the list for every element of the comprehension.
smallest = set(sorted(new_a)[:3])
new_a = [i for i in new_a if i in smallest]
Output:
[1, 2, 0]
You could use heapq.nsmallest() to get the n smallest elements from the list. Then use collections.Counter to create a multiset from that list which you can use to check which elements from the original list to include in the result, e.g.
>>> from heapq import nsmallest
>>> from collections import Counter
>>> A = [1, 3, 4, 6, 7, 6, 8, 7, 2, 6, 8, 7, 0]
>>> n = 3
>>> c = Counter(nsmallest(n, A))
>>> result = []
>>> for elem in A:
... if c.get(elem, 0):
... result.append(elem)
... c[elem] -= 1
...
>>> result
[1, 2, 0]

I want to remove duplicate from list (without for loop)

Here I have a list
a = [1, 2, 1, 4, 5, 7, 8, 4, 6]
Now I want the following output, but without a for loop.
Remove all the duplicate from the list.
[2, 5, 7, 8, 6]
The output list contains only the numbers that occur exactly once.
Given: a = [1, 2, 1, 4, 5, 7, 8, 4, 6]
One liner:
# Keeps only the values that occur exactly once; a.count(x) rescans the list for every element.
b = [x for x in a if a.count(x) == 1]
You can use a Counter and a conditional list comprehension or filter in order to maintain the original order:
from collections import Counter
# Count every value once, then keep only those seen exactly once, in original order.
c = Counter(a)
clean_a = filter(lambda x: c[x] == 1, a) # avoids 'for' ;-)
# clean_a = list(filter(lambda x: c[x] == 1, a)) # Python3, if you need a list
# clean_a = [x for x in a if c[x] == 1] # would be my choice
This is a very simple and inefficient implementation.
We use a while loop to access every element of a. In the loop we check if the current element appears only once in the list. If yes, we add it to a new list.
a = [1, 2, 1, 4, 5, 7, 8, 4, 6]
result = []
index = 0
# Visit every position with a while loop (no for loop) and keep the values
# that appear exactly once in the list.
while index < len(a):
    candidate = a[index]
    index += 1
    if a.count(candidate) == 1:
        result.append(candidate)
print(result)
def cleaner(LIST, pos):
    """Remove, in place, every element from index ``pos`` on that reappears later.

    Only the last occurrence of each repeated value survives. The same list
    object is returned. Uses a loop rather than one recursive call per step,
    so long lists do not hit the interpreter's recursion limit.
    """
    while pos < len(LIST):
        if LIST[pos] in LIST[pos + 1:]:
            # A later copy exists: drop this one and re-check the same index,
            # which now holds the next element.
            LIST.pop(pos)
        else:
            pos += 1
    return LIST


LIST = [1, 2, 1, 4, 5, 7, 8, 4, 6]
print(cleaner(LIST, 0))

Fast dictionary population with list of keys

d = {} # or d = defaultdict(int)
list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
for lst in list_of_lists:
    for key in lst:
        try:
            d[key] += 1
        except KeyError:
            # First sighting of this key. Catching only KeyError keeps
            # unrelated errors (e.g. an unhashable key) from being swallowed.
            d[key] = 1
Is there a way to perform this operation without the for-loops?
Using a collections.Counter() object and a generator expression:
from collections import Counter
# The generator expression walks every inner list, so Counter tallies all keys in one pass.
d = Counter(i for nested in list_of_lists for i in nested)
or replacing the generator expression with itertools.chain.from_iterable():
from itertools import chain
# chain.from_iterable strings the inner lists together into one stream of keys for Counter.
d = Counter(chain.from_iterable(list_of_lists))
Demo:
>>> from collections import Counter
>>> from itertools import chain
>>> list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
>>> Counter(i for nested in list_of_lists for i in nested)
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})
>>> Counter(chain.from_iterable(list_of_lists))
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})
My understanding is that you want to count the frequency of each integer in your list of lists.
You can do this with numpy.bincount. The actual counting is very fast, as the core of numpy is written in C. Some work needs to be done to get the data into the dictionary format -- you could potentially just use the numpy.array generated by this. The majority of this code is just converting between different formats, which you could do away with if your application allows.
import numpy as np

list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
# Flatten in one linear pass; sum(list_of_lists, []) copies the growing list
# once per inner list.
x = [value for inner in list_of_lists for value in inner]
y = np.bincount(x) #count frequency of each element
# Convert to a dict of plain Python ints. y[i] is the count of the integer i,
# so integers that never occur (below the maximum) appear with a count of 0.
d = {value: int(count) for value, count in enumerate(y)}
If you are allergic to Counter (the right answer BTW), you can use setdefault:
d = {}
# Lazily walk every element of every inner list.
flattened = (e for sl in list_of_lists for e in sl)
for key in flattened:
    # setdefault seeds unseen keys with 0 and returns the current count.
    current = d.setdefault(key, 0)
    d[key] = current + 1

Python list: exchange every n-th value with the (n+1)th

What is the best way to do this:
>>> replace2([1, 2, 3, 4, 5, 6])
[2, 1, 4, 3, 6, 5]
def replace2inplace(lst):
    """Swap every adjacent pair of ``lst`` in place; returns None.

    For an odd-length list the final, unpaired element is left where it is
    (the unbounded slice assignment raised ValueError in that case).
    """
    # Only the even-length prefix consists of complete pairs.
    even = len(lst) - len(lst) % 2
    lst[1:even:2], lst[:even:2] = lst[:even:2], lst[1:even:2]
This uses slice assignment and slice step sizes to swap every pair in the list around, in-place:
>>> somelst = [1, 2, 3, 4, 5, 6]
>>> replace2inplace(somelst)
>>> somelst
[2, 1, 4, 3, 6, 5]
Otherwise you could use some itertools tricks:
from itertools import chain


def replace2copy(lst):
    """Return a new list with every adjacent pair of ``lst`` swapped.

    A trailing unpaired element (odd-length input) is dropped, because zip
    stops at the shorter slice.
    """
    # The builtin zip replaces the Python 2-only izip; the unused tee() call
    # (tee was never imported) is removed.
    return list(chain.from_iterable(zip(lst[1::2], lst[::2])))
which gives:
>>> replace2([1, 2, 3, 4, 5, 6])
[2, 1, 4, 3, 6, 5]
with the list() call optional; if you only need to loop over the result the generator is enough:
from itertools import chain, islice, tee


def replace2gen(lst):
    """Lazily yield the items of ``lst`` with every adjacent pair swapped.

    Accepts any iterable, including one-shot iterators. A trailing unpaired
    item is dropped.
    """
    # Two independent views of the same stream: one supplies the items at odd
    # positions, the other the items at even positions.
    odds, evens = tee(iter(lst))
    # The builtin zip is already lazy on Python 3, so it replaces izip.
    return chain.from_iterable(
        zip(islice(odds, 1, None, 2), islice(evens, None, None, 2))
    )


for i in replace2gen([1, 2, 3, 4, 5, 6]):
    print(i)
where replace2gen() can take arbitrary iterators too.
Out-of-place version:
def replace2(lst):
    """Return a new list with every adjacent pair of ``lst`` swapped.

    A trailing unpaired element is dropped.
    """
    swapped = []
    # Pair each odd-position item with the even-position item before it.
    for second, first in zip(lst[1::2], lst[::2]):
        swapped.extend((second, first))
    return swapped
My choice:
import itertools

x = range(1, 7)
# Pair each odd-position item with the one before it, then flatten the pairs.
# list(...) materialises the chain directly.
res = list(itertools.chain.from_iterable(zip(x[1::2], x[0::2])))
>>> a = [1, 2, 3, 4, 5, 6]
>>> sum([[x+1,x] for x in a if x&1 == True],[])
[2, 1, 4, 3, 6, 5]
EDIT: Some further explanation was requested:
The code steps through each element in the list a and, if the element is odd (x&1 == True) it puts that element and the next element into a list in reverse order ([x+1,x]).
Without the sum(...,[]) function we would have
[[2, 1], [4, 3], [6, 5]]
The sum(...,[]) function removes the internal square brackets giving
[2, 1, 4, 3, 6, 5]
This can be done more generally by using the index of the list rather than its value:
>>> a = [1, 2, 3, 4, 5, 6]
>>> sum([[a[x],a[x-1]] for x in range(len(a)) if x&1 == True],[])
[2, 1, 4, 3, 6, 5]
However, this will remove the last element of the list if its length is not even.

Categories