Fast dictionary population with list of keys - python

d = {}  # or d = defaultdict(int)
list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
# Count how many times each key occurs across all of the inner lists.
for lst in list_of_lists:
    for key in lst:
        try:
            d[key] += 1
        except KeyError:
            # First occurrence of this key: start its count at 1.
            # Only KeyError is caught so that unrelated errors still surface.
            d[key] = 1
Is there a way to perform this operation without the for-loops?

Using a collections.Counter() object and a generator expression:
from collections import Counter
d = Counter(i for nested in list_of_lists for i in nested)
or replacing the generator expression with itertools.chain.from_iterable():
from itertools import chain
d = Counter(chain.from_iterable(list_of_lists))
Demo:
>>> from collections import Counter
>>> from itertools import chain
>>> list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
>>> Counter(i for nested in list_of_lists for i in nested)
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})
>>> Counter(chain.from_iterable(list_of_lists))
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})

My understanding is that you want to count the frequency of each integer in your list of lists.
You can do this with numpy.bincount. The actual counting is very fast, as the core of NumPy is written in C. Some work needs to be done to get the data into the dictionary format -- you could potentially just use the numpy.array generated by this. The majority of this code is just converting between formats, which you could do away with if your application allows.
list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
from itertools import chain
import numpy as np
# Convert your list of lists to a flat integer array in one linear pass
# (sum(list_of_lists, []) copies the accumulated list on every step).
x = np.fromiter(chain.from_iterable(list_of_lists), dtype=np.intp)
# Count frequency of each element; bincount only accepts non-negative integers.
y = np.bincount(x)
# Convert to dict: keep only the values that actually occurred and store
# plain Python ints instead of numpy scalars.
d = {int(value): int(y[value]) for value in np.flatnonzero(y)}

If you are allergic to Counter (the right answer BTW), you can use setdefault:
d = {}
# Walk every element of every sub-list; setdefault supplies 0 for a key that
# has not been seen yet, so the first occurrence ends up with a count of 1.
for key in (e for sl in list_of_lists for e in sl):
    d[key] = d.setdefault(key, 0) + 1

Related

Dict from two lists including multiple values for keys

Is there a possibility to create a dict from two lists with same key occurring multiple times without iterating over the whole dataset?
Minimal example:
keys = [1, 2, 3, 2, 3, 4, 5, 1]
values = [1, 2, 3, 4, 5, 6, 7, 8]
# hoped for result:
dictionary = dict(???)
dictionary = {1 : [1,8], 2:[2,4], 3:[3,5], 4:[6], 5:[7]}
When using zip the key-value-pair is inserted overwriting the old one:
dictionary = dict(zip(keys,values))
dictionary = {1: 8, 2: 4, 3: 5, 4: 6, 5: 7}
I would be happy with a Multidict as well.
This is one approach that doesn't require 2 for loops
from collections import defaultdict

# Group each value under its key; defaultdict(list) creates the empty list
# the first time a key is accessed, so repeated keys accumulate their values.
h = defaultdict(list)
for k, v in zip(keys, values):
    h[k].append(v)
print(h)
# defaultdict(<class 'list'>, {1: [1, 8], 2: [2, 4], 3: [3, 5], 4: [6], 5: [7]})
print(dict(h))
# {1: [1, 8], 2: [2, 4], 3: [3, 5], 4: [6], 5: [7]}
This is the only one-liner I could do.
dictionary = {k: [values[i] for i in [j for j, x in enumerate(keys) if x == k]] for k in set(keys)}
It is far from readable. Remember that clear code is always better than pseudo-clever code ;)
Here is an example that I think is easy to follow logically. Unfortunately it does not use zip like you would prefer, nor does it avoid iterating, because a task like this has to involve iterating in some form.
# Your data
keys = [1, 2, 3, 2, 3, 4, 5, 1]
values = [1, 2, 3, 4, 5, 6, 7, 8]
# Make result dict
result = {}
# Populate result dict. Each key's list is created the first time the key is
# seen, so keys need not be consecutive positive integers and an empty `keys`
# list simply yields an empty dict.
for index, num in enumerate(keys):
    result.setdefault(num, []).append(values[index])
# Print result
print(result)
If you know the range of values in the keys array, you could make this faster by providing the results dictionary as a literal with integer keys and empty list values.

How to generate a list with repeating key from a dictionary?

I have a dictionary
a_dict = {1: 1, 4: 2, 5: 3, 6: 4}
I want to create a list such that the dict key appears value number of times:
a_list = [1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
My current code is like this:
a_list = []
for key in a_dict.keys():
for value in a_dict.values():
I do not know what to do next?
This can be done in a concise way using a list comprehension with nested for loops:
>>> d = {1: 1, 4: 2, 5: 3, 6: 4}
>>> [k for k, v in d.items() for _ in range(v)]
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
However, please note that dict only guarantees insertion order from Python 3.7 onwards; in older versions it is an unordered data structure and therefore the order of keys in the resulting list is arbitrary.
May I ask for which purpose you want to use the resulting list? Maybe there is a better way of solving the actual problem.
How about this?
a = {1: 1, 4: 2, 5: 3, 6: 4}
# Named `result` rather than `list` so the builtin list type is not shadowed.
result = []
for key, value in a.items():
    # [key] * value is the key repeated `value` times.
    result.extend([key] * value)
print(result)
A rather ugly list comprehension:
[vals for tuplei in d.items() for vals in [tuplei[0]] * tuplei[1]]
yields
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
Slightly more readable (resulting in the same output):
[vals for (keyi, vali) in d.items() for vals in [keyi] * vali]
An itertools solution:
import itertools
list(itertools.chain.from_iterable([[k]*v for k, v in d.items()]))
will also give
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
Short explanation:
[[k]*v for k, v in d.items()]
creates
[[1], [4, 4], [5, 5, 5], [6, 6, 6, 6]]
which is then flattened.
You are not missing much!
a_dict = {1: 1, 4: 2, 5: 3, 6: 4}
a_list = []
# Iterate over key/value pairs together and append each key `value` times.
for key, value in a_dict.items():
    a_list.extend([key] * value)
print(a_list)
# Descriptive names are used instead of `dict` and `list`, which would shadow
# the builtin types for the rest of the module.
counts = {1: 1, 4: 2, 5: 3, 6: 4}
repeated = []
for key, value in counts.items():
    # Append the key once per unit of its count.
    for _ in range(value):
        repeated.append(key)
print(repeated)
Should do the trick

Most pythonic way to initialize a dict

Suppose I have a dict like this
d = {
1: [1,4,7],
2: [2,5,8],
0: [3,6,9]
}
It can be constructed by
d = {}
# Bucket the numbers 1..9 by their remainder modulo 3.
for i in range(1,10):
    key = i % 3
    if key not in d: d[key] = []
    d[key].append(i)
I used this line if key not in d: d[key] = [] to check existence of the key/value pair in the dict and initiate the pair.
Is there a more pythonic way to achieve this?
This is probably best handled with a defaultdict, which will automatically create any key-value mapping that is accessed if it doesn't already exist. You pass a callable to the defaultdict constructor that will be used to initialize the value. For example:
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> d
defaultdict(list, {})
>>> d[3]
[]
>>> d
defaultdict(list, {3: []})
Using a comprehension:
>>> {n%3: list(range(n, n+7, 3)) for n in range(1,4)}
{0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
Using dict.setdefault():
>>> d = {}
>>> for i in range(1, 10):
... d.setdefault(i%3, []).append(i)
...
>>> d
{0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
Using defaultdict:
>>> from collections import defaultdict
>>> d = defaultdict(list)
>>> for i in range(1, 10):
... d[i%3].append(i)
...
>>> d
defaultdict(<class 'list'>, {0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]})
from collections import defaultdict
d = defaultdict(list)
# Bucket the numbers 1..9 by remainder modulo 3; the defaultdict creates each
# bucket's list on first access, so no existence check is needed.
for i in range(1, 10):
    key = i % 3
    d[key].append(i)
print(d)
out:
defaultdict(<class 'list'>, {0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]})
When each key is encountered for the first time, it is not already in
the mapping; so an entry is automatically created using the
default_factory function which returns an empty list. The
list.append() operation then attaches the value to the new list. When
keys are encountered again, the look-up proceeds normally (returning
the list for that key) and the list.append() operation adds another
value to the list. This technique is simpler and faster than an
equivalent technique using dict.setdefault():
>>> d = {}
>>> for k, v in s:
d.setdefault(k, []).append(v)
You can use list slice notation [start:stop:step]
d = {}
numbers = list(range(1, 10))  # built once instead of on every iteration
for i in range(3):
    # (i + 2) % 3 is the index of the first number whose remainder mod 3 is i;
    # a step of 3 then picks every number with that same remainder.
    d[i] = numbers[(i + 2) % 3::3]
{0: [3, 6, 9],
1: [1, 4, 7],
2: [2, 5, 8]}
Given that you haven't given any input nor variable parts you might just initialize it with the literal you already have:
d = {1: [1,4,7],
2: [2,5,8],
0: [3,6,9]}
If you have variable input you may use collections.defaultdict with list as factory. Given that this operation is very common several external libraries have functions for this:
iteration_utilities.groupedby
toolz.groupby
For example:
>>> from iteration_utilities import groupedby
>>> groupedby(range(1, 10), lambda x: x % 3)
{0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
or:
>>> from toolz import groupby
>>> groupby(lambda x: x % 3, range(1, 10))
{0: [3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}

Cumulative occurrence count

I have a number of lists that have 2 dimensions and I need to get
cumulative count elements:
a=[1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4]
b=[1,1,1,2,2,2,3,3,3,4]
c=[1,2,2,2,3,4]
c=[]
for i in a:
for x,y in enumerate(c):
print i
if y[0]==i:
y[1]+=1
else:
c.append([i,1])
I need to obtain:
[[1,9],[2,10],[3,8]...]
You can use Counter:
from collections import Counter
a = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]
b = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4]
c = [1, 2, 2, 2, 3, 4]
# Accumulate the counts of all three lists into a single Counter.
res = Counter()
for x in (a, b, c):
    res.update(x)
print(res)  # Counter({2: 10, 1: 9, 3: 8, 4: 5})
If you need the result as list instead of dict you can then sort it to a list:
print(sorted(res.items()))  # [(1, 9), (2, 10), (3, 8), (4, 5)]
Another solution, only for Python 3.5+:
>>> from collections import Counter
>>> Counter([*a, *b, *c])
Counter({2: 10, 1: 9, 3: 8, 4: 5})
If you really need the list of lists format, the conversion goes like this:
>>> [list(x) for x in Counter([*a, *b, *c]).items()]
[[1, 9], [2, 10], [3, 8], [4, 5]]
You should probably use counter and itertools:
from collections import Counter
import itertools
a = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]
b = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4]
c = [1, 2, 2, 2, 3, 4]
# Walk the three lists back to back and tally how often each element occurs.
res = Counter()
res.update(itertools.chain.from_iterable((a, b, c)))
# Each (element, count) pair becomes a two-item list.
[list(pair) for pair in res.items()]
Result:
[[1, 9], [2, 10], [3, 8], [4, 5]]
There are two ways to do it.
from operator import add
from functools import reduce
from collections import Counter
reduce(add, (Counter(l) for l in (a,b,c)))
# Counter({1: 9, 2: 10, 3: 8, 4: 5})
from collections import defaultdict
d = defaultdict(int)
# defaultdict(int) starts every unseen key at 0, so each element can be
# incremented without an existence check.
for seq in (a, b, c):
    for item in seq:
        d[item] += 1
# defaultdict(int, {1: 9, 2: 10, 3: 8, 4: 5})

Sort list by frequency

Is there any way in Python, wherein I can sort a list by its frequency?
For example,
[1,2,3,4,3,3,3,6,7,1,1,9,3,2]
the above list would be sorted in the order of the frequency of its values to create the following list, where the item with the greatest frequency is placed at the front:
[3,3,3,3,3,1,1,1,2,2,4,6,7,9]
I think this would be a good job for a collections.Counter:
counts = collections.Counter(lst)
new_list = sorted(lst, key=lambda x: -counts[x])
Alternatively, you could write the second line without a lambda:
counts = collections.Counter(lst)
new_list = sorted(lst, key=counts.get, reverse=True)
If you have multiple elements with the same frequency and you care that those remain grouped, we can do that by changing our sort key to include not only the counts, but also the value:
counts = collections.Counter(lst)
new_list = sorted(lst, key=lambda x: (counts[x], x), reverse=True)
from collections import Counter

lst = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
# Count every value once up front; using lst.count as the sort key would
# rescan the whole list for each element.
counts = Counter(lst)
# The sort is stable, so values with equal frequency keep their original order.
by_frequency = sorted(lst, key=counts.__getitem__, reverse=True)
print(by_frequency)
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
You can use a Counter to get the count of each item, use its most_common method to get it in sorted order, then use a list comprehension to expand again
>>> lst = [1,2,3,4,3,3,3,6,7,1,1,9,3,2]
>>>
>>> from collections import Counter
>>> [n for n,count in Counter(lst).most_common() for i in range(count)]
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
In case you want to use a double comparator.
For example: Sort the list by frequency in descending order and in case of a clash the smaller one comes first.
import collections
def frequency_sort(a):
    """Sort the list *a* in place by descending frequency and return it.

    Elements that occur equally often are ordered by ascending value.
    The same list object that was passed in is returned, so the caller's
    list is modified.
    """
    f = collections.Counter(a)
    # Negating the count makes the ascending sort put frequent items first,
    # while the second tuple element breaks ties with the smaller value first.
    a.sort(key=lambda x: (-f[x], x))
    return a
Was practising this one for fun. This solution has a lower time complexity.
from collections import defaultdict
lis = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
# Tally the occurrences of every number in a single pass.
dic = defaultdict(int)
for num in lis:
    dic[num] += 1
# Distinct numbers, most frequent first; ties keep first-seen order because
# the sort is stable.
s_list = sorted(dic, key=dic.__getitem__, reverse=True)
# Expand each distinct number back out to its original multiplicity.
new_list = []
for num in s_list:
    new_list.extend([num] * dic[num])
print(new_list)
def orderByFrequency(list):
    """Return the items of *list* ordered by descending frequency.

    Items that occur equally often are ordered by ascending value. A new
    list is returned and the argument is left unmodified. The items must be
    hashable and mutually comparable.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present; numpy is no longer needed.
    from collections import Counter

    # One counting pass instead of one full scan per distinct value.
    counts = Counter(list)
    # Negated count puts the most frequent values first; the value itself
    # breaks ties in ascending order.
    ordered_values = sorted(counts, key=lambda value: (-counts[value], value))
    return [value for value in ordered_values for _ in range(counts[value])]
#tests:
print(orderByFrequency([1,2,3,4,3,3,3,6,7,1,1,9,3,2]))
print(orderByFrequency([1,2,2]))
print(orderByFrequency([1,2,1,2]))
print(orderByFrequency([2,1,2,1]))
print(orderByFrequency([3,3,3,4,4,4,4,1,5,5,5,5,5,2,2]))
print(orderByFrequency([3,3,3,6,6,6,4,4,4,4,1,6,6,5,5,5,5,5,2,2]))
print(orderByFrequency([10,20,30,30,30,40,40,50,50,50]))
results:
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
[2, 2, 1]
[1, 1, 2, 2]
[1, 1, 2, 2]
[5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[30, 30, 30, 50, 50, 50, 40, 40, 10, 20]
from collections import Counter
a = [2, 5, 2, 6, -1, 9999999, 5, 8, 8, 8]
count = Counter(a)
# most_common() already yields (value, occurrences) pairs from most to least
# frequent, with ties in first-seen order, so one pass over it is enough;
# there is no need to pull out and delete one entry at a time.
a = []
for value, occurrences in count.most_common():
    a.extend([value] * occurrences)
print(a)
You can use the methods below. They are written in plain Python.
def frequencyIdentification(numArray):
    """Return a dict mapping each item of *numArray* to its occurrence count.

    Keys appear in the order in which the items are first encountered.
    An empty input yields an empty dict.
    """
    frequency = {}
    for i in numArray:
        # get() supplies 0 for an item that has not been counted yet.
        frequency[i] = frequency.get(i, 0) + 1
    return frequency
def sortArrayBasedOnFrequency(numArray):
    """Return the items of *numArray* ordered by descending frequency.

    The occurrence counts come from frequencyIdentification(). As a side
    effect, the text "loop count" and the number of loop steps the
    step-by-step algorithm performs are printed before returning.
    """
    frequency = frequencyIdentification(numArray)
    # Ascending stable sort by count; walking it back to front visits the most
    # frequent number first (among equal counts, the one seen last comes first).
    frequencyOrder = sorted(frequency, key=frequency.get)
    sortedNumArray = []
    for num in reversed(frequencyOrder):
        sortedNumArray.extend([num] * frequency[num])
    # One step per distinct number plus one step per emitted element.
    loop = len(frequencyOrder) + len(sortedNumArray)
    print("loop count")
    print(loop)
    return sortedNumArray
# Demonstration: print the input, then the frequency-sorted result.
a = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
print(a)
print("sorted array based on frequency of the number")
print(sortArrayBasedOnFrequency(a))

Categories