Cumulative occurrence count - python

I have a number of lists and I need to get the cumulative count of each element across all of them:
a=[1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4]
b=[1,1,1,2,2,2,3,3,3,4]
c=[1,2,2,2,3,4]
c=[]
for i in a:
    for x,y in enumerate(c):
        print i
        if y[0]==i:
            y[1]+=1
        else:
            c.append([i,1])
I need to obtain:
[[1,9],[2,10],[3,8],...]

You can use Counter:
from collections import Counter
a=[1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4]
b=[1,1,1,2,2,2,3,3,3,4]
c=[1,2,2,2,3,4]
res = Counter()
for x in (a, b, c):
    res.update(x)
print res # Counter({2: 10, 1: 9, 3: 8, 4: 5})
If you need the result as list instead of dict you can then sort it to a list:
print sorted(res.iteritems()) # [(1, 9), (2, 10), (3, 8), (4, 5)]
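For reference, the same idea in Python 3 syntax (a minimal sketch):
from collections import Counter

a = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]
b = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4]
c = [1, 2, 2, 2, 3, 4]

res = Counter()
for x in (a, b, c):
    res.update(x)

print(sorted(res.items()))  # [(1, 9), (2, 10), (3, 8), (4, 5)]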

Another solution, only for Python 3.5+:
>>> from collections import Counter
>>> Counter([*a, *b, *c])
Counter({2: 10, 1: 9, 3: 8, 4: 5})
If you really need the list of lists format, the conversion goes like this:
>>> [list(x) for x in Counter([*a, *b, *c]).items()]
[[1, 9], [2, 10], [3, 8], [4, 5]]

You should probably use Counter and itertools:
from collections import Counter
import itertools
a = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4]
b = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4]
c = [1, 2, 2, 2, 3, 4]
# this will iterate through the 3 lists in sequence, and count the number of occurrences of each element
res = Counter(itertools.chain(a, b, c))
[[key, value] for key, value in res.items()]
Result:
[[1, 9], [2, 10], [3, 8], [4, 5]]

There are two ways to do it.
from operator import add
from functools import reduce
from collections import Counter
reduce(add, (Counter(l) for l in (a,b,c)))
# Counter({1: 9, 2: 10, 3: 8, 4: 5})
Or, using a defaultdict:
from collections import defaultdict
d = defaultdict(int)
for l in (a,b,c):
    for i in l:
        d[i] += 1
# defaultdict(int, {1: 9, 2: 10, 3: 8, 4: 5})
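A closely related one-liner (just a sketch) that avoids reduce, since Counter objects support +:
from collections import Counter

total = sum((Counter(l) for l in (a, b, c)), Counter())
# Counter({2: 10, 1: 9, 3: 8, 4: 5})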

How to generate a list with repeating key from a dictionary?

I have a dictionary
a_dict = {1: 1, 4: 2, 5: 3, 6: 4}
I want to create a list such that the dict key appears value number of times:
a_list = [1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
My current code is like this:
a_list = []
for key in a_dict.keys():
    for value in a_dict.values():
I do not know what to do next.
This can be done in a concise way using a list comprehension with nested for loops:
>>> d = {1: 1, 4: 2, 5: 3, 6: 4}
>>> [k for k, v in d.items() for _ in range(v)]
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
However, please note that before Python 3.7 a dict does not guarantee key order, so on older versions the order of keys in the resulting list may be arbitrary (on 3.7+ it follows insertion order).
May I ask for which purpose you want to use the resulting list? Maybe there is a better way of solving the actual problem.
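If a deterministic order is needed, one option (a sketch, assuming ascending key order is what you want) is to sort the items first:
>>> d = {6: 4, 1: 1, 5: 3, 4: 2}
>>> [k for k, v in sorted(d.items()) for _ in range(v)]
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]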
How about this?
a={1: 1, 4: 2, 5: 3, 6: 4}
result=[]
for key, value in a.items():
    result.extend([key] * value)
print result
A rather ugly list comprehension:
[vals for tuplei in d.items() for vals in [tuplei[0]] * tuplei[1]]
yields
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
Slightly more readable (resulting in the same output):
[vals for (keyi, vali) in d.items() for vals in [keyi] * vali]
An itertools solution:
import itertools
list(itertools.chain.from_iterable([[k]*v for k, v in d.items()]))
will also give
[1, 4, 4, 5, 5, 5, 6, 6, 6, 6]
Short explanation:
[[k]*v for k, v in d.items()]
creates
[[1], [4, 4], [5, 5, 5], [6, 6, 6, 6]]
which is then flattened.
You are not missing much!
a_dict = {1: 1, 4: 2, 5: 3, 6: 4}
a_list = []
for key, value in a_dict.items():
    a_list.extend([key]*value)
print(a_list)
counts = {1: 1, 4: 2, 5: 3, 6: 4}
result = []
for key, value in counts.items():
    i = 0
    while i < value:
        result.append(key)
        i += 1
print(result)
Should do the trick
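For completeness, a minimal sketch (not from the answers above) using collections.Counter, whose elements() method performs exactly this expansion, assuming all counts are positive integers; on Python 3.7+ the keys come out in insertion order:
from collections import Counter

a_dict = {1: 1, 4: 2, 5: 3, 6: 4}
a_list = list(Counter(a_dict).elements())
print(a_list)  # [1, 4, 4, 5, 5, 5, 6, 6, 6, 6]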

Sort list by frequency

Is there any way in Python, wherein I can sort a list by its frequency?
For example,
[1,2,3,4,3,3,3,6,7,1,1,9,3,2]
the above list would be sorted in the order of the frequency of its values to create the following list, where the item with the greatest frequency is placed at the front:
[3,3,3,3,3,1,1,1,2,2,4,6,7,9]
I think this would be a good job for a collections.Counter:
import collections

counts = collections.Counter(lst)
new_list = sorted(lst, key=lambda x: -counts[x])
Alternatively, you could write the second line without a lambda:
counts = collections.Counter(lst)
new_list = sorted(lst, key=counts.get, reverse=True)
If you have multiple elements with the same frequency and you care that those remain grouped, we can do that by changing our sort key to include not only the counts, but also the value:
counts = collections.Counter(lst)
new_list = sorted(lst, key=lambda x: (counts[x], x), reverse=True)
l = [1,2,3,4,3,3,3,6,7,1,1,9,3,2]
print sorted(l,key=l.count,reverse=True)
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
You can use a Counter to get the count of each item, use its most_common method to get it in sorted order, then use a list comprehension to expand again
>>> lst = [1,2,3,4,3,3,3,6,7,1,1,9,3,2]
>>>
>>> from collections import Counter
>>> [n for n,count in Counter(lst).most_common() for i in range(count)]
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
In case you want a double sort key, for example: sort the list by frequency in descending order, and in case of a tie the smaller value comes first:
import collections
def frequency_sort(a):
    f = collections.Counter(a)
    a.sort(key=lambda x: (-f[x], x))
    return a
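For example, a quick check against the sample list (a sketch):
print(frequency_sort([1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]))
# [3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]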
I was practising this one for fun. This solution has lower time complexity than repeatedly calling list.count.
from collections import defaultdict
lis = [1,2,3,4,3,3,3,6,7,1,1,9,3,2]
dic = defaultdict(int)
for num in lis:
    dic[num] += 1
s_list = sorted(dic, key=dic.__getitem__, reverse=True)
new_list = []
for num in s_list:
    for rep in range(dic[num]):
        new_list.append(num)
print(new_list)
import numpy as np

def orderByFrequency(lst):
    listUniqueValues = np.unique(lst)
    listQty = []
    listOrderedByFrequency = []
    for i in range(len(listUniqueValues)):
        listQty.append(lst.count(listUniqueValues[i]))
    for i in range(len(listQty)):
        index_bigger = np.argmax(listQty)
        for j in range(listQty[index_bigger]):
            listOrderedByFrequency.append(listUniqueValues[index_bigger])
        listQty[index_bigger] = -1
    return listOrderedByFrequency
#tests:
print(orderByFrequency([1,2,3,4,3,3,3,6,7,1,1,9,3,2]))
print(orderByFrequency([1,2,2]))
print(orderByFrequency([1,2,1,2]))
print(orderByFrequency([2,1,2,1]))
print(orderByFrequency([3,3,3,4,4,4,4,1,5,5,5,5,5,2,2]))
print(orderByFrequency([3,3,3,6,6,6,4,4,4,4,1,6,6,5,5,5,5,5,2,2]))
print(orderByFrequency([10,20,30,30,30,40,40,50,50,50]))
results:
[3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 4, 6, 7, 9]
[2, 2, 1]
[1, 1, 2, 2]
[1, 1, 2, 2]
[5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1]
[30, 30, 30, 50, 50, 50, 40, 40, 10, 20]
from collections import Counter
a = [2, 5, 2, 6, -1, 9999999, 5, 8, 8, 8]
count = Counter(a)
result = []
while len(count) > 0:
    c = count.most_common(1)
    for i in range(c[0][1]):
        result.append(c[0][0])
    del count[c[0][0]]
print(result)
You can use the methods below. They are written in plain Python.
def frequencyIdentification(numArray):
    frequency = dict({})
    for i in numArray:
        if i in frequency.keys():
            frequency[i] = frequency[i] + 1
        else:
            frequency[i] = 1
    return frequency

def sortArrayBasedOnFrequency(numArray):
    sortedNumArray = []
    frequency = frequencyIdentification(numArray)
    frequencyOrder = sorted(frequency, key=frequency.get)
    loop = 0
    while len(frequencyOrder) > 0:
        num = frequencyOrder.pop()
        count = frequency[num]
        loop = loop + 1
        while count > 0:
            loop = loop + 1
            sortedNumArray.append(num)
            count = count - 1
    print("loop count")
    print(loop)
    return sortedNumArray

a = [1, 2, 3, 4, 3, 3, 3, 6, 7, 1, 1, 9, 3, 2]
print(a)
print("sorted array based on frequency of the number")
print(sortArrayBasedOnFrequency(a))

Fast dictionary population with list of keys

d = {} # or d = defaultdict(int)
list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
for lst in list_of_lists:
    for key in lst:
        try:
            d[key] += 1
        except:
            d[key] = 1
Is there a way to perform this operation without the for-loops?
Using a collections.Counter() object and a generator expression:
from collections import Counter
d = Counter(i for nested in list_of_lists for i in nested)
or replacing the generator expression with itertools.chain.from_iterable():
from itertools import chain
d = Counter(chain.from_iterable(list_of_lists))
Demo:
>>> from collections import Counter
>>> from itertools import chain
>>> list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
>>> Counter(i for nested in list_of_lists for i in nested)
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})
>>> Counter(chain.from_iterable(list_of_lists))
Counter({3: 4, 1: 3, 5: 3, 2: 2, 7: 2, 8: 1, 9: 1})
My understanding is that you want to count the frequency of each integer in your list of lists.
You can do this with numpy.bincount. The actual counting is very fast, since numpy's core is written in C. Some work is needed to get the data into dictionary format -- you could potentially just use the numpy array this produces. Most of this code is just converting between formats, which you could drop if your application allows.
list_of_lists = [[9, 7, 5, 3, 1], [2, 1, 3, 2, 5, 3, 7], [3, 5, 8, 1]]
import numpy as np
x = sum(list_of_lists, [])  # convert your list of lists to a flat list
y = np.bincount(x)          # count frequency of each element
# convert to dict
d = {}
ctr = 0
while ctr < len(y):
    d[ctr] = y[ctr]
    ctr += 1
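For what it's worth, the dict conversion at the end can be written more compactly (a sketch; this version skips values that never occur):
d = {value: int(count) for value, count in enumerate(y) if count}
# {1: 3, 2: 2, 3: 4, 5: 3, 7: 2, 8: 1, 9: 1}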
If you are allergic to Counter (the right answer BTW), you can use setdefault:
d={}
for key in (e for sl in list_of_lists for e in sl):
    d[key] = d.setdefault(key,0) + 1
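A closely related variant (a sketch) uses dict.get instead of setdefault:
d = {}
for key in (e for sl in list_of_lists for e in sl):
    d[key] = d.get(key, 0) + 1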

Reorder Python List

I have a list of 4 items like this:
a, b, c, d = [1, 2, 3, 4]
I'm reordering the list, flipping each pair:
[b, a, d, c]
Is there a way to do this in one expression? I've tried using list comprehension and unpacking, but can't seem to get it right.
I have [1, 2, 3, 4]. I'm trying to get [2, 1, 4, 3].
More generically, if you're looking to flip pairs of numbers in a list:
>>> L = [1, 2, 3, 4, 5, 6]
>>> from itertools import chain
>>> list(chain.from_iterable(zip(L[1::2], L[::2])))
[2, 1, 4, 3, 6, 5]
Look at this:
>>> lst = [1, 2, 3, 4]
>>> [y for x in zip(*[iter(lst)]*2) for y in x[::-1]]
[2, 1, 4, 3]
>>>
>>> lst = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
>>> [y for x in zip(*[iter(lst)]*2) for y in x[::-1]]
[2, 1, 4, 3, 6, 5, 8, 7, 10, 9]
>>>
If this is only about 4-element lists, this would suffice:
lst = [1, 2, 3, 4]
reordered_list = [lst[1], lst[0], lst[3], lst[2]]
Because absolutely nobody has given an answer that works on generic iterables, here is one:
from itertools import chain
items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
zip(*[iter(items)]*2)
#>>> <zip object at 0x7fd673afd050>
[itms for itms in zip(*[iter(items)]*2)]
#>>> [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]
So zip(*[iter(x)]*2) means ix = iter(x); zip(ix, ix) which pairs each element.
Then you can reverse:
[(y, x) for (x, y) in zip(*[iter(items)]*2)]
#>>> [(2, 1), (4, 3), (6, 5), (8, 7), (10, 9)]
Putting it all together and flattening:
[itm for (x, y) in zip(*[iter(items)]*2) for itm in (y, x)]
#>>> [2, 1, 4, 3, 6, 5, 8, 7, 10, 9]
It's generic and short!
If you want something faster at the expense of genericism, you'll be hard pressed to better this:
new = list(items)
new[::2], new[1::2] = new[1::2], new[::2]
new
#>>> [2, 1, 4, 3, 6, 5, 8, 7, 10, 9]
Note that this still works on arbitrary iterables, but there are fewer layers of abstraction; you can't bump up the size of the flipped sub-lists as easily and can't output iterables, etc.
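If you do want to bump up the group size, the iterator trick above generalizes; here is a sketch with a hypothetical helper name flip_groups (not from any of the answers here):
def flip_groups(iterable, n=2):
    # zip(*[it]*n) yields consecutive groups of n items; reverse each group
    it = iter(iterable)
    return [item for group in zip(*[it] * n) for item in reversed(group)]

flip_groups([1, 2, 3, 4, 5, 6], 3)
#>>> [3, 2, 1, 6, 5, 4]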
Do you mean this:
>>> a, b, c, d = [1, 2, 3, 4]
>>> b, a, d, c = a, b, c, d
>>> a
2
>>> b
1
>>> c
4
>>> d
3
?
Try this list comprehension solution:
a = [1,2,3,4,5,6] # Any list with even number of elements
b = [a[e+1] if (e%2 == 0) else a[e-1] for e in range(len(a))]
This only works if the list a has an even number of elements.
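If the list can have an odd number of elements, one possible workaround (a sketch) is to leave a trailing odd element in place:
a = [1, 2, 3, 4, 5]
b = [a[e + 1] if e % 2 == 0 else a[e - 1] for e in range(len(a) - len(a) % 2)]
if len(a) % 2:
    b.append(a[-1])
# b == [2, 1, 4, 3, 5]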
In [1]: from itertools import chain
In [2]: l = [1, 2, 3, 4]
In [3]: list(chain(*map(reversed, zip(l[::2], l[1::2]))))
Out[3]: [2, 1, 4, 3]
Am I missing something? You can flip each pair of given_list with a plain loop:
rez = []
for i in range(0, len(given_list) - 1, 2):
    rez.append(given_list[i + 1])
    rez.append(given_list[i])
print(rez)

How to sum 3 same sized sorted lists based on the identical elements of the first one in Python?

I have a Python dictionary containing 3 lists under the keys 'time', 'power' and 'usage'.
All the lists have the same number of elements and all of them are sorted. What
I want to do is sum up the elements of the 'power' and 'usage' lists whose indexes
correspond to the same value in the 'time' list, so that there is only one sample of power and usage per time unit.
For example transform this dictionary:
{'time': [1, 2, 2, 3, 4, 4, 5],
'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7]}
to this one:
{'time': [1, 2, 3, 4, 5],
'power': [2, 5, 6, 6, 2],
'usage': [0, 2, 2, 5, 7]}
I have already written this piece of code, which works, but I don't like it much:
d = {'time':[1,2,2,3,4,4,5], 'power':[0,1,1,2,1,4,7], 'usage':[2,2,3,6,3,3,2]}
prev = -1
new_d = {'time':[], 'power': [], 'usage':[]}
indexes = range( len(d['time']) )
for i in indexes:
    if d['time'][i]!=prev:
        new_d['time'].append(d['time'][i])
        new_d['power'].append(d['power'][i])
        new_d['usage'].append(d['usage'][i])
    else:
        last_power = len( new_d['power'] ) - 1
        last_usage = len( new_d['usage'] ) - 1
        new_d['power'][last_power]+=d['power'][i]
        new_d['usage'][last_usage]+=d['usage'][i]
    prev=d['time'][i]
print d
print new_d
Is there a more Pythonic way to do this, simpler and more comprehensible?
A robust solution that can handle any number of extra fields, grouped by the 'time' field, written as a function:
def aggregate(old_d, sort_key='time'):
    new_d = dict((k, []) for k in old_d)
    prev = None
    curr = None
    for i in range(len(old_d[sort_key])):
        curr = old_d[sort_key][i]
        for key, lst in new_d.iteritems():  # .items() in Python 3+
            if prev == curr:
                if key != sort_key:
                    lst[-1] += old_d[key][i]
            else:
                lst.append(old_d[key][i])
        prev = curr
    return new_d
Using your dictionary:
d = {'time': [1, 2, 2, 3, 4, 4, 5],
'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7]}
print aggregate(d)
>>>
{'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
Here's one that will handle arbitrary dictionaries.... (where d is your dict...)
from itertools import groupby, imap
from operator import itemgetter
def group_dict_by(mapping, field, agg=sum):
    grouper = mapping[field]
    new_grouper = []
    accum = {k: [] for k in mapping.viewkeys() - [field]}
    for key, grp in groupby(enumerate(grouper), itemgetter(1)):
        new_grouper.append(key)
        idx = [g[0] for g in grp]
        for dk, dv in accum.iteritems():
            dv.append(agg(imap(mapping[dk].__getitem__, idx)))
    accum[field] = new_grouper
    return accum
print group_dict_by(d, 'time')
# {'usage': [0, 2, 2, 5, 7], 'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5]}
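That answer relies on Python 2-only APIs (viewkeys, iteritems, imap); here is a possible Python 3 port of the same idea, offered as a sketch rather than a drop-in replacement:
from itertools import groupby
from operator import itemgetter

def group_dict_by(mapping, field, agg=sum):
    grouper = mapping[field]
    new_grouper = []
    # set difference on a keys view works directly in Python 3
    accum = {k: [] for k in mapping.keys() - {field}}
    for key, grp in groupby(enumerate(grouper), itemgetter(1)):
        new_grouper.append(key)
        idx = [g[0] for g in grp]
        for dk, dv in accum.items():
            dv.append(agg(mapping[dk][i] for i in idx))
    accum[field] = new_grouper
    return accum

print(group_dict_by(d, 'time'))
# e.g. {'power': [2, 5, 6, 6, 2], 'usage': [0, 2, 2, 5, 7], 'time': [1, 2, 3, 4, 5]} (key order may vary)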
Using itertools.groupby, zip and some list comprehensions:
In [55]: dic={'time': [1, 2, 2, 3, 4, 4, 5],
....: 'power': [2, 2, 3, 6, 3, 3, 2],
....: 'usage': [0, 1, 1, 2, 1, 4, 7]}
In [56]: from itertools import groupby
In [57]: from operator import itemgetter
In [58]: zip1=zip(dic['time'],dic['power']) #use `itertools.izip` for performance
In [59]: [sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]
Out[59]: [2, 5, 6, 6, 2]
In [60]: zip2=zip(dic['time'],dic['usage'])
In [61]: [sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]
Out[61]: [0, 2, 2, 5, 7]
In [64]: timee=[k for k,v in groupby(dic['time'])]
In [65]: timee
Out[65]: [1, 2, 3, 4, 5]
zip1 is [(1, 2), (2, 2), (2, 3), (3, 6), (4, 3), (4, 3), (5, 2)], now you can group elements based on the first item using itertools.groupby and then take the sum of the second element of each tuple in the returned group.
In [75]: new_time=[k for k,v in groupby(dic['time'])]
In [76]: new_power=[sum(x[1] for x in v) for k,v in groupby(zip1,key=itemgetter(0))]
In [77]: new_usage=[sum(x[1] for x in v) for k,v in groupby(zip2,key=itemgetter(0))]
In [80]: dict(zip(('time','power','usage'),(new_time,new_power,new_usage)))
Out[80]: {'power': [2, 5, 6, 6, 2], 'time': [1, 2, 3, 4, 5], 'usage': [0, 2, 2, 5, 7]}
>>> from itertools import groupby
>>> from operator import itemgetter
>>> d = {'usage': [0, 1, 1, 2, 1, 4, 7], 'power': [2, 2, 3, 6, 3, 3, 2], 'time': [1, 2, 2, 3, 4, 4, 5]}
>>> groups = groupby(zip(d['time'], d['power'], d['usage']), key=itemgetter(0))
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])
>>> dict(zip(('time', 'power', 'usage'), lists))
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}
For variable number of keys, I've added the keys variable to avoid having to rewrite them:
>>> from itertools import groupby
>>> from operator import itemgetter
>>> keys = ('time', 'power', 'usage')
>>> groups = groupby(zip(*[d[k] for k in keys]), key=itemgetter(0))
>>> lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])
>>> dict(zip(keys, lists))
{'usage': (0, 2, 2, 5, 7), 'power': (2, 5, 6, 6, 2), 'time': (1, 2, 3, 4, 5)}
I would first group the values in a new dict and then sum them. It takes a bit more space, but it's easy and fast:
from collections import defaultdict
from itertools import groupby
power = defaultdict(list)
usage = defaultdict(list)
for i, time in enumerate(data['time']):
    power[time].append(data['power'][i])
    usage[time].append(data['usage'][i])
times = [key for key,group in groupby(data['time'])]
print { 'time': times,
        'power' : [sum(power[time]) for time in times],
        'usage' : [sum(usage[time]) for time in times]
      }
You can use the following method for an arbitrary number of extra fields:
from itertools import groupby
from operator import itemgetter
dic = {'time': [1, 2, 2, 3, 4, 4, 5],
'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7]}
aggregated = {}
for field, values in dic.items():
    if field == 'time':
        # the grouping key itself: keep one entry per group
        aggregated[field] = [k for k, _ in groupby(values)]
    else:
        aggregated[field] = [sum(v for _, v in grp)
                             for _, grp in groupby(zip(dic['time'], values),
                                                   key=itemgetter(0))]
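Running this on the sample dictionary should reproduce the target (a quick check; key order may vary):
print(aggregated)
# {'time': [1, 2, 3, 4, 5], 'power': [2, 5, 6, 6, 2], 'usage': [0, 2, 2, 5, 7]}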
Improved version borrowing from Ashwini Chaudhary's answer.
from itertools import izip
def m_(time, power, usage):
    time_, power_, usage_ = [], [], []
    for t, p, u in izip(time, power, usage):
        if not time_:
            time_.append( t )
            power_.append( 0 )
            usage_.append( 0 )
        if time_[-1] == t:
            power_[-1] += p
            usage_[-1] += u
        else:
            time_.append( t )
            power_.append( p )
            usage_.append( u )
    time[:], power[:], usage[:] = time_, power_, usage_

if __name__ == '__main__':
    d = {'time':[1,2,2,3,4,4,5], 'power':[0,1,1,2,1,4,7], 'usage':[2,2,3,6,3,3,2]}
    m_(**d)
    print d
This is the "pythonian way" :) :
d = {'time': [1, 2, 2, 3, 4, 4, 5],
'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7]}
new_d = {'time' : [], 'power' : [], 'usage' : []}
for time in sorted(set(d['time'])):
    new_d['time'].append(time)
    new_d['power'].append(sum(value for index, value in enumerate(d['power']) if d['time'][index] == time))
    new_d['usage'].append(sum(value for index, value in enumerate(d['usage']) if d['time'][index] == time))
print new_d
Below is the precise solution to my problem. I based it on the answer of jamylak,
which I think is the most Pythonic and comprehensible solution of all those given. What I
have done is adapt his code to work with multiple fields, that is, multiple
lists in a dictionary. I have accepted jamylak's answer, and here is the solution
for multiple fields:
from itertools import groupby
from operator import itemgetter
d = {'power': [2, 2, 3, 6, 3, 3, 2],
'usage': [0, 1, 1, 2, 1, 4, 7],
'time': [1, 2, 2, 3, 4, 4, 5]}
# construct a list with all the key names (starting from 'time')
keys = ['time'] + [key for key in d.keys() if key!='time']
# construct a list with all the keys' lists (starting from the one of 'time')
keys_lists = [ d['time'] ] + [d[key] for key in d.keys() if key!='time']
groups = groupby(zip(*keys_lists), key=itemgetter(0))
lists = zip(*[[k] + map(sum, zip(*g)[1:]) for k, g in groups])
new_d = dict(zip((keys), lists))
print new_d
