How to count multiple items in a list - python

I have the following list:
data = [[2004,1,1,1,50], [2008,2,28,1,150],[1984,5,1,3,20],[1982,5,1,7,20], [1982,5,8,7,20]]
The data represents the Year, Month, Day, Day of week, count.
I want to obtain a dictionary of the total counts per day of the week. Something like this
results = {1:200,
2:0,
3:20,
4:0,
5:0,
6:0,
7:40,
}
I believe the best way to do this, please correct me if i'm wrong, is to use collections.Counter. I abandoned this effort for a dictionary comprehension but have been unable to solve the problem
solution = {(x,i) for x[3], i[4] in data}

Since you want to sum and not count it might be easier to use defaultdict:
from collections import defaultdict
data = [[2004,1,1,1,50], [2008,2,30,1,150],[1984,5,1,3,20],[1982,5,1,7,20], [1982,5,8,7,20]]
c = defaultdict(int)
for l in data:
c[l[3]] += l[4]
print(c)
# defaultdict(<class 'int'>, {1: 200, 3: 20, 7: 40})
If you insist on having zero entries you can instantiate it before:
from collections import defaultdict
data = [[2004,1,1,1,50], [2008,2,30,1,150],[1984,5,1,3,20],[1982,5,1,7,20], [1982,5,8,7,20]]
c = defaultdict(int)
c.update({d: 0 for d in range(1, 8)})
for l in data:
c[l[3]] += l[4]
print(c)
# defaultdict(<class 'int'>, {1: 200, 2: 0, 3: 20, 4: 0, 5: 0, 6: 0, 7: 40})
At this point you may use a normal dict over defaultdict if you are sure that the input will not have invalid days:
data = [[2004,1,1,1,50], [2008,2,30,1,150],[1984,5,1,3,20],[1982,5,1,7,20], [1982,5,8,7,20]]
c = {d: 0 for d in range(1, 8)} # or dict.fromkeys(range(1, 8), 0)
for l in data:
c[l[3]] += l[4]
print(c)
# {1: 200, 2: 0, 3: 20, 4: 0, 5: 0, 6: 0, 7: 40}

If, as in your input data, your data is sorted by day of week, i.e. all sublists for a particular day of week are adjacent to each other, you can use itertools.groupby with a dictionary comprehension:
from itertools import groupby
from operator import itemgetter
res = {k: sum(map(itemgetter(-1), v)) for k, v in groupby(data, key=itemgetter(-2))}
print(res)
# {1: 200, 3: 20, 7: 40}
If your data is not sorted, you will have to sort by day of week first:
data = sorted(data, key=itemgetter(-2))

You can tackle this problem with a simple loop instead. Create a results dict with initial values for each day set to zero, and just add to it step by step.
results = {k:0 for k in range(1,8)}
#Output: {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0}
data = [[2004,1,1,1,50], [2008,2,30,1,150],[1984,5,1,3,20],[1982,5,1,7,20], [1982,5,8,7,20]]
for x in data:
results[x[3]] += x[4]
print(results)
#Output:
{1: 200, 2: 0, 3: 20, 4: 0, 5: 0, 6: 0, 7: 40}

As you asked to use Counter from collections, you could use it like this:
from collections import Counter
counter=Counter()
for group in data:
counter[group[3]] +=group[4]
results=dict(counter)

Related

how to subtract one list from another including duplicates

I have 2 lists
On is a big list with some elements having duplicates
super_set_list = [1,1,2,3,3,4,4,4,5,6,7,8,9]
The other is a subset of the big list, also with duplicates
sub_set_list = [1,2,3,3,4,4,6,7,9]
I want the difference, like this
diff = [1,4,5,8]
Not sure how I would go about this
You can use a Counter
super_set_list = [1,1,1,2,3,3,4,4,4,5,6,7,8,9]
sub_set_list = [1,2,3,3,4,4,6,7,9]
from collections import Counter
super_counter = Counter(super_set_list)
super_counter = Counter({1: 3, 4: 3, 3: 2, 2: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1})
For every element in sub_set_list, reduce the count in in super_counter
for item in sub_set_list:
super_counter[item]-=1
Now super_counter = Counter({1: 2, 4: 1, 5: 1, 8: 1, 2: 0, 3: 0, 6: 0, 7: 0, 9: 0})
Finally, just pick elements that have some count left (but add it that many number of times).
diff=[]
for k,v in super_counter.items():
for _ in range(v):
diff.append(k)
print(diff)
# [1, 1, 4, 5, 8]
You can loop through sub-set list and remove item in super-set list one by one as follows:
super_set_list = [1,1,2,3,3,4,4,4,5,6,7,8,9]
sub_set_list = [1,2,3,3,4,4,6,7,9]
for item in sub_set_list:
if item in super_set_list:
super_set_list.remove(item)
print(super_set_list)

Creating a dictionary in a dictionary comprehension

I have a list with repeated values and I want to count them using a dictionary comprehension
Here is my initial attempt
number_list = [1,1,2,2,3,3,4,4,5,5]
number_count_dict = {i:1 for i in number_list}
{k: (number_count_dict[k]+1 if k in number_count_dict() else 1) for k in number_list}
Is there a way of achieving this without initialising the dictionary?
Take this example for your question:
numbers = [5,3,3,4,2]
and let us say if you would like to turn it into a dictionary where the key is the index and value is the element in the list. Then you could trt something like this:
{index:numbers[index] for index in range(0,len(numbers))}
Here's the result:
{0: 5, 1: 3, 2: 3, 3: 4, 4: 2}
This will count repetitions only (>1)
>>> from collections import Counter
>>> x=[1,1,2,2,3,3,4,4,5,5]
>>> {i:j for i,j in Counter(x).items() if i>1}
{2: 2, 3: 2, 4: 2, 5: 2}
Your question leaves out a couple of important points. What is the minimum and maximum numbers you want in the dictionary? Do you want numbers with 0 to be counted? Can you use the Counter class?
#1: Count items from 0 to len(number_list) including items with count of 0, using Counter.
>>> from collections import Counter
>>> number_list = [1,1,2,2,3,3,4,4,5,5]
>>> count = Counter(number_list)
>>> number_count_dict = {i:(count[i] if i in number_list else 0) for i in range(len(number_list))}
{0: 0, 1: 2, 2: 2, 3: 2, 4: 2, 5: 2, 6: 0, 7: 0, 8: 0, 9: 0}
#2: Count items from lowest number to highest number in list including items with count of 0, using Counter.
>>> from collections import Counter
>>> number_list = [2,2,3,3,4,4,5,5,7,7]
>>> count = Counter(number_list)
>>> number_count_dict = {i:(count[i] if i in number_list else 0) for i in range(min(number_list),max(number_list)+1)}
{2: 2, 3: 2, 4: 2, 5: 2, 6: 0, 7: 2}
#3: Count items in list using Counter.
>>> from collections import Counter
>>> number_list = [1,1,2,2,3,3,4,4,5,5]
>>> count = Counter(number_list)
>>> number_count_dict = {i:count[i] for i in set(number_list)}
{1: 2, 2: 2, 3: 2, 4: 2, 5: 2}
#4: Count items from 0 to len(number_list) including items with count of 0, NOT using Counter.
>>> number_list = [1,1,2,2,3,3,4,4,5,5]
>>> number_count_dict = {i:number_list.count(i) for i in range(len(number_list))}
{0: 0, 1: 2, 2: 2, 3: 2, 4: 2, 5: 2, 6: 0, 7: 0, 8: 0, 9: 0}
#5: Count items from lowest number to highest number in list including items with count of 0, NOT using Counter.
>>> number_list = [2,2,3,3,4,4,5,5,7,7]
>>> number_count_dict = {i:number_list.count(i) for i in range(min(number_list),max(number_list)+1)}
{2: 2, 3: 2, 4: 2, 5: 2, 6: 0, 7: 2}
#6: Count items in list NOT using Counter.
>>> number_list = [1,1,2,2,3,3,4,4,5,5]
>>> number_count_dict = {i:number_list.count(i) for i in set(number_list)}
{1: 2, 2: 2, 3: 2, 4: 2, 5: 2}
#7: Bonus, using a defaultdict and for loop.
from collections import defaultdict
number_list = [1,1,2,2,3,3,4,4,5,5]
number_count_dict = defaultdict(int)
for i in number_list:
number_count_list[i] += 1

Multiply counter value with corresponding counter key function in a for loop

say I got following output containing several counter objects from several lists:
for b in data:
m = np.sum(b.values())
individuals = [i for i in b.values() if i != 0]
counter = collections.Counter(individuals)
Output:
Counter({1: 10, 2: 2})
Counter({1: 19, 4: 1, 5: 1})
Counter({1: 14, 2: 4, 3: 4, 4: 2})
Counter({1: 12, 3: 5, 2: 3, 4: 2, 6: 1, 9: 1})
Counter({1: 14, 3: 4, 4: 4, 2: 3, 5: 1, 8: 1, 10: 1})
Now what I am trying to do is to raise float(i)/m to the second power for each key in the counter object,
but additionally, I need to select the corresponding counter.value() and multiply it with float(i)/m. How can I integrate a
second for loop with counter.values()?
f = np.sum( float(i)/m**2 for i in counter.keys() )

Writing a nested dictionary into csv python

I am trying to write a nested dictionary into python. I have a dictionary like below:
{'09-04-2018' : {1: 11, 2: 5, 3: 1, 4: 1, 5: 0} , '10-04-2018' : {1: 5, 2: 1, 3: 1, 4: 1, 5: 0}}
and i wanted to write it something like:
count,09-04-2018,10-04-2018
1,11,5
2,5,1
3,1,1
4,1,1
5,0,0
The following produces the requested output:
data = {'09-04-2018' : {1: 11, 2: 5, 3: 1, 4: 1, 5: 0} , '10-04-2018' : {1: 5, 2: 1, 3: 1, 4: 1, 5: 0}}
rows = []
keys = sorted(data)
header = ['count'] + keys
counts = sorted(set(k for v in data.values() for k in v))
for count in counts:
l = [count]
for key in keys:
l.append(data[key].get(count))
rows.append(l)
print header
print rows
import csv
with open('output.csv', 'w') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(header)
writer.writerows(rows)
This builds up the rows before writing them it is possible to write them directly rather than appending them to the list and writing the contents of the list.
produces this output:
count,09-04-2018,10-04-2018
1,11,5
2,5,1
3,1,1
4,1,1
5,0,0
If you are open to using a 3rd party library, you can use pandas:
import pandas as pd
d = {'09-04-2018' : {1: 11, 2: 5, 3: 1, 4: 1, 5: 0},
'10-04-2018' : {1: 5, 2: 1, 3: 1, 4: 1, 5: 0}}
# create dataframe from dictionary
df = pd.DataFrame.from_dict(d).reset_index().rename(columns={'index': 'count'})
# write dataframe to csv file
df.to_csv('file.csv', index=False)
print(df)
# count 09-04-2018 10-04-2018
# 0 1 11 5
# 1 2 5 1
# 2 3 1 1
# 3 4 1 1
# 4 5 0 0
You can shorten your code by using zip:
import csv
d = {'09-04-2018' : {1: 11, 2: 5, 3: 1, 4: 1, 5: 0} , '10-04-2018' : {1: 5, 2: 1, 3: 1, 4: 1, 5: 0}}
with open('filename.csv', 'w') as f:
write = csv.writer(f)
full_rows = [i for h in [zip(*b.items()) for _, b in sorted(d.items(), key=lambda x:map(int, x[0].split('-')))] for i in h]
write.writerows([['counts']+[a for a, _ in sorted(d.items(), key=lambda x:map(int, x[0].split('-')))]]+list(zip(*[full_rows[0]]+full_rows[1::2])))
Output:
counts,09-04-2018,10-04-2018
1,11,5
2,5,1
3,1,1
4,1,1
5,0,0

Adding missing keys in dictionary in Python

I have a list of dictionaries:
L = [{0:1,1:7,2:3,4:8},{0:3,2:6},{1:2,4:6}....{0:2,3:2}].
As you can see, the dictionaries have different length. What I need is to add missing keys:values to every dictionary to make them being with the same length:
L1 = [{0:1,1:7,2:3,4:8},{0:3,1:0,2:6,3:0,4:0},{0:0, 1:2,3:0,4:6}....{0:2,1:0,2:0,3:2,4:0}],
Means to add zeros for missing values. The maximum length isn't given in advance, so one may get it only iterating through the list.
I tried to make something with defaultdicts, like L1 = defaultdict(L) but it seems I don't understand properly how does it work.
You'll have to make two passes: 1 to get the union of all keys, and another to add the missing keys:
max_key = max(max(d) for d in L)
empty = dict.fromkeys(range(max_key + 1), 0)
L1 = [dict(empty, **d) for d in L]
This uses an 'empty' dictionary as a base to quickly produce all keys; a new copy of this dictionary plus an original dictionary produces the output you want.
Note that this assumes your keys are always sequential. If they are not, you can produce the union of all existing keys instead:
empty = dict.fromkeys(set().union(*L), 0)
L1 = [dict(empty, **d) for d in L]
Demo:
>>> L = [{0: 1, 1: 7, 2: 3, 4: 8}, {0: 3, 2: 6}, {1: 2, 4: 6}, {0: 2, 3: 2}]
>>> max_key = max(max(d) for d in L)
>>> empty = dict.fromkeys(range(max_key + 1), 0)
>>> [dict(empty, **d) for d in L]
[{0: 1, 1: 7, 2: 3, 3: 0, 4: 8}, {0: 3, 1: 0, 2: 6, 3: 0, 4: 0}, {0: 0, 1: 2, 2: 0, 3: 0, 4: 6}, {0: 2, 1: 0, 2: 0, 3: 2, 4: 0}]
or the set approach:
>>> empty = dict.fromkeys(set().union(*L), 0)
>>> [dict(empty, **d) for d in L]
[{0: 1, 1: 7, 2: 3, 3: 0, 4: 8}, {0: 3, 1: 0, 2: 6, 3: 0, 4: 0}, {0: 0, 1: 2, 2: 0, 3: 0, 4: 6}, {0: 2, 1: 0, 2: 0, 3: 2, 4: 0}]
The above approach to merge two dictionaries into a new one with dict(d1, **d2) always works in Python 2. In Python 3 additional constraints have been set on what kind of keys you can use this trick with; only string keys are allowed for the second dictionary. For this example, where you have numeric keys, but you can use dictionary unpacking instead:
{**empty, **d} # Python 3 dictionary unpacking
That'll work in Python 3.5 and newer.
a bit of caution: changes L
>>> allkeys = frozenset().union(*L)
>>> for i in L:
... for j in allkeys:
... if j not in i:
... i[j]=0
>>> L
[{0: 1, 1: 7, 2: 3, 3: 0, 4: 8}, {0: 3, 1: 0, 2: 6, 3: 0, 4: 0}, {0: 0, 1: 2, 2:
0, 3: 0, 4: 6}, {0: 2, 1: 0, 2: 0, 3: 2, 4: 0}]
Maybe not the most elegant solution, but should be working:
L = [{0:1,1:7,2:3,4:8},{0:3,2:6},{1:2,4:6},{0:2,3:2}]
alldicts = {}
for d in L:
alldicts.update(d)
allkeys = alldicts.keys()
for d in L:
for key in allkeys:
if key not in d:
d[key] = 0
print(L)
This is only a solution, but I think it's simple and straightforward. Note that it modifies the dictionaries in place, so if you want them to be copied, let me know and I'll revise accordingly.
keys_seen = []
for D in L: #loop through the list
for key in D.keys(): #loop through each dictionary's keys
if key not in keys_seen: #if we haven't seen this key before, then...
keys_seen.append(key) #add it to the list of keys seen
for D1 in L: #loop through the list again
for key in keys_seen: #loop through the list of keys that we've seen
if key not in D1: #if the dictionary is missing that key, then...
D1[key] = 0 #add it and set it to 0
This is quick and slim:
missing_keys = set(dict1.keys()) - set(dict2.keys())
for k in missing_keys:
dict1[k] = dict2[k]
Unless None is a valid value for a dictionary key you have herein is a great solution for you
L = [{0: 1, 1: 7, 2: 3, 4: 8}, {0: 3, 2: 6}, {1: 2, 4: 6}, {0: 2, 3: 2}]
for i0, d0 in enumerate(L[:-1]):
for d1 in L[i0:]:
_ = [d0.__setitem__(k,d1[k]) for k in d1 if d0.get(k,None) is None]
_ = [d1.__setitem__(k,d0[k]) for k in d0 if d1.get(k,None) is None]
print(L)
>>> [{0: 1, 1: 7, 2: 3, 3: 2, 4: 8}, {0: 3, 1: 2, 2: 6, 3: 2, 4: 6}, {0: 2, 1: 2, 2: 3, 3: 2, 4: 6}, {0: 2, 1: 7, 2: 3, 3: 2, 4: 8}]

Categories