sorting dates in the form mm/dd/yy - python

I'm extremely new to Python and I was wondering how I would be able to sort a list of tuples where the second element of each tuple is another tuple, like this:
[ ('Adam', (12,16,1949) ), ('Charlie', (9,4,1988) ), ('Daniel', (11,29,1990) ),
('Ellie', (11, 28, 1924) ), ('Feenie', (2,10,1954) ), ('Harry', (8,15,1924) ),
('Iggy', (12, 29, 1924) ), ('Jack', (2,21,1920) )]
I want to sort the list by using the sorted function to organize from youngest to oldest, but when I tried my function:
def sort_ages(a: list):
    """Return the entries of ``a`` ordered by birth year, latest year first.

    Each entry is a ``(name, (month, day, year))`` tuple.  Only the year
    (``x[1][2]``) is used as the sort key, so month and day are ignored and
    entries sharing a year keep their original relative order.
    """
    return sorted(a, key=lambda x: x[1][2], reverse=True)
[('Daniel', (11, 29, 1990)), ('Charlie', (9, 4, 1988)), ('Feenie', (2, 10, 1954)), ('Adam', (12, 16, 1949)), ('Ellie', (11, 28, 1924)), ('Harry', (8, 15, 1924)), ('Iggy', (12, 29, 1924)), ('Jack', (2, 21, 1920))]
It orders the list by descending year, but doesn't seem to take the month or day into account.
Most of the questions I found here had their dates in the form YYYY-MM-DD.

# Python 3 no longer allows tuple unpacking in a lambda's parameter list
# (PEP 3113), so the key indexes into each (name, (m, d, y)) entry instead.
sorted(xs, reverse=True, key=lambda entry: (entry[1][2], entry[1][0], entry[1][1]))
Converting to datetime.date isn't necessary, but it is perhaps a better expression of the meaning.
import datetime

# Python 3 no longer allows tuple unpacking in a lambda's parameter list
# (PEP 3113); build the date from the (m, d, y) tuple by index instead.
sorted(xs, reverse=True, key=lambda entry: datetime.date(entry[1][2], entry[1][0], entry[1][1]))

Make the key function to return a tuple that contains year, month, day in order.
def by_day(t):
    """Return the ``(year, month, day)`` sort key for a ``(name, (month, day, year))`` entry."""
    m, d, y = t[1]
    # Tuples compare element by element, so year decides first, then month, then day.
    return y, m, d


def sort_ages(a: list):
    """Return the entries of ``a`` sorted from the latest birth date to the earliest."""
    return sorted(a, key=by_day, reverse=True)
example output for the given list:
[('Daniel', (11, 29, 1990)),
('Charlie', (9, 4, 1988)),
('Feenie', (2, 10, 1954)),
('Adam', (12, 16, 1949)),
('Iggy', (12, 29, 1924)),
('Ellie', (11, 28, 1924)),
('Harry', (8, 15, 1924)),
('Jack', (2, 21, 1920))]

def sort_ages(a: list):
    """Return the entries of ``a`` sorted from the latest birth date to the earliest.

    Each entry is a ``(name, (month, day, year))`` tuple; the key reorders the
    date to ``(year, month, day)`` so that tuple comparison is chronological.
    """
    return sorted(a, key=lambda x: (x[1][2], x[1][0], x[1][1]), reverse=True)
Sorts by year, then month, then day.

Related

Group items in a list and calculate sums

I have a list with weekly figures and need to obtain the grouped totals by month.
The following code does the job, but there should be a more pythonic way of doing it with using the standard libraries.
The drawback of the code below is that the list needs to be in sorted order.
# Test data (not sorted)
sum_weekly = [
    ('2020/01/05', 59), ('2020/01/19', 88), ('2020/01/26', 95), ('2020/02/02', 89),
    ('2020/02/09', 113), ('2020/02/16', 90), ('2020/02/23', 68), ('2020/03/01', 74), ('2020/03/08', 85),
    ('2020/04/19', 6), ('2020/04/26', 5), ('2020/05/03', 14),
    ('2020/05/10', 5), ('2020/05/17', 20), ('2020/05/24', 28), ('2020/03/15', 56), ('2020/03/29', 5), ('2020/04/12', 2),
]
month = sum_weekly[0][0].split('/')[1]
count = 0
out = []
for item in sum_weekly:
    m_sel = item[0].split('/')[1]
    if m_sel != month:
        # The month changed: flush the finished run and start a new total.
        out.append((month, count))
        count = item[1]
    else:
        count += item[1]
    month = m_sel
# Flush the last run, which the loop never gets to append.
out.append((month, count))
# Desired monthly sums: ('01', 242), ('02', 360), ('03', 220), ('04', 13), ('05', 67).
# Only consecutive entries of the same month are merged, so input that is not
# in date order (like the test data above) yields several partial entries
# for the same month.
print(out)
You could use defaultdict to store the result instead of a list. The keys of the dictionary would be the months and you can simply add the values with the same month (key).
Possible implementation:
# Test Data
from collections import defaultdict

sum_weekly = [
    ('2020/01/05', 59), ('2020/01/19', 88), ('2020/01/26', 95), ('2020/02/02', 89),
    ('2020/02/09', 113), ('2020/02/16', 90), ('2020/02/23', 68), ('2020/03/01', 74), ('2020/03/08', 85),
    ('2020/03/15', 56), ('2020/03/29', 5), ('2020/04/12', 2), ('2020/04/19', 6), ('2020/04/26', 5),
    ('2020/05/03', 14),
    ('2020/05/10', 5), ('2020/05/17', 20), ('2020/05/24', 28),
]
results = defaultdict(int)
for date, count in sum_weekly:  # used unpacking to make it clearer
    month = date.split('/')[1]
    # Because results is a defaultdict(int), a month seen for the first time
    # is created automatically and initialized to zero before the addition.
    results[month] += count
print(results)
You can use itertools.groupby (it is part of standard library) - it does pretty much what you did under the hood (grouping together sequences of elements for which the key function gives same output). It can look like the following:
import itertools
def select_month(item):
    """Return the month part of a ``('YYYY/MM/DD', value)`` entry as a string."""
    return item[0].split('/')[1]


def get_value(item):
    """Return the value (second element) of an entry."""
    return item[1]
# groupby only merges adjacent items, so sort by the same key it groups on.
result = [(month, sum(map(get_value, group)))
          for month, group in itertools.groupby(sorted(sum_weekly, key=select_month), select_month)]
print(result)
Terse, but maybe not that pythonic:
import calendar
import collections
import datetime  # needed for datetime.datetime.strptime below
import functools
{calendar.month_name[i]: val for i, val in functools.reduce(lambda a, b: a + b, [collections.Counter({datetime.datetime.strptime(time, '%Y/%m/%d').month: val}) for time, val in sum_weekly]).items()}
a method using pyspark
from pyspark import SparkContext
sc = SparkContext()
# Hand the (date, value) pairs to Spark.
l = sc.parallelize(sum_weekly)
# Key each value by the month field of its date string, then add up the values per key.
r = l.map(lambda x: (x[0].split("/")[1], x[1])).reduceByKey(lambda p, q: (p + q)).collect()
print(r) #[('04', 13), ('02', 360), ('01', 242), ('03', 220), ('05', 67)]
You can accomplish this with a Pandas dataframe. First, you isolate the month, and then use groupby.sum().
import pandas as pd

sum_weekly = [('2020/01/05', 59), ('2020/01/19', 88), ('2020/01/26', 95), ('2020/02/02', 89), ('2020/02/09', 113), ('2020/02/16', 90), ('2020/02/23', 68), ('2020/03/01', 74), ('2020/03/08', 85), ('2020/04/19', 6), ('2020/04/26', 5), ('2020/05/03', 14), ('2020/05/10', 5), ('2020/05/17', 20), ('2020/05/24', 28), ('2020/03/15', 56), ('2020/03/29', 5), ('2020/04/12', 2)]
df = pd.DataFrame(sum_weekly, columns=['Date', 'Sum'])
# The month is the middle field of the 'YYYY/MM/DD' string.
df['Month'] = df['Date'].str.split('/').str[1]
# Aggregate only the numeric column: summing the whole frame would, depending
# on the pandas version, also try to "sum" the Date strings.
monthly_totals = df.groupby('Month')[['Sum']].sum()
print(monthly_totals)

Python: unique value in list array

Imagine this output from fuzzywuzzy (the values could be in a different order):
[('car', 100, 28),
('tree', 80, 5),
('house', 44, 12),
('house', 44, 25),
('house', 44, 27)]
i want to treat the three houses as the same.
What is an efficient way to have only unique string values to come to this result:
(EDIT: since all houses have the same value 44, I don't care which of them is in the list. The last value of the house entries is irrelevant.)
[('car', 100, 28),
('tree', 80, 5),
('house', 44, 12)]
I saw a lot of questions here about uniqueness in lists, but the answers are not working for my example, mostly because author needs a solution just for one list.
I tried this:
unique = []
for element in domain1:
    # NOTE: element[0] is compared against the whole entries stored in
    # unique; assuming domain1 holds tuples like the ones shown, a string
    # never equals a tuple, so this test is always true and nothing is
    # filtered out.
    if element[0] not in unique:
        unique.append(element)
I thought I could address the first values with element[0] and check if they exist in unique.
If I print unique I have the same result as after fuzzywuzzy. Seems I am not on the right path with my idea, so how can I achieve my desired result?
Thanks!
you can use dict for it for example:
data = [
    ('car', 100, 28),
    ('tree', 80, 5),
    ('house', 44, 12),
    ('house', 44, 25),
    ('house', 44, 27),
]
# Later keys overwrite earlier ones in a dict, so feeding the rows back to
# front leaves the first row of each name as the surviving value.
list(dict((row[0], row) for row in reversed(data)).values())
gives you
[('house', 44, 12), ('tree', 80, 5), ('car', 100, 28)]
Using the dict makes the entries unique by their first element, and reversed() is needed so that the first occurrence ends up in the result (by default the last one encountered would win).
Could use dict.setdefault here to store the first item found(using first item in tuple as key):
lst = [
    ("car", 100, 28),
    ("tree", 80, 5),
    ("house", 44, 12),
    ("house", 44, 25),
    ("house", 44, 27),
]
d = {}
for x, y, z in lst:
    # setdefault stores the tuple only when the key is new, so the first
    # tuple seen for each name is the one that is kept.
    d.setdefault(x, (x, y, z))
print(list(d.values()))
Or using indexing instead of tuple unpacking:
d = {}
for item in lst:
    # Key on the first field; an existing key is left untouched.
    d.setdefault(item[0], item)
Output:
[('car', 100, 28), ('tree', 80, 5), ('house', 44, 12)]

How to filter dictionary by value? [duplicate]

This question already has answers here:
How to filter a dictionary according to an arbitrary condition function?
(7 answers)
Closed 4 years ago.
I have a dictionary in the format "site_name": (site_id, frequency):
d=[{'fpdownload2.macromedia.com': (1, 88),
'laposte.net': (2, 23),
'www.laposte.net': (3, 119),
'www.google.com': (4, 5441),
'match.rtbidder.net': (5, 84),
'x2.vindicosuite.com': (6, 37),
'rp.gwallet.com': (7, 88)}]
Is there a smart way to filter dictionary d by value so that I have only those positions, where frequency is less than 100? For example:
d=[{'fpdownload2.macromedia.com': (1, 88),
'laposte.net': (2, 23),
'match.rtbidder.net': (5, 84),
'x2.vindicosuite.com': (6, 37),
'rp.gwallet.com': (7, 88)}]
I don't want to use loops, just looking for smart and efficient solution...
You can use a dictionary comprehension with unpacking for a more Pythonic result:
d = [{
    'fpdownload2.macromedia.com': (1, 88),
    'laposte.net': (2, 23),
    'www.laposte.net': (3, 119),
    'www.google.com': (4, 5441),
    'match.rtbidder.net': (5, 84),
    'x2.vindicosuite.com': (6, 37),
    'rp.gwallet.com': (7, 88),
}]
# Unpack every value into (site_id, frequency) and keep only the sites whose
# frequency is below 100; the result is wrapped in a list like the input.
new_data = [{
    site: (site_id, frequency)
    for site, (site_id, frequency) in d[0].items()
    if frequency < 100
}]
Output:
[{'laposte.net': (2, 23), 'fpdownload2.macromedia.com': (1, 88), 'match.rtbidder.net': (5, 84), 'x2.vindicosuite.com': (6, 37), 'rp.gwallet.com': (7, 88)}]
You can use a dictionary comprehension to do the filtering:
d = {
    'fpdownload2.macromedia.com': (1, 88),
    'laposte.net': (2, 23),
    'www.laposte.net': (3, 119),
    'www.google.com': (4, 5441),
    'match.rtbidder.net': (5, 84),
    'x2.vindicosuite.com': (6, 37),
    'rp.gwallet.com': (7, 88),
}
# stats[1] is the second tuple element; entries at or above 100 are dropped.
d_filtered = {site: stats for site, stats in d.items() if stats[1] < 100}
What you want is a dictionary comprehension. I'll show it with a different example:
d = {
    'spam': 120,
    'eggs': 20,
    'ham': 37,
    'cheese': 101,
}
# Rebind d to a new dict holding only the entries whose value is at least 100.
d = {
    key: value
    for key, value in d.items()
    if value >= 100
}
If you don't already understand list comprehensions, this probably looks like magic that you won't be able to maintain and debug, so I'll show you how to break it out into an explicit loop statement that you should be able to understand easily:
new_d = {}
for key, value in d.items():
    # Copy across only the entries that pass the filter.
    if value >= 100:
        new_d[key] = value
If you can't figure out how to turn that back into the comprehension, just use the statement version until you learn a bit more; it's a bit more verbose, but better to have code you can think through in your head.
Your problem is slightly more complicated, because the values aren't just a number but a tuple of two numbers (so you want to filter on value[1], not value). And because you have a list of one dict rather than just a dict (so you may need to do this for each dict in the list). And of course my filter test isn't the same as yours. But hopefully you can figure it out from here.

Comparing items of a tuple within a list of tuples

I have designed a program to calculate some maximum tuples from a list, however I am stuck at the point where I have to compare the maximum item[1] from a tuple and compare it to another tuple item[1].
This is the list of tuples:
fff = []
anslist = [(2, [1]), (3, [7]), (4, [2]), (5, [5]), (6, [8]), (7, [16]), (8, [3]), (9, [19]), (10, [6]), (11, [14]), (12, [9]), (13, [9]), (14, [17]), (15, [17]), (16, [4]), (17, [12]), (18, [20]), (19, [20]), (20, [7])]
This is the code I have:
print(max(anslist, key=lambda x: x[1]))
fff.append(max(anslist, key=lambda x: x[1]))
anslist.remove(max(anslist, key=lambda x: x[1]))
# fff holds exactly one tuple at this point, so fff[1] is out of range.
while (max(fff[1], key=lambda x: x[1])) == (max(anslist[1], key=lambda x: x[1])):
    print(max(anslist, key=lambda x: x[1]))
    anslist.remove(max(anslist, key=lambda x: x[1]))
I expect my program to print (18, [20]) and (19, [20]), and because the list of tuples always changes in response to the users input I needed some kind of loop which needed to go through all tuples until the second item of the tuple is not the largest anymore.
This is the error I get :
while (max(fff[1], key=lambda x: x[1])) == (max(anslist[1], key=lambda x: x[1])):
IndexError: list index out of range
The problem is with your subscript [1] which is in the wrong position.
Instead of:
while (max(fff[1], key=lambda x: x[1])) == (max(anslist[1], key=lambda x: x[1])):
This is what you want:
while max(fff, key=lambda x: x[1])[1] == max(anslist, key=lambda x: x[1])[1]:
That prints (19, [20]) as you expect.
(I also removed all unneeded parens from your expression.)
A much more efficient way to do this is to find the max only once and filter the list based on that:
# Largest second element across all tuples, found in a single pass.
max_element = max(pair[1] for pair in anslist)
# Keep every tuple whose second element ties with that maximum.
max_tuples = list(filter(lambda pair: pair[1] == max_element, anslist))
Also interesting is sorting anslist to put (18, [20]) and (19, [20]) in front, after which you can use a simple for loop and break when the second element of the tuple changes.
# Sort a copy in descending order of the second element; anslist is untouched.
sorted_anslist = list(anslist)
sorted_anslist.sort(key=lambda pair: pair[1], reverse=True)

Python sum values in list of tuples up to certain values

NOTE: I edited the question!
I am having trouble with iteration in Python, especially when I would like to sum up values up to a certain number. Here's more information on the problem I'm facing:
I have a list of tuples that looks like this:
[(1, 0.5, 'min'),
(2, 3, 'NA'),
(3, 6, 'NA'),
(4, 40, 'NA'),
(5, 90, 'NA'),
(6, 130.8, 'max'),
(7, 129, 'NA'),
(8, 111, 'NA'),
(9, 8, 'NA'),
(10, 9, 'NA'),
(11, 0.01, 'min'),
(12, 9, 'NA'),
(13, 40, 'NA'),
(14, 90, 'NA'),
(15, 130.1, 'max'),
(16, 112, 'NA'),
(17, 108, 'NA'),
(18, 90, 'NA'),
(19, 77, 'NA'),
(20, 68, 'NA'),
(21, 0.9, 'min'),
(22, 8, 'NA'),
(23, 40, 'NA'),
(24, 90, 'NA'),
(25, 92, 'NA'),
(26, 130.4, 'max')]
I want to sum each value leading up to "max" and each value leading up to "min" and append these results to two separate lists.
For instance, the output should be:
min_sums = [1+2+3+4+5,11+12+13+14, 21+22+23+24+25]
max_sums = [6+7+8+9+10, 15+16+17+18+19+20, 26]
I would also like to keep track of the values I am actually summing up and have this as an output as well:
min_sums_lst = [[1,2,3,4,5], [11,12,13,14],[21,22,23,24,25]]
max_sums_lst = [[6,7,8,9,10], [15,16,17,18,19,20], [26]]
I'm thinking I can use the index value, but am pretty new to Python and am not exactly sure how to proceed. I'm studying biology, but I believe learning CS could help with my work.
# Asker's attempt at splitting the first tuple field into min/max groups.
max_list = []
min_list = []
flag = ''
min_index = 0
max_index = float('inf');
# flag is still '' here, so neither branch below runs.
if flag == 'h':
    max_list.append(item)
elif flag == 'c':
    min_list.append(item)
# NOTE(review): minmax_list is not defined in this snippet; presumably it is
# the list of (index, value, kind) tuples shown in the question.
# NOTE(review): max_list and min_list are lists, so the max_list(...) and
# min_list(...) calls in the loop raise TypeError; presumably a helper
# taking (flag, item) was intended.
for i, item in enumerate(minmax_list):
    print(i, item)
    print("max_index: ", max_index)
    print("min_index: ", min_index)
    if item[2] == 'min':
         min_index = i
         max_list('h', item[0])
    elif item[2] == 'NA' and (i < max_index):
        max_list('h', item[0])
    elif item[2] == 'max':
         max_index = i
         max_list('c', item[0])
    elif item[2] == 'NA' and (i > min_index):
        min_list('c', item[0])
I'm quite new to Python - any help would be appreciated. I am only trying to add the first item in each tuple based on min and max as indicated in the output above.
My answer takes a slightly different approach to @Stefan's. It does a bit more validation, and you could pretty easily add other kinds besides 'min' and 'max'.
def partition_items(items):
    """Sum the first field of each item into runs opened by 'min' or 'max'.

    ``items`` is an iterable of ``(value, _, kind)`` triples.  Whenever
    ``kind`` is not 'NA' and differs from the kind of the current run, a new
    running total is opened in the list for that kind.  Every item's
    ``value`` is then added to the most recently opened total.

    Returns a dict with the keys 'min' and 'max', each mapping to the list
    of totals for the runs of that kind, in order of appearance.
    """
    lists = {
        'min': [],
        'max': [],
    }
    current_kind = None
    current_list = None
    for value, _, kind in items:
        if kind != current_kind and kind != 'NA':
            current_kind = kind
            # You'll get an error (KeyError) here if current_kind isn't one
            # of 'min' or 'max'.
            current_list = lists[current_kind]
            current_list.append(0)
        # You'll get an error here if the first item in the list doesn't
        # have a kind of 'min' or 'max', because current_list is still None.
        current_list[-1] += value
    return lists
lists = partition_items(items)
print(lists['min'])
# -> [15, 50, 115]
print(lists['max'])
# -> [40, 105, 26]
Sorry, didn't bother reading your attempt, looks very complicated.
min_sums = []
max_sums = []
for x, _, what in minmax_list:
    if what != 'NA':
        # A 'min' or 'max' marker opens a new running total in its list.
        current = min_sums if what == 'min' else max_sums
        current.append(0)
    # Every item, marker or not, is added to the most recently opened total.
    current[-1] += x

Categories