Efficiently distribute large number of events over a 1-year time span - python

I am trying to efficiently and evenly distribute events over a year. For each year I have about 10^6 events. Each event should be assigned a date. Below I posted my approach which is quite slow. Do you see a way of speeding this up?
# Approach from the question: for every single event, Jan 1 of `year` is
# re-parsed from a string with strptime, shifted by a fractional number of
# days (365 * evt / events_this_year), and formatted back to "YYYY-MM-DD".
for year in range(start_year, end_year + 1):
for evt in range(events_this_year):
event_date = (datetime.datetime.strptime(str(year) + "-01-01", "%Y-%m-%d") + datetime.timedelta(days=365 * evt / events_this_year)).strftime("%Y-%m-%d")

You transition back and forth from datetime to str, that's the heavy part.
First of all, you can create the year datetime once in the outer loop instead of every time inside the inner loop; this alone already improves performance by ~3.5x (on my machine).
Keeping the results as datetime values instead of strings (if that's OK for you) gives a 110x(!!) performance boost:
import datetime
from timeit import timeit
start_year = 2010
end_year = 2020
events_this_year = 10 ** 5
def using_strptime():
    """Baseline: build every event date through a str -> datetime -> str trip.

    Reads the module-level ``start_year``, ``end_year`` and
    ``events_this_year`` and returns one ``"YYYY-MM-DD"`` string per event,
    spreading each year's events over 365 days starting at Jan 1.
    """
    date_format = "%Y-%m-%d"
    formatted = []
    for year in range(start_year, end_year + 1):
        for evt in range(events_this_year):
            # Parsed again on every iteration on purpose: this per-event
            # round trip is exactly the cost the benchmark measures.
            jan_first = datetime.datetime.strptime(str(year) + "-01-01", date_format)
            offset = datetime.timedelta(days=365 * evt / events_this_year)
            formatted.append((jan_first + offset).strftime(date_format))
    return formatted
def using_delta(first_year=None, last_year=None, n_events=None):
    """Return evenly spaced event dates as ``"YYYY-MM-DD"`` strings.

    Each year's events start exactly at Jan 1 00:00 and are spaced by
    ``year length / n_events``, so the last event always stays inside the
    same year.

    Args:
        first_year: First year to fill; defaults to the module-level
            ``start_year``.
        last_year: Last year to fill (inclusive); defaults to the
            module-level ``end_year``.
        n_events: Events per year; defaults to the module-level
            ``events_this_year``.

    Returns:
        A flat list with ``n_events`` strings per year, in chronological
        order. Empty when ``n_events`` is less than 1.
    """
    if first_year is None:
        first_year = start_year
    if last_year is None:
        last_year = end_year
    if n_events is None:
        n_events = events_this_year

    result = []
    if n_events < 1:
        return result

    date_format = "%Y-%m-%d"
    for year in range(first_year, last_year + 1):
        year_dt = datetime.datetime(year=year, month=1, day=1)
        # Use the real length of this year (365 or 366 days) so leap years
        # are filled up to Dec 31 as well.
        days_in_year = (datetime.date(year, 12, 31) - datetime.date(year, 1, 1)).days + 1
        # timedelta / int keeps microsecond precision; flooring to whole
        # seconds would make a large event count finish days before Dec 31.
        step = datetime.timedelta(days=days_in_year) / n_events
        # Index 0 maps to Jan 1 itself; stepping before the first append
        # would push the final event onto Jan 1 of the following year.
        result.extend(
            (year_dt + step * i).strftime(date_format) for i in range(n_events)
        )
    return result
def using_delta_nostring(first_year=None, last_year=None, n_events=None):
    """Return evenly spaced event timestamps as ``datetime`` objects.

    Same distribution as the string-producing variant, but the results are
    left as ``datetime.datetime`` values (no ``strftime``), which is what
    makes this version the fastest.

    Args:
        first_year: First year to fill; defaults to the module-level
            ``start_year``.
        last_year: Last year to fill (inclusive); defaults to the
            module-level ``end_year``.
        n_events: Events per year; defaults to the module-level
            ``events_this_year``.

    Returns:
        A flat list with ``n_events`` datetimes per year, in chronological
        order. Empty when ``n_events`` is less than 1.
    """
    if first_year is None:
        first_year = start_year
    if last_year is None:
        last_year = end_year
    if n_events is None:
        n_events = events_this_year

    result = []
    if n_events < 1:
        return result

    for year in range(first_year, last_year + 1):
        year_dt = datetime.datetime(year=year, month=1, day=1)
        # Real year length (365 or 366 days) so leap years are fully covered.
        days_in_year = (datetime.date(year, 12, 31) - datetime.date(year, 1, 1)).days + 1
        # Exact timedelta division: a whole-second floor would leave the
        # tail of the year empty for large event counts.
        step = datetime.timedelta(days=days_in_year) / n_events
        # Event 0 sits on Jan 1 00:00; the last one is one step before the
        # next Jan 1, so nothing leaks into the following year.
        result.extend(year_dt + step * i for i in range(n_events))
    return result
# Time each variant once, in the same order as they are defined, then report
# the raw timings followed by the speed-up of each faster variant over the
# strptime baseline.
t1, t2, t3 = (
    timeit(statement, globals=globals(), number=1)
    for statement in ('using_strptime()', 'using_delta()', 'using_delta_nostring()')
)
for elapsed in (t1, t2, t3):
    print(elapsed)
print("Ratios:")
for faster in (t2, t3):
    print(t1 / faster)
Output on my machine:
22.7066284
6.213773400000001
0.20198889999999992
Ratios:
3.654241463005393
112.4152287576199

There is no need to ever create dates by strptime.
Compute the time delta between two consecutive events once, then build the list of events by multiplying that delta by each event's index and adding the result to the first day of the year:
import datetime
start_year = 2021
end_year = 2021
events_this_year = 10**6

# Measuring the year between two real dates handles leap years.
day_first = datetime.datetime(start_year, 1, 1)
next_year_first = datetime.datetime(start_year + 1, 1, 1)
# Last representable instant of the year. (Writing it by hand is error-prone:
# the 7th positional argument of datetime is *microseconds*, so 999 there is
# 0.000999 s, not the end of the final second.)
day_last = next_year_first - datetime.timedelta.resolution

# Delta between equally spaced events: the whole year (half-open interval
# [day_first, next_year_first)) divided by the number of events.
ticks_per_event = (next_year_first - day_first) / events_this_year

# Multiply the delta by each event's index and add it to the first day.
# For 10**6 events in 2021 the spacing is exactly 31.536 s.
events = [(day_first + ticks_per_event * i, f"Event: {i}")
          for i in range(events_this_year)]

print(events[:5], "...", events[-5:], sep="\n")
Output:
[(datetime.datetime(2021, 1, 1, 0, 0), 'Event: 0'),
(datetime.datetime(2021, 1, 1, 0, 0, 31, 535999), 'Event: 1'),
(datetime.datetime(2021, 1, 1, 0, 1, 3, 71998), 'Event: 2'),
(datetime.datetime(2021, 1, 1, 0, 1, 34, 607997), 'Event: 3'),
(datetime.datetime(2021, 1, 1, 0, 2, 6, 143996), 'Event: 4'),
(datetime.datetime(2021, 1, 1, 0, 2, 37, 679995), 'Event: 5')]
...
[(datetime.datetime(2021, 12, 31, 23, 57, 21, 320005), 'Event: 999995'),
(datetime.datetime(2021, 12, 31, 23, 57, 52, 856004), 'Event: 999996'),
(datetime.datetime(2021, 12, 31, 23, 58, 24, 392003), 'Event: 999997'),
(datetime.datetime(2021, 12, 31, 23, 58, 55, 928002), 'Event: 999998'),
(datetime.datetime(2021, 12, 31, 23, 59, 27, 464001), 'Event: 999999')]
Just collecting the dates (you can zip() them back together with your event list) for
from timeit import timeit
# Statement 1: the approach from the question. The snippet is compiled by
# timeit, so its loop bodies must be indented inside the string.
k = """
e = []
for year in range(start_year, end_year + 1):
    for evt in range(events_this_year):
        e.append((datetime.datetime.strptime(str(year) + "-01-01", "%Y-%m-%d") + datetime.timedelta(days=365 * evt / events_this_year)).strftime("%Y-%m-%d"))
"""
print(timeit(k, number=2, globals=globals()))

# Statement 2: index * delta added to the first day, dates only.
k = '[day_first + ticks_per_event * i for i in range(events_this_year)]'
print(timeit(k, number=2, globals=globals()))
takes
100.682846539 # yours
1.786883751 # mine

Related

Generate random start and end date between a date range

I have written some utility functions that generate a random start and end date within a specified date range and return both the EST and the UTC times. However, I'm not getting consistent results: sometimes the result is offset by 4 hours between UTC and EST, and sometimes it's offset by 5 hours. Can anyone help me find what's wrong with the code?
def generate_random_dt_btn_two_dates(start_date,end_date, line_item):
    """Pick a random moment after ``start_date`` and return it in two zones.

    The number of whole days between the two dates (halved when ``line_item``
    is truthy, and never less than 1) bounds the random day offset; a random
    time of day is then added on top.

    Returns a dict with ``"utc_date"`` (``start_date`` plus the random offset)
    and ``"est_date"`` (the same instant converted to America/New_York).

    NOTE(review): America/New_York observes daylight saving time, so the
    converted value is 5 hours behind UTC in winter and 4 hours behind in
    summer -- presumably the varying offset seen by callers; confirm.
    NOTE(review): assumes ``start_date`` is timezone-aware UTC, as in the
    defaults of the only visible caller.
    """
    span_days = (end_date - start_date).days
    if line_item:
        span_days = round(span_days / 2)
    # randrange() needs a positive upper bound, so keep at least one day.
    span_days = max(span_days, 1)

    random_offset = timedelta(
        days=random.randrange(span_days),
        hours=random.randrange(24),
        minutes=random.randrange(60),
        seconds=random.randrange(60),
        microseconds=random.randrange(1000),
        milliseconds=random.randrange(1000),
    )
    utc_date = start_date + random_offset
    new_york = pytz.timezone("America/New_York")
    return {"utc_date": utc_date, "est_date": utc_date.astimezone(new_york)}
def generate_random_dt_range(start_date = datetime(2022, 1, 1, 15, 16, 17, 345, tzinfo=timezone.utc),
                             end_date = datetime(2022, 7, 30, 15, 16, 17, 345, tzinfo=timezone.utc),
                             line_item = False
                             ):
    """Draw a random start moment, then a random end moment after it.

    The start is drawn between ``start_date`` and ``end_date``; the end is
    drawn between that start's UTC value and ``end_date``. Each entry of the
    returned dict (``"start_date"``, ``"end_date"``) is itself the
    ``{"utc_date": ..., "est_date": ...}`` dict produced by
    ``generate_random_dt_btn_two_dates``.
    """
    picked = {
        "start_date": generate_random_dt_btn_two_dates(start_date, end_date, line_item),
    }
    # The second draw starts from the first result so the end follows the start.
    picked["end_date"] = generate_random_dt_btn_two_dates(
        start_date=picked["start_date"]["utc_date"],
        end_date=end_date,
        line_item=line_item,
    )
    return picked
campaign_dates = generate_random_dt_range()

# A trailing "Z" means UTC, so it is only correct for the UTC values. The
# New York values carry their real UTC offset (%z) instead; this makes the
# -0500 (standard time) vs -0400 (daylight saving time) difference visible
# rather than hiding it behind a wrong "Z".
utc_format = '%Y-%m-%dT%H:%M:%S.%fZ'
local_format = '%Y-%m-%dT%H:%M:%S.%f%z'

start_pair = campaign_dates["start_date"]
end_pair = campaign_dates["end_date"]
c_end_date = end_pair["est_date"].strftime(local_format)
c_start_date = start_pair["est_date"].strftime(local_format)
c_end_date_utc = end_pair["utc_date"].strftime(utc_format)
c_start_date_utc = start_pair["utc_date"].strftime(utc_format)

print("start_date:", c_start_date, c_start_date_utc)
print("end_date:", c_end_date, c_end_date_utc)

Is there any way to combine time and date in python using pandas?

I have 2 functions which gives output as date and time
def jdtodatestd(jdate):
    """Convert a Julian (year + day-of-year) string to a ``datetime.date``.

    Supported inputs, selected by string length:
      * 5 characters: two-digit year followed by a three-digit day of year.
      * 6 characters: two-digit year followed by a four-character day field
        whose leading zeros are stripped before parsing.
      * 7 characters: four-digit year followed by a three-digit day of year.

    Any other length returns ``None``.
    """
    length = len(jdate)
    if length == 7:
        pattern = '%Y%j'
    elif length == 6:
        # Drop the zero padding of the day field so it parses as %j.
        jdate = jdate[:2] + jdate[2:].lstrip('0')
        pattern = '%y%j'
    elif length == 5:
        pattern = '%y%j'
    else:
        return None
    return datetime.datetime.strptime(jdate, pattern).date()
jdtodatestd('120365')
Output: datetime.date(2012, 12, 30)
def jdtotimestd(jtime):
    """Format an ``HHMMSS`` digit string as ``'HH:MM:SS'``.

    A 5-character input gets a leading zero, a 6-character input is used as
    is, and any other length is treated as ``'000000'``.
    """
    length = len(jtime)
    if length == 5:
        digits = '0' + jtime
    elif length == 6:
        digits = jtime
    else:
        digits = '000000'
    return ':'.join((digits[0:2], digits[2:4], digits[4:6]))
jdtotimestd('140932')
Output: '14:09:32'
I would like to combine both into a single value such as '2012, 12, 30 14:09:32'.
How can I do that?
Modify your function jdtotimestd:
def jdtotimestd(jtime):
    """Return a ``datetime.time`` parsed from an ``HHMMSS`` digit string.

    The input is left-padded with zeros to six characters; the first two
    characters are the hour, the next two the minute, and the rest the second.
    """
    padded = jtime.zfill(6)
    hour, minute, second = (
        int(padded[begin:stop]) for begin, stop in ((0, 2), (2, 4), (4, None))
    )
    return datetime.time(hour, minute, second)
# Convert the date and time parts separately, then merge them into a single
# datetime. combine() needs a datetime.time for `t`, so this relies on the
# jdtotimestd variant that returns a time object rather than a string.
d = jdtodatestd('120365')
t = jdtotimestd('140932')
dt = datetime.datetime.combine(d, t)
Output:
>>> dt
datetime.datetime(2012, 12, 30, 14, 9, 32)
You can use strftime and strptime:
output1 = jdtodatestd('120365')
output2 = jdtotimestd('140932')
>>> datetime.datetime.strptime(datetime.datetime.strftime(output1, "%Y-%m-%d")+output2, "%Y-%m-%d%H:%M:%S")
datetime.datetime(2012, 12, 30, 14, 9, 32)

Pulling items for presentation in python - I have a list taken from outlook, but need to break those items into individual strings

I am creating a Jarvis style screen and have pulled data from outlook for upcoming meetings that I wish to present on the screen.
The function pulls data from outlook and presents it in a list: -
event(Start=datetime.datetime(2020, 11, 30, 12, 30), Subject='meeting 1 description',
Duration=60)
event(Start=datetime.datetime(2020, 11, 30, 14, 0), Subject='meeting 2 description', Duration=60)
event(Start=datetime.datetime(2020, 12, 1, 8, 30), Subject='meeting 3 description', Duration=60)
event(Start=datetime.datetime(2020, 12, 1, 10, 15), Subject='meeting 4 description', Duration=45)
event(Start=datetime.datetime(2020, 12, 1, 11, 0), Subject='meeting 5 description ',
Duration=90)"
This is great, but what I want to do now is have this present as:
Start time = 'start time'
Subject = 'Meeting description'
Duration = 'duration of meeting'
Is there a way of slicing up the string in a list item and then pulling that into the code as I want it presented? Basically splitting the item in a list into component parts?
Here is the code that pulls the lists: -
def get_date(datestr):
    """Return the ``Start`` of an appointment-like object as a ``datetime``.

    First tries ``datestr.Start.timestamp()``; if anything in that path
    raises, falls back to treating ``datestr.Start`` as a number of seconds.
    Either way the value goes through ``datetime.datetime.fromtimestamp``.
    """
    from_timestamp = datetime.datetime.fromtimestamp
    try:  # py3
        return from_timestamp(datestr.Start.timestamp())
    except Exception:
        return from_timestamp(int(datestr.Start))
def getCalendarEntries(days=3, dateformat="%d/%m/%Y"):
    """Collect Outlook appointments from today up to ``days`` days ahead.

    Opens Outlook through COM, sorts the items of default folder 9 by start,
    enables recurrences, and restricts them to the window between today's
    date and the date ``days`` days from now (both formatted with
    ``dateformat``). Returns a list of ``event(start, subject, duration)``
    records, one per appointment.

    NOTE(review): 9 is presumably Outlook's calendar folder constant
    (olFolderCalendar) -- confirm against the Outlook object model.
    """
    outlook_app = win32com.client.Dispatch("Outlook.Application")
    mapi = outlook_app.GetNamespace("MAPI")
    items = mapi.GetDefaultFolder(9).Items
    # Order matters here: sort first, then enable recurrences, then filter.
    items.Sort("[Start]")
    items.IncludeRecurrences = "True"

    window_start = datetime.datetime.today()
    window_end = window_start + datetime.timedelta(days=days)
    begin = window_start.date().strftime(dateformat)
    end = window_end.date().strftime(dateformat)
    restriction = "[Start] >= '" + begin + "' AND [END] <= '" + end + "'"

    return [
        event(get_date(a), a.Subject, a.Duration)
        for a in items.Restrict(restriction)
    ]
# Script entry point: fetch the upcoming calendar entries when run directly.
if __name__ == "__main__":
    events = getCalendarEntries()
Thanks all,
Graeme
This may be a bit hacky, but the syntax for event in your string is the same as the one you would use to define a dictionary. So we can replace 'event' with 'dict' and call eval, which evaluates a string as if it were Python code. For example, if you run this
import datetime
event_str = r"event(Start=datetime.datetime(2020, 11, 30, 12, 30), Subject='meeting 1 description', Duration=60)"
# Swap only the leading constructor name (count=1): an unrestricted
# replace('event', 'dict') would also rewrite the word "event" wherever it
# appears inside the Subject text.
dict_str = event_str.replace('event', 'dict', 1)
# NOTE(review): eval() executes whatever code the string contains. Only use
# it on strings you produced yourself, never on untrusted input.
my_dict = eval(dict_str)
print(my_dict)
this will print
{'Start': datetime.datetime(2020, 11, 30, 12, 30), 'Subject': 'meeting 1 description', 'Duration': 60}
So my_dict will be a dictionary that you can pull various bits out of, such as my_dict['Start'] will give you the start (as datetime), etc
you would need to call this construct on each element of your events list, eg the following should create a list of dictionaries, one for each event
# count=1 limits the swap to the leading "event(" so subjects containing the
# word "event" are left intact. eval() runs arbitrary code: trusted input only.
all_dicts = [eval(e.replace('event', 'dict', 1)) for e in events]
of course you can save yourself all this trouble if you created dictionaries in the first place, so replace the relevant line in your loop with
events.append(dict(Start=adate, Subject=a.Subject, Duration=a.Duration))
and then use dict functionality to get the fields via events[i]['Start'] etc.
So this is what I came up with after your suggestion - worked like a treat - thank you so much :) You are a super star!
import win32com.client
import datetime
from collections import namedtuple
event = namedtuple("event", "Start Subject Duration")
def get_date(datestr):
    """Return the ``Start`` of an appointment-like object as a ``datetime``.

    First tries ``datestr.Start.timestamp()``; if anything in that path
    raises, falls back to treating ``datestr.Start`` as a number of seconds.
    Either way the value goes through ``datetime.datetime.fromtimestamp``.
    """
    from_timestamp = datetime.datetime.fromtimestamp
    try:  # py3
        return from_timestamp(datestr.Start.timestamp())
    except Exception:
        return from_timestamp(int(datestr.Start))
def getCalendarEntries(days=3, dateformat="%d/%m/%Y"):
    """Collect Outlook appointments from today up to ``days`` days ahead.

    Opens Outlook through COM, sorts the items of default folder 9 by start,
    enables recurrences, and restricts them to the window between today's
    date and the date ``days`` days from now (both formatted with
    ``dateformat``). Returns a list of dicts with the keys ``Start``,
    ``Subject`` and ``Duration``, one per appointment.

    NOTE(review): 9 is presumably Outlook's calendar folder constant
    (olFolderCalendar) -- confirm against the Outlook object model.
    """
    outlook_app = win32com.client.Dispatch("Outlook.Application")
    mapi = outlook_app.GetNamespace("MAPI")
    items = mapi.GetDefaultFolder(9).Items
    # Order matters here: sort first, then enable recurrences, then filter.
    items.Sort("[Start]")
    items.IncludeRecurrences = "True"

    window_start = datetime.datetime.today()
    window_end = window_start + datetime.timedelta(days=days)
    begin = window_start.date().strftime(dateformat)
    end = window_end.date().strftime(dateformat)
    restriction = "[Start] >= '" + begin + "' AND [END] <= '" + end + "'"

    return [
        dict(Start=get_date(a), Subject=a.Subject, Duration=a.Duration)
        for a in items.Restrict(restriction)
    ]
if __name__ == "__main__":
    events = getCalendarEntries()
    # Print every entry instead of indexing events[1], which raises
    # IndexError whenever fewer than two appointments are returned.
    for entry in events:
        print("Time:", entry['Start'])
        print("Subject:", entry['Subject'])
        print("Duration:", entry['Duration'])

How to get the next specific day of the month in Python?

I am using Python 2.7.
I want to get the next 25th day of the month from now on. Today is February 17th, so I should get February 25th. If we were on February 26th, I should get March 25th.
I was not able to find anything about getting the next specific day of the month, but I am pretty sure that Python has something to make it easy.
Does anyone know how?
You could use python-dateutil for that and play with the rrule:
import datetime
from dateutil.rrule import *
# Monthly recurrence anchored at today's date, restricted to the 25th of
# each month; the first two occurrences are the next two "25ths".
now = datetime.date.today()
days = rrule(MONTHLY, dtstart=now, bymonthday=25)
first_hit, second_hit = days[0], days[1]
print(first_hit)  # datetime.datetime(2016, 2, 25, 0, 0)
print(second_hit)  # datetime.datetime(2016, 3, 25, 0, 0)
import datetime
def get_next_date_with_day(day_of_the_month, today=None):
    """Return the next date (today included) falling on ``day_of_the_month``.

    Args:
        day_of_the_month: Target day number, 1..31.
        today: Reference date; defaults to the current local date. A
            ``datetime`` is accepted and reduced to its date part.

    Returns:
        A ``datetime.date`` in every case. Months that are too short to
        contain the requested day (e.g. the 31st in April, the 30th in
        February) are skipped.

    Raises:
        ValueError: if ``day_of_the_month`` is outside 1..31.
    """
    import calendar  # local import: only this function needs it

    if not 1 <= day_of_the_month <= 31:
        raise ValueError(
            "day_of_the_month must be in 1..31, got %r" % (day_of_the_month,)
        )
    if today is None:
        today = datetime.date.today()
    elif isinstance(today, datetime.datetime):
        # Always work with (and return) plain dates, never datetimes.
        today = today.date()

    year, month = today.year, today.month
    if today.day > day_of_the_month:
        # Already past the target this month: start from the next month,
        # wrapping December into January of the following year.
        year, month = year + month // 12, month % 12 + 1
    # Skip months that do not have the requested day at all.
    while calendar.monthrange(year, month)[1] < day_of_the_month:
        year, month = year + month // 12, month % 12 + 1
    return datetime.date(year, month, day_of_the_month)
print get_next_date_with_day(25)
>>2016-02-25
def get_25_th_day(curr_date):
    """Return the next 25th of a month on or after ``curr_date``.

    On or before the 25th the result is the 25th of the same month;
    afterwards it is the 25th of the following month.
    """
    year, month = curr_date.year, curr_date.month
    if curr_date.day > 25:
        # Move to the next month; December wraps to January of next year.
        year, month = year + month // 12, month % 12 + 1
    return datetime.date(year, month, 25)
print get_25_th_day(datetime.date.today())
>> 2016-02-25
print get_25_th_day(datetime.date(2016, 2, 25))
>> 2016-02-25
print get_25_th_day(datetime.date(2016, 2, 26))
>> 2016-03-25
print get_25_th_day(datetime.date(2016, 12, 26))
>> 2017-01-25

Converting a decimal time number (such as 23.33154) to seconds in Python?

I'm trying to convert a decimal time number such as 23.456, 0.5555, 1.9999 etc. to seconds (as a whole number).
I have made a function that converts the decimal time to hours and minutes.
For example, converting to hours:
def decimal_to_hours(t):
    """Return the whole-hour part of a decimal hour value, rounded down."""
    return math.floor(t)
And then for decimal time to minutes:
def decimal_to_mins(t):
    """Return the minutes component of a decimal hour value, rounded down.

    For values below 1 the whole value is converted to minutes; otherwise
    only the fractional part of the hour is converted, so whole hours
    yield 0.
    """
    if t < 1:
        return math.floor(t * 60)
    # Only the fraction of the current hour contributes minutes.
    return math.floor((t - math.floor(t)) * 60)
So my problem is that I don't know how to convert the fraction to seconds. Any help would be appreciated.
Just convert to seconds and do the original operation: floored modulo.
from math import floor
time_hours = 23.99431
time_minutes = time_hours * 60
time_seconds = time_minutes * 60

# Each component is the floor of its unit's total, wrapped at 60 for the
# minutes and seconds so only the remainder within the next larger unit is
# kept.
hours_part, minutes_part, seconds_part = (
    floor(value) for value in (time_hours, time_minutes % 60, time_seconds % 60)
)

clock_template = "{h}:{m}:{s}"
print(clock_template.format(h=hours_part, m=minutes_part, s=seconds_part))
#>>> 23:59:39
def convert_decimal_to_time(decimal):
    """Split a decimal hour value into days, hours, minutes and seconds.

    The value is converted to whole seconds (truncated), then divided down
    from the largest unit to the smallest. Returns a dict with the keys
    ``'days'``, ``'hours'``, ``'minutes'`` and ``'seconds'``.
    """
    remaining = int(decimal * 3600)
    breakdown = {}
    unit_sizes = (('days', 60*60*24), ('hours', 60*60), ('minutes', 60), ('seconds', 1))
    for unit, size in unit_sizes:
        # Quotient is this unit's count; the remainder flows to smaller units.
        breakdown[unit], remaining = divmod(remaining, size)
    return breakdown
convert_decimal_to_time(23.99431)
# {'minutes': 59, 'hours': 23, 'days': 0, 'seconds': 39}
convert_decimal_to_time(2.75)
# {'minutes': 45, 'seconds': 0, 'hours': 2, 'days': 0}
convert_decimal_to_time(252.5)
# {'minutes': 30, 'seconds': 0, 'hours': 12, 'days': 10}

Categories