Business Hours Between Two Dates in Pandas Dataframe (including holidays) - python

Novice Python user here - I'm attempting to Calculate the Business Hours between two dates in a pandas DataFrame given 9am-5pm, Mon-Fri Working Hours and to exclude Australian Public Holidays.
I have tried to hack together a lot of solutions over the past few days and apply it to my problem but I'm having significant trouble.
I will post my current iteration but also looking for feedback as the best way to handle this overall and to gain some understanding of how to tackle these problems in the future.
My latest attempt uses pandas CDay together with a custom holiday calendar for Australian dates, which all seems to be working. Where I get stuck is going from that step to applying it to the date columns of my DataFrame. I am using a custom function from this https://codereview.stackexchange.com/questions/135142/calculate-working-minutes-between-two-timestamps/135200#135200 solution to count the minutes between the dates, but I am having no luck.
Appreciate any help!
import datetime
from pandas.tseries.holiday import Holiday, AbstractHolidayCalendar
from pandas.tseries.offsets import CDay
class HolidayCalendar(AbstractHolidayCalendar):
    """Fixed-date list of Australian public holidays for 2016-2018.

    Each holiday is pinned to an explicit year, so the calendar knows
    nothing outside 2016-2018; extend ``rules`` to cover further years.
    """

    rules = [
        # 2016
        Holiday('New Years Day', year=2016, month=1, day=1),
        Holiday('Australia Day', year=2016, month=1, day=26),
        Holiday('Good Friday', year=2016, month=3, day=25),
        Holiday('Easter Monday', year=2016, month=3, day=28),
        Holiday('ANZAC Day', year=2016, month=4, day=25),
        Holiday('Queens Birthday', year=2016, month=6, day=13),
        Holiday('Christmas Day', year=2016, month=12, day=25),
        Holiday('Boxing Day', year=2016, month=12, day=26),
        # 2017
        Holiday('New Years Day', year=2017, month=1, day=1),
        Holiday('Australia Day', year=2017, month=1, day=26),
        # Good Friday 2017 fell on 14 April (the 15th was Easter Saturday).
        Holiday('Good Friday', year=2017, month=4, day=14),
        Holiday('Easter Monday', year=2017, month=4, day=17),
        Holiday('ANZAC Day', year=2017, month=4, day=25),
        Holiday('Queens Birthday', year=2017, month=6, day=12),
        Holiday('Christmas Day', year=2017, month=12, day=25),
        Holiday('Boxing Day', year=2017, month=12, day=26),
        # 2018
        Holiday('New Years Day', year=2018, month=1, day=1),
        Holiday('Australia Day', year=2018, month=1, day=26),
        Holiday('Good Friday', year=2018, month=3, day=30),
        Holiday('Easter Monday', year=2018, month=4, day=2),
        Holiday('ANZAC Day', year=2018, month=4, day=25),
        Holiday('Queens Birthday', year=2018, month=6, day=11),
        Holiday('Christmas Day', year=2018, month=12, day=25),
        Holiday('Boxing Day', year=2018, month=12, day=26),
    ]
cal = HolidayCalendar()
dayindex = pd.bdate_range(datetime.date(2015,1,1),datetime.date.today(),freq=CDay(calendar=cal))
day_series = dayindex.to_series()
def _working_minutes_on(day, starttime, endtime):
    """Return the minutes of [starttime, endtime] inside 09:00-17:00 on *day*."""
    opening = datetime.datetime(day.year, day.month, day.day, hour=9)
    closing = datetime.datetime(day.year, day.month, day.day, hour=17)
    overlap = min(endtime, closing) - max(starttime, opening)
    # A negative overlap means the interval misses the working window on
    # this day entirely (e.g. it starts after 17:00); count that as zero
    # rather than reading ``.seconds`` off a negative timedelta.
    return max(overlap.total_seconds(), 0) / 60


def count_mins(start, end, business_days=None):
    """Count working minutes (09:00-17:00) between two timestamps.

    Args:
        start: Start of the interval as epoch milliseconds (scalar).
        end: End of the interval as epoch milliseconds (scalar).
        business_days: Series of business days indexed by date, as
            produced by ``DatetimeIndex.to_series()``. Defaults to the
            module-level ``day_series``.

    Returns:
        The number of working minutes; 0 when no business day lies
        between the two dates.

    Note:
        Both timestamps are converted with ``datetime.fromtimestamp``,
        i.e. interpreted in the local time zone of the machine.
    """
    if business_days is None:
        business_days = day_series
    starttime = datetime.datetime.fromtimestamp(int(start) / 1000)
    endtime = datetime.datetime.fromtimestamp(int(end) / 1000)
    days = business_days[starttime.date():endtime.date()]
    daycount = len(days)
    if daycount == 0:
        return daycount
    # .iloc makes the positional access explicit; plain days[0] on a
    # datetime-indexed Series depends on deprecated fallback behaviour.
    first_day = days.iloc[0]
    if daycount == 1:
        return _working_minutes_on(first_day, starttime, endtime)
    last_day = days.iloc[-1]
    first_day_mins = _working_minutes_on(first_day, starttime, endtime)
    last_day_mins = _working_minutes_on(last_day, starttime, endtime)
    # Every business day strictly between the first and last is a full
    # eight-hour (480 minute) day.
    return first_day_mins + last_day_mins + (daycount - 2) * 480
df_updated['Created Date'] = pd.to_datetime(df_updated['Created Date'])
df_updated['Updated Date'] = pd.to_datetime(df_updated['Updated Date'])
df_updated['Created Date'] = df_updated['Created Date'].astype(np.int64) /
int(1e6)
df_updated['Updated Date'] = df_updated['Updated Date'].astype(np.int64) /
int(1e6)
count_mins(df_updated['Created Date'], df_updated['Updated Date'])

Try out this package called business-duration in PyPi
pip install business-duration
Example Code:
from business_duration import businessDuration
import pandas as pd
from datetime import time,datetime
import holidays as pyholidays
# Interval to measure and the daily working window (09:00-17:00).
startdate = pd.to_datetime('2017-01-01 00:00:00')
enddate = pd.to_datetime('2017-01-31 23:00:00')
starttime = time(9, 0, 0)
endtime = time(17, 0, 0)
# Public holidays to exclude, supplied by the `holidays` package.
holidaylist = pyholidays.Australia()
unit = 'hour'
# By default weekends are Saturday and Sunday
print(businessDuration(startdate, enddate, starttime, endtime,
                       holidaylist=holidaylist, unit=unit))
Output: 160.0
holidaylist:
{datetime.date(2017, 1, 1): "New Year's Day",
datetime.date(2017, 1, 2): "New Year's Day (Observed)",
datetime.date(2017, 1, 26): 'Australia Day',
datetime.date(2017, 3, 6): 'Canberra Day',
datetime.date(2017, 4, 14): 'Good Friday',
datetime.date(2017, 4, 15): 'Easter Saturday',
datetime.date(2017, 4, 17): 'Easter Monday',
datetime.date(2017, 4, 25): 'Anzac Day',
datetime.date(2017, 6, 12): "Queen's Birthday",
datetime.date(2017, 9, 26): 'Family & Community Day',
datetime.date(2017, 10, 2): 'Labour Day',
datetime.date(2017, 12, 25): 'Christmas Day',
datetime.date(2017, 12, 26): 'Boxing Day'}

You could use the length of bdate_range:
In [11]: pd.bdate_range('2017-01-01', '2017-10-23')
Out[11]:
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
'2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
'2017-01-12', '2017-01-13',
...
'2017-10-10', '2017-10-11', '2017-10-12', '2017-10-13',
'2017-10-16', '2017-10-17', '2017-10-18', '2017-10-19',
'2017-10-20', '2017-10-23'],
dtype='datetime64[ns]', length=211, freq='B')
In [12]: len(pd.bdate_range('2017-01-01', '2017-10-23'))
Out[12]: 211

I suggest a simpler solution
import pandas as pd
from datetime import datetime
# Working week runs Sunday to Thursday.
weekmask = 'Sun Mon Tue Wed Thu'
# pd.datetime was deprecated in pandas 1.0 and removed in 2.0; use the
# datetime class imported above instead.
exclude = [datetime(2020, 5, 1),
           datetime(2020, 5, 2),
           datetime(2020, 5, 3)]
# freq='C' selects the custom business day, which honours both the
# weekmask and the explicit holiday list.
business_days = pd.bdate_range('2020/4/30', '2020/5/26',
                               freq='C',
                               weekmask=weekmask,
                               holidays=exclude)
print(business_days)

Related

How to take months as inputs then determine start and end date for each month? I also want to go from Dec 2021 to April 2022

This is the code I have so far:
from calendar import isleap
import datetime
year =2021
month= 4
year2=2022
months_choices=[]
for i in range(1, 5):
month = datetime.date(2021, i, 1).strftime('%b')
startDate = f"01-Dec-{year}"
if month in ["Jan", "Mar", "May", "Jul", "Aug", "Oct", "Dec"]:
endDate = f"31-{month}-{year}"
elif month in ["Apr", "Jun", "Sep", "Nov"]:
endDate = f"30-{month}-{year}"
else:
isLeap = isleap(1900)
if isLeap:
endDate = f"29-{month}-{year}"
else:
endDate = f"28-{month}-{year}"
months_choices.append((startDate, endDate))
print(months_choices)
I would like my output to print as [('01-Dec-2021', '31-Dec-2021'), ('01-Jan-2022', '31-Jan-2022'), ('01-Feb-2022', '28-Feb-2022'), ('01-March-2021', '31-March-2021'),('01-April-2021', '30-Apr-2021')], but it prints like below.
print(months_choices)
[('01-Dec-2021', '31-Jan-2021'), ('01-Dec-2021', '28-Feb-2021'), ('01-Dec-2021', '31-Mar-2021'), ('01-Dec-2021', '30-Apr-2021')]
The calendar module has some very useful features that you could utilise.
As a starting point you would benefit from having a function that takes the start month/year and end month/year.
Something like this:
from calendar import monthrange, month_abbr
def range_ok(start_month, start_year, end_month, end_year):
    """Return True when both months are 1-12 and start is not after end."""
    months_valid = 1 <= start_month <= 12 and 1 <= end_month <= 12
    # Tuple comparison orders by year first, then by month.
    in_order = (start_year, start_month) <= (end_year, end_month)
    return months_valid and in_order
def func(start_month, start_year, end_month, end_year):
    """List (first day, last day) strings for every month in the range.

    Both endpoints are inclusive. Dates are formatted as ``DD-Mon-YYYY``.
    An empty list is returned when a month is outside 1-12 or when the
    start lies after the end.
    """
    if not (1 <= start_month <= 12 and 1 <= end_month <= 12):
        return []
    # Number the months consecutively so the year rollover is handled
    # by plain integer arithmetic.
    first_index = start_year * 12 + (start_month - 1)
    last_index = end_year * 12 + (end_month - 1)
    pairs = []
    for month_index in range(first_index, last_index + 1):
        year, zero_based_month = divmod(month_index, 12)
        month = zero_based_month + 1
        label = month_abbr[month]
        final_day = monthrange(year, month)[1]
        pairs.append((f'01-{label}-{year}', f'{final_day}-{label}-{year}'))
    return pairs
print(func(12, 2021, 4, 2022))
Output:
[('01-Dec-2021', '31-Dec-2021'), ('01-Jan-2022', '31-Jan-2022'), ('01-Feb-2022', '28-Feb-2022'), ('01-Mar-2022', '31-Mar-2022'), ('01-Apr-2022', '30-Apr-2022')]
EDIT
As requested in the comments, in order to generate the date range between two dates, the solution can be adjusted like this:
import pandas as pd
import calendar
from datetime import date
start_date = '2021-12-01'
end_date = '2022-04-30'
date_range = pd.date_range(start_date,end_date,
freq='MS').map(lambda x: (x.year, x.month)).tolist()
def get_dates(year, month):
    """Return the first and last day of a month as ``DD-Mon-YYYY`` strings."""
    fmt = "%d-%b-%Y"
    opening = date(year, month, 1)
    # monthrange()[1] is the number of days in the month, leap years included.
    days_in_month = calendar.monthrange(year, month)[1]
    closing = date(year, month, days_in_month)
    return (opening.strftime(fmt), closing.strftime(fmt))
[get_dates(year, month)
for year, month in date_range]
Original solution
Use calendar.monthrange(YEAR, MONTH) to get the last day of the month. It handles the leap years for you.
import calendar
from datetime import date
years = [2021,2022]
months = [range(12, 13), range(1,5)]
def get_dates(year, month):
    """Format the first and last day of the given month as ``DD-Mon-YYYY``."""
    month_start = date(year, month, 1)
    _, month_length = calendar.monthrange(year, month)
    month_end = date(year, month, month_length)
    return tuple(day.strftime("%d-%b-%Y") for day in (month_start, month_end))
[get_dates(year, month)
for year, month_range in zip(years, months)
for month in month_range]
Output:
[('01-Dec-2021', '31-Dec-2021'),
('01-Jan-2022', '31-Jan-2022'),
('01-Feb-2022', '28-Feb-2022'),
('01-Mar-2022', '31-Mar-2022'),
('01-Apr-2022', '30-Apr-2022')]

Getting dates in python between a past datestamp and the present

Language Python
I am wondering if anyone can help me print out some dates.
I am trying to create a loop in which I pass in a date, say 01/01/2017, and the loop then outputs the first and last day of every month between then and the present day.
Example
01/01/2017
31/01/2017
01/02/2017
28/02/2017
etc
Any help will be appreciated
Hope this will help,
Code:
from datetime import date
from dateutil.relativedelta import relativedelta
from calendar import monthrange
d1 = date(2018, 2, 26)
d2 = date.today()


def print_month_day_range(date):
    """Print the first and last day (DD/MM/YYYY) of the month containing *date*."""
    first_day = date.replace(day=1)
    last_day = date.replace(day=monthrange(date.year, date.month)[1])
    print(first_day.strftime("%d/%m/%Y"))
    print(last_day.strftime("%d/%m/%Y"))


# Step from the 1st of the starting month. Comparing the original
# day-of-month against today could run one month past the present
# (e.g. a start on the 26th when today is the 28th), because the date was
# advanced before it was printed.
current = d1.replace(day=1)
while current <= d2:
    print_month_day_range(current)
    current += relativedelta(months=1)
Output:
01/02/2018
28/02/2018
01/03/2018
31/03/2018
...
01/07/2018
31/07/2018
you can use calendar module. Below is the code:
import datetime
from calendar import monthrange
strt_date = '01/1/2017'
# Parse the start date once instead of once per field.
start = datetime.datetime.strptime(strt_date, '%m/%d/%Y')
iter_year = start.year
iter_month = start.month
# Take a single snapshot of "now" so year and month cannot disagree if
# the script happens to run across a year boundary.
today = datetime.datetime.today()
cur_year = today.year
cur_month = today.month
while cur_year > iter_year or (cur_year == iter_year and iter_month <= cur_month):
    number_of_days_in_month = monthrange(iter_year, iter_month)[1]
    # print() with a single argument works on both Python 2 and Python 3.
    print('/'.join([str(iter_month), '01', str(iter_year)]))
    print('/'.join([str(iter_month), str(number_of_days_in_month), str(iter_year)]))
    if iter_month == 12:
        iter_year += 1
        iter_month = 1
    else:
        iter_month += 1
this could work, as long as the first date you give is always the first of the month
from datetime import datetime
from datetime import timedelta
date_string = '01/01/2017'
date = datetime.strptime(date_string, '%d/%m/%Y').date()
today = datetime.now().date()
months = range(1,13)
years = range(date.year, today.year + 1)
for y in years:
for m in months:
new_date = date.replace(month=m, year=y)
last_day = new_date - timedelta(days=1)
if (date < new_date) & (new_date <= today):
print last_day.strftime('%d/%m/%Y')
print new_date.strftime('%d/%m/%Y')
elif (date <= new_date) & (new_date <= today):
print new_date.strftime('%d/%m/%Y')
This code would print the first and last days of all months in a year.
Maybe add some logic to iterate through the years
import datetime
def first_day_of_month(any_day):
    """Return *any_day* moved to day 1 of its own month."""
    return any_day.replace(day=1)
def last_day_of_month(any_day):
    """Return the final day of the month that contains *any_day*."""
    # Day 28 exists in every month, and adding four days always lands in
    # the following month.
    in_next_month = any_day.replace(day=28) + datetime.timedelta(days=4)
    # One day before the 1st of that following month is the day we want.
    return in_next_month.replace(day=1) - datetime.timedelta(days=1)
# Print the first and last day of every month of 2017.
# print() with a single argument works on both Python 2 and Python 3.
for month in range(1, 13):
    print(first_day_of_month(datetime.date(2017, month, 1)))
    print(last_day_of_month(datetime.date(2017, month, 1)))

Count summer days between two dates

I want to count summer days between two dates. Summer is May first to August last.
This will count all days:
import datetime
startdate=datetime.datetime(2015,1,1)
enddate=datetime.datetime(2016,6,1)
delta=enddate-startdate
print delta.days
>>517
But how can I count only the summer days that have passed?
You could define a generator to iterate over every date between startdate and enddate, define a function to check if a date represents a summer day and use sum to count the summer days:
import datetime
startdate = datetime.datetime(2015,1,1)
enddate = datetime.datetime(2016,6,1)
all_dates = (startdate + datetime.timedelta(days=x) for x in range(0, (enddate-startdate).days))
def is_summer_day(date):
    """Return True when *date* falls in May, June, July or August."""
    return date.month in (5, 6, 7, 8)
print(sum(1 for date in all_dates if is_summer_day(date)))
# 154
Thanks to the generator, you don't need to create a huge list in memory with every day between startdate and enddate.
This iteration still considers every single day, even if it's not needed. For very large gaps, you could use the fact that every complete year has 123 summer days according to your definition.
You can create a few functions to count how many summer days you have between two days:
from datetime import date
def get_summer_start(year):
    """Return the first day of summer (1 May) in *year*."""
    return date(year, 5, 1)


def get_summer_end(year):
    """Return the last day of summer (31 August) in *year*."""
    return date(year, 8, 31)


def get_start_date(date, year):
    """Return the later of *date* and the start of summer in *year*."""
    return max(date, get_summer_start(year))


def get_end_date(date, year):
    """Return the earlier of *date* and the end of summer in *year*."""
    return min(date, get_summer_end(year))


def _summer_days_in_year(date1, date2, year):
    """Count the summer days of *year* that fall in [date1, date2)."""
    first = get_start_date(date1, year).toordinal()
    # The +1 turns the inclusive 31 August into an exclusive bound so
    # that the last summer day is counted, while date2 stays exclusive.
    end_exclusive = min(date2.toordinal(), get_summer_end(year).toordinal() + 1)
    return max(0, end_exclusive - first)


def count_summer_days(date1, date2):
    """Count summer days (1 May - 31 August) from date1 up to, not including, date2.

    Args:
        date1: First day of the period (inclusive), a ``datetime.date``.
        date2: End of the period (exclusive), a ``datetime.date``.

    Returns:
        The number of summer days in the period; 0 when date2 is not
        after date1.
    """
    if date2 <= date1:
        return 0
    if date1.year == date2.year:
        return _summer_days_in_year(date1, date2, date1.year)
    first_year = _summer_days_in_year(date1, date2, date1.year)
    last_year = _summer_days_in_year(date1, date2, date2.year)
    # Years strictly between the two endpoints each contribute a whole
    # summer, so they need no per-day iteration.
    other_years = date2.year - date1.year - 1
    summer_days_per_year = (
        get_summer_end(date1.year).toordinal()
        - get_summer_start(date1.year).toordinal()
        + 1
    )
    return first_year + last_year + other_years * summer_days_per_year


date1 = date(2015, 1, 1)
date2 = date(2016, 6, 1)
print(count_summer_days(date1, date2))
Here is a better solution for large periods:
from datetime import date

# Summer bounds as (month, day); both days belong to summer.
first_summer_day = (5, 1)
last_summer_day = (8, 31)
startdate = date(2015, 1, 1)
enddate = date(2016, 6, 1)
# make sure that startdate <= enddate
if startdate > enddate:
    startdate, enddate = enddate, startdate


def iter_yearly_summer_days(startdate, enddate):
    """Yield, per calendar year, the summer days inside [startdate, enddate).

    One value is yielded (and one line printed) for every year from
    ``startdate.year`` to ``enddate.year`` inclusive. ``enddate`` itself
    is not counted. Years in which the period misses summer yield 0.
    """
    period_start = startdate.toordinal()
    period_end = enddate.toordinal()
    for year in range(startdate.year, enddate.year + 1):
        summer_start = date(year, *first_summer_day).toordinal()
        # +1 makes the bound exclusive so the last summer day is counted.
        summer_end = date(year, *last_summer_day).toordinal() + 1
        overlap = min(summer_end, period_end) - max(summer_start, period_start)
        # No overlap would otherwise show up as a negative day count.
        summer_days_that_year = max(0, overlap)
        print('year {} had {} days of summer'.format(year, summer_days_that_year))
        yield summer_days_that_year


print(sum(iter_yearly_summer_days(startdate, enddate)))

Logic to jump weekends Django/python

I have this function. What I need is to make some changes in my model depending on the day. As the if statements below show, if "ddate" is today or tomorrow, I set my "pull_ins" column to "Ready to ship"; if it is the day after tomorrow or the day after that, I set it to "Not yet". This is working, but my problem is that I need to skip weekends and keep the four-day logic. Any ideas?
For example, if today is Thursday, the ddate values to match would be today, tomorrow (Friday) -----jump weekend --- Monday, Tuesday.
This is what I got:
def Ship_status():
week = {0, 1, 2, 3, 4}
deltaday = timedelta(days=1)
today = datetime.now().date()
day = today
day1 = day + deltaday
day2 = day1 + deltaday
day3 = day2 + deltaday
for i in Report.objects.all():
if i.ddate.weekday() in week:
if i.ddate == day:
i.pull_ins = "Ready to ship"
i.save()
if i.date == day1:
i.pull_ins = "Ready to ship"
i.save()
if i.date == day2:
i.pull_ins = "Not yet"
i.save()
if i.date == day3:
i.pull_ins = "not yet"
i.save()
Thanks for your time.
dateutil.rrule is a library you could leverage. To get the next weekdays:
from dateutil import rrule
# byweekday 0-4 restricts the daily recurrence to Monday-Friday; `dt` is
# the date to start counting from.
next_weekday = rrule.rrule(rrule.DAILY, count=3, byweekday=(0, 1, 2, 3, 4), dtstart=dt)
So, in your query, you could do something like this:
def compute_shipping(dt=None, count=2):
    """Return the next *count* weekdays (Mon-Fri) starting at *dt*.

    Args:
        dt: Date to start from; it is included when it is a weekday.
            Defaults to today, resolved at call time.
        count: Number of weekdays to return.

    Returns:
        A list with the occurrences produced by the recurrence rule.
    """
    import datetime

    # Resolve "today" per call: a default evaluated in the signature
    # would be frozen at import time.
    if dt is None:
        dt = datetime.date.today()
    next_weekdays = rrule.rrule(rrule.DAILY, count=count, byweekday=(0, 1, 2, 3, 4), dtstart=dt)
    return list(next_weekdays)
#Ready to ship
Report.objects.filter(ddate__in=compute_shipping()).update(pull_ins="Ready to ship")
#For Not yet
#Query would be similar - just set the appropriate start date

How to get the next specific day of the month in Python?

I am using Python 2.7.
I want to get the next 25th day of the month from now on. Today is February 17th, so I should get February 25th. If we were on February 26th, I should get March 25th.
I was not able to find anything about getting the next specific day of the month, but I am pretty sure that Python has something to make it easy.
Does anyone know how?
You could use python-dateutil for that and play with the rrule:
import datetime
from dateutil.rrule import *
now = datetime.datetime.today().date()
days = rrule(MONTHLY, dtstart=now, bymonthday=25)
print (days[0]) # datetime.datetime(2016, 2, 25, 0, 0)
print (days[1]) # datetime.datetime(2016, 3, 25, 0, 0)
import datetime
def _following_month(year, month):
    """Return the (year, month) pair that comes after the given one."""
    return (year + 1, 1) if month == 12 else (year, month + 1)


def get_next_date_with_day(day_of_the_month, today_date=None):
    """Return the next date whose day-of-month equals *day_of_the_month*.

    Args:
        day_of_the_month: Target day, 1-31.
        today_date: Date to search from (inclusive). Defaults to today.
            A ``datetime`` is accepted and reduced to its date.

    Returns:
        A ``datetime.date``. Today is returned when it already matches.

    Raises:
        ValueError: If *day_of_the_month* is outside 1-31, or no valid
            date can be built.
    """
    if today_date is None:
        today_date = datetime.date.today()
    elif isinstance(today_date, datetime.datetime):
        # Always hand back a plain date, whatever was passed in.
        today_date = today_date.date()
    if not 1 <= day_of_the_month <= 31:
        raise ValueError("day_of_the_month must be in 1..31")

    year, month = today_date.year, today_date.month
    if today_date.day > day_of_the_month:
        year, month = _following_month(year, month)
    # A month may be too short for days 29-31, but the month after a
    # short one never is, so two candidates are enough.
    for _ in range(2):
        try:
            return datetime.date(year, month, day_of_the_month)
        except ValueError:
            year, month = _following_month(year, month)
    raise ValueError("no valid date found for day %d" % day_of_the_month)


print(get_next_date_with_day(25))
>>2016-02-25
def get_25_th_day(curr_date):
    """Return the nearest 25th of a month on or after *curr_date*."""
    year, month = curr_date.year, curr_date.month
    if curr_date.day > 25:
        # Already past the 25th: move to the following month, wrapping
        # December into January of the next year.
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)
    return datetime.date(year, month, 25)
print get_25_th_day(datetime.date.today())
>> 2016-02-25
print get_25_th_day(datetime.date(2016, 2, 25))
>> 2016-02-25
print get_25_th_day(datetime.date(2016, 2, 26))
>> 2016-03-25
print get_25_th_day(datetime.date(2016, 12, 26))
>> 2017-01-25

Categories