How to add business days in date excluding holidays - python

I have a dataframe (df) with start_date column's and add_days column's (=10). I want to create target_date (=start_date + add_days) excluding week-end and holidays (holidays as dataframe).
I do some research and I try this.
from datetime import date, timedelta
import datetime as dt
df["star_date"] = pd.to_datetime(df["star_date"])
Holidays['Date_holi'] = pd.to_datetime(Holidays['Date_holi'])
def date_by_adding_business_days(from_date, add_days, holidays):
business_days_to_add = add_days
current_date = from_date
while business_days_to_add > 0:
current_date += datetime.timedelta(days=1)
weekday = current_date.weekday()
if weekday >= 5: # sunday = 6
continue
if current_date in holidays:
continue
business_days_to_add -= 1
return current_date
#demo:
base["Target_date"]=date_by_adding_business_days(df["start_date"], 10, Holidays['Date_holi'])
but i get this error:
AttributeError: 'Series' object has no attribute 'weekday'
Thanks you for your help.

The comments by ALollz are very valid; customizing your date during creation to only keep what is defined as business day for your problem would be optimal.
However, I assume that you cannot define the business day beforehand and that you need to solve the problem with the data frame constructed as is.
Here is one possible solution:
import pandas as pd
import numpy as np
from datetime import timedelta
# Goal is to offset a start date by N business days (weekday + not a holiday)
# Here we fake the dataset as it was not provided
num_row = 1000
df = pd.DataFrame()
df['start_date'] = pd.date_range(start='1/1/1979', periods=num_row, freq='D')
df['add_days'] = pd.Series([10]*num_row)
# Define what is a week day
week_day = [0,1,2,3,4] # Monday to Friday
# Define what is a holiday with month and day without year (you can add more)
holidays = ['10-30','12-24']
def add_days_to_business_day(df, week_day, holidays, increment=10):
'''
modify the dataframe to increment only the days that are part of a weekday
and not part of a pre-defined holiday
>>> add_days_to_business_day(df, [0,1,2,3,4], ['10-31','12-31'])
this will increment by 10 the days from Monday to Friday excluding Halloween and new year-eve
'''
# Increment everything that is in a business day
df.loc[df['start_date'].dt.dayofweek.isin(week_day),'target_date'] = df['start_date'] + timedelta(days=increment)
# Remove every increment done on a holiday
df.loc[df['start_date'].dt.strftime('%m-%d').isin(holidays), 'target_date'] = np.datetime64('NaT')
add_days_to_business_day(df, week_day, holidays)
df
To Note: I'm not using the 'add_days' column since its just a repeated value. I am instead using a parameter for my function increment which will increment by N number of days (with a default of N = 10).
Hope it helps!

Related

List iteration through JSON array with dates (date format conflict)

I've been diving into List Comprehensions, and I'm determined to put it into practice.
The below code takes a month and year input to determine the number of business days in the month, minus the public holidays (available at https://www.gov.uk/bank-holidays.json).
Additionally, I want to list all public holidays in that month/year, but I'm struck with a date format conflict.
TypeError: '<' not supported between instances of 'str' and 'datetime.date'
edate and sdate are datetime.date, whereas title["date"] is a string.
I've tried things like datetime.strptime and datetime.date to now avail.
How can I resolve the date conflict within the List Comprehension?
Any help or general feedback on code appreciated.
from datetime import date, timedelta, datetime
import inspect
from turtle import title
from typing import Iterator
import numpy as np
import json
import requests
from calendar import month, monthrange
import print
# Ask for a month and year input (set to September for quick testing)
monthInput = "09"
yearInput = "2022"
# Request from UK GOV and filter to England and Wales
holidaysJSON = requests.get("https://www.gov.uk/bank-holidays.json")
ukHolidaysJSON = json.loads(holidaysJSON.text)['england-and-wales']['events']
# List for all England and Wales holidays
ukHolidayList = []
eventIterator = 0
for events in ukHolidaysJSON:
ukHolidayDate = list(ukHolidaysJSON[eventIterator].values())[1]
ukHolidayList.append(ukHolidayDate)
eventIterator += 1
# Calculate days in the month
daysInMonth = monthrange(int(yearInput), int(monthInput))[1] # Extract the number of days in the month
# Define start and end dates
sdate = date(int(yearInput), int(monthInput), 1) # start date
edate = date(int(yearInput), int(monthInput), int(daysInMonth)) # end date
# Calculate delta
delta = edate - sdate
# Find all of the business days in the month
numberOfWorkingDays = 0
for i in range(delta.days + 1): # Look through all days in the month
day = sdate + timedelta(days=i)
if np.is_busday([day]) and str(day) not in ukHolidayList: # Determine if it's a business day
print("- " + str(day))
numberOfWorkingDays += 1
# Count all of the UK holidays
numberOfHolidays = 0
for i in range(delta.days + 1): # Look through all days in the month
day = sdate + timedelta(days=i)
if str(day) in ukHolidayList: # Determine if it's a uk holiday
numberOfHolidays += 1
# Strip the 0 from the month input
month = months[monthInput.lstrip('0')]
# for x in ukHolidaysJSON:
# pprint.pprint(x["title"])
# This is where I've gotten to
hols = [ title["title"] for title in ukHolidaysJSON if title["date"] < sdate and title["date"] > edate ]
print(hols)
I got this to work. You can used the datetime module to parse the string format but then you need to convert that result into a Date to compare to the Date objects you have already.
hols = [ title["title"] for title in ukHolidaysJSON if datetime.strptime(title["date"], '%Y-%m-%d').date() < sdate and datetime.strptime(title["date"], "%Y-%m-%d").date() > edate ]
First use strptime and then convert the datetime object to date. I'm not sure if there's a more straightforward way but this seems to work:
hols = [title["title"] for title in ukHolidaysJSON
if datetime.strptime(title["date"], "%Y-%M-%d").date() < sdate and
datetime.strptime(title["date"], "%Y-%M-%d").date() > edate]

Dates/months calculation

My below working code calculates date/month ranges, but I am using the Pandas library, which I want to get rid of.
import pandas as pd
dates=pd.date_range("2019-12","2020-02",freq='MS').strftime("%Y%m%d").tolist()
#print dates : ['20191101','20191201','20200101','20200201']
df=(pd.to_datetime(dates,format="%Y%m%d") + MonthEnd(1)).strftime("%Y%m%d").tolist()
#print df : ['20191130','20191231','20200131','20200229']
How can I rewrite this code without using Pandas?
I don't want to use Pandas library as I am triggering my job through Oozie and we don't have Pandas installed on all our nodes.
Pandas offers some nice functionalities when using datetimes which the standard library datetime module does not have (like the frequency or the MonthEnd). You have to reproduce these yourself.
import datetime as DT
def next_first_of_the_month(dt):
"""return a new datetime where the month has been increased by 1 and
the day is always the first
"""
new_month = dt.month + 1
if new_month == 13:
new_year = dt.year + 1
new_month = 1
else:
new_year = dt.year
return DT.datetime(new_year, new_month, day=1)
start, stop = [DT.datetime.strptime(dd, "%Y-%m") for dd in ("2019-11", "2020-02")]
dates = [start]
cd = next_first_of_the_month(start)
while cd <= stop:
dates.append(cd)
cd = next_first_of_the_month(cd)
str_dates = [d.strftime("%Y%m%d") for d in dates]
print(str_dates)
# prints: ['20191101', '20191201', '20200101', '20200201']
end_dates = [next_first_of_the_month(d) - DT.timedelta(days=1) for d in dates]
str_end_dates = [d.strftime("%Y%m%d") for d in end_dates]
print(str_end_dates)
# prints ['20191130', '20191231', '20200131', '20200229']
I used here a function to get a datetime corresponding to the first day of the next month of the input datetime. Sadly, timedelta does not work with months, and adding 30 days of course is not feasible (not all months have 30 days).
Then a while loop to get a sequence of fist days of the month until the stop date.
And to the get the end of the month, again get the next first day of the month fo each datetime in your list and subtract a day.

Pandas select last friday of each month [duplicate]

I've written this function to get the last Thursday of the month
def last_thurs_date(date):
month=date.dt.month
year=date.dt.year
cal = calendar.monthcalendar(year, month)
last_thurs_date = cal[4][4]
if month < 10:
thurday_date = str(year)+'-0'+ str(month)+'-' + str(last_thurs_date)
else:
thurday_date = str(year) + '-' + str(month) + '-' + str(last_thurs_date)
return thurday_date
But its not working with the lambda function.
datelist['Date'].map(lambda x: last_thurs_date(x))
Where datelist is
datelist = pd.DataFrame(pd.date_range(start = pd.to_datetime('01-01-2014',format='%d-%m-%Y')
, end = pd.to_datetime('06-03-2019',format='%d-%m-%Y'),freq='D').tolist()).rename(columns={0:'Date'})
datelist['Date']=pd.to_datetime(datelist['Date'])
Jpp already added the solution, but just to add a slightly more readable formatted string - see this awesome website.
import calendar
def last_thurs_date(date):
year, month = date.year, date.month
cal = calendar.monthcalendar(year, month)
# the last (4th week -> row) thursday (4th day -> column) of the calendar
# except when 0, then take the 3rd week (February exception)
last_thurs_date = cal[4][4] if cal[4][4] > 0 else cal[3][4]
return f'{year}-{month:02d}-{last_thurs_date}'
Also added a bit of logic - e.g. you got 2019-02-0 as February doesn't have 4 full weeks.
Scalar datetime objects don't have a dt accessor, series do: see pd.Series.dt. If you remove this, your function works fine. The key is understanding that pd.Series.apply passes scalars to your custom function via a loop, not an entire series.
def last_thurs_date(date):
month = date.month
year = date.year
cal = calendar.monthcalendar(year, month)
last_thurs_date = cal[4][4]
if month < 10:
thurday_date = str(year)+'-0'+ str(month)+'-' + str(last_thurs_date)
else:
thurday_date = str(year) + '-' + str(month) + '-' + str(last_thurs_date)
return thurday_date
You can rewrite your logic more succinctly via f-strings (Python 3.6+) and a ternary statement:
def last_thurs_date(date):
month = date.month
year = date.year
last_thurs_date = calendar.monthcalendar(year, month)[4][4]
return f'{year}{"-0" if month < 10 else "-"}{month}-{last_thurs_date}'
I know that a lot of time has passed since the date of this post, but I think it would be worth adding another option if someone came across this thread
Even though I use pandas every day at work, in that case my suggestion would be to just use the datetutil library. The solution is a simple one-liner, without unnecessary combinations.
from dateutil.rrule import rrule, MONTHLY, FR, SA
from datetime import datetime as dt
import pandas as pd
# monthly options expiration dates calculated for 2022
monthly_options = list(rrule(MONTHLY, count=12, byweekday=FR, bysetpos=3, dtstart=dt(2022,1,1)))
# last satruday of the month
last_saturday = list(rrule(MONTHLY, count=12, byweekday=SA, bysetpos=-1, dtstart=dt(2022,1,1)))
and then of course:
pd.DataFrame({'LAST_ST':last_saturdays}) #or whatever you need
This question answer Calculate Last Friday of Month in Pandas
This can be modified by selecting the appropriate day of the week, here freq='W-FRI'
I think the easiest way is to create a pandas.DataFrame using pandas.date_range and specifying freq='W-FRI.
W-FRI is Weekly Fridays
pd.date_range(df.Date.min(), df.Date.max(), freq='W-FRI')
Creates all the Fridays in the date range between the min and max of the dates in df
Use a .groupby on year and month, and select .last(), to get the last Friday of every month for every year in the date range.
Because this method finds all the Fridays for every month in the range and then chooses .last() for each month, there's not an issue with trying to figure out which week of the month has the last Friday.
With this, use pandas: Boolean Indexing to find values in the Date column of the dataframe that are in last_fridays_in_daterange.
Use the .isin method to determine containment.
pandas: DateOffset objects
import pandas as pd
# test data: given a dataframe with a datetime column
df = pd.DataFrame({'Date': pd.date_range(start=pd.to_datetime('2014-01-01'), end=pd.to_datetime('2020-08-31'), freq='D')})
# create a dateframe with all Fridays in the daterange for min and max of df.Date
fridays = pd.DataFrame({'datetime': pd.date_range(df.Date.min(), df.Date.max(), freq='W-FRI')})
# use groubpy and last, to get the last Friday of each month into a list
last_fridays_in_daterange = fridays.groupby([fridays.datetime.dt.year, fridays.datetime.dt.month]).last()['datetime'].tolist()
# find the data for the last Friday of the month
df[df.Date.isin(last_fridays_in_daterange)]

How to get 1st business date of year in python?

How do I determine the first business date of the year?. I am trying using
`Import holidays
HOLIDAYS= holidays.US()`
I would start by using this existing StackOverflow answer to get the first Monday of the year, then filter by holidays using the holidays module.
import datetime
import holidays
us_holidays = holidays.UnitedStates()
def next_workday(start_date, weekday=0):
"""
Weekday is an integer. 0=Monday, 1=Tuesday, etc.
"""
start_date -= datetime.timedelta(days=1) # Go back 1 to be inclusive of the start date itself
days_ahead = weekday - start_date.weekday()
if days_ahead <= 0: # Target day already happened this week
days_ahead += 7
start_date = start_date + datetime.timedelta(days_ahead)
while 1:
if start_date in us_holidays:
start_date += datetime.timedelta(1)
else:
return start_date
for i in range(100):
year = 2000 + i
print(f'First workday of {year} is', next_workday(datetime.date(year, 1, 1)))

finding last business day of a month in python

I'm trying to find last business day of of the month. I wrote the code below for that and it works fine but I was wondering if there is a cleaner way of doing it?
from datetime import date,timedelta
import datetime
import calendar
today=datetime.date.today()
last = today.replace(day=calendar.monthrange(today.year,today.month)[1])
if last.weekday()<5:
print last
else:
print last-timedelta(days=1+last.weekday()-5)
Thanks in advance!
I use the following:
from pandas.tseries.offsets import BMonthEnd
from datetime import date
d=date.today()
offset = BMonthEnd()
#Last day of current month
offset.rollforward(d)
#Last day of previous month
offset.rollback(d)
Let's say you want to get the last business days of the month up-to the end of the next two years, the following will work.
import pandas as pd
import datetime
start = datetime.date.today()
end = datetime.date(start.year+2, 12, 31)
bussiness_days_rng =pd.date_range(start, end, freq='BM')
For one-liner fans:
import calendar
def last_business_day_in_month(year: int, month: int) -> int:
return max(calendar.monthcalendar(year, month)[-1][:5])
I use this for the first business day of the month but it can be used for last business day of the month as well:
import time
import datetime
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from dateutil.relativedelta import relativedelta
#Create dates needed to be entered as parameters
today = datetime.date.today()
first = today.replace(day=1)
#End of the Prior Month
eopm = first - datetime.timedelta(days=1)
eopm = eopm.strftime("%Y%m%d")
#Create first business day of current month date
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
focm = first
nxtMo = today + relativedelta(months=+1)
fonm = nxtMo.replace(day=1)
eocm = fonm - datetime.timedelta(days=1)
first_bd = pd.DatetimeIndex(start = focm, end = eocm, freq= us_bd)
first_bd = first_bd.strftime("%Y%m%d")
#First Business Day of the Month
first_bd = first_bd[0]
#Last Business Day of the Month
lst_day = len(first_bd)-1
last_bd = first_bd[lst_day]
I left some code in there that is not needed for the last business day of the current month, but may be useful to someone.
You can use Pandas to get business days. Refer http://pandas.pydata.org/pandas-docs/stable/timeseries.html
Also you can refer this https://pypi.python.org/pypi/business_calendar/ for simple business days calculation.
with rollforward(d) you will skip to the next month if the date is past the last business day of the current month, so below might be safer for any day of the month:
from datetime import date
import pandas as pd
d = date(2011, 12, 31) # a caturday
pd.bdate_range(end=pd.offsets.MonthEnd().rollforward(d), periods=1)
pd.offsets.BMonthEnd().rollforward(d)
I needed something intuitively readable and opted for the following:
from datetime import datetime, timedelta
import pandas as pd
def isMonthLastBusinessDay(date):
lastDayOfMonth = date + pd.offsets.MonthEnd(0)
isFriday = date.weekday() == 4
if (date.weekday() < 5 and lastDayOfMonth == date) or (isFriday and lastDayOfMonth == date+timedelta(days=1)) or (isFriday and lastDayOfMonth == date+timedelta(days=2)):
return True
else:
return False

Categories