How to get 1st business date of year in python? - python

How do I determine the first business date of the year?. I am trying using
`Import holidays
HOLIDAYS= holidays.US()`

I would start by using this existing StackOverflow answer to get the first Monday of the year, then filter by holidays using the holidays module.
import datetime
import holidays
us_holidays = holidays.UnitedStates()
def next_workday(start_date, weekday=0):
"""
Weekday is an integer. 0=Monday, 1=Tuesday, etc.
"""
start_date -= datetime.timedelta(days=1) # Go back 1 to be inclusive of the start date itself
days_ahead = weekday - start_date.weekday()
if days_ahead <= 0: # Target day already happened this week
days_ahead += 7
start_date = start_date + datetime.timedelta(days_ahead)
while 1:
if start_date in us_holidays:
start_date += datetime.timedelta(1)
else:
return start_date
for i in range(100):
year = 2000 + i
print(f'First workday of {year} is', next_workday(datetime.date(year, 1, 1)))

Related

List iteration through JSON array with dates (date format conflict)

I've been diving into List Comprehensions, and I'm determined to put it into practice.
The below code takes a month and year input to determine the number of business days in the month, minus the public holidays (available at https://www.gov.uk/bank-holidays.json).
Additionally, I want to list all public holidays in that month/year, but I'm struck with a date format conflict.
TypeError: '<' not supported between instances of 'str' and 'datetime.date'
edate and sdate are datetime.date, whereas title["date"] is a string.
I've tried things like datetime.strptime and datetime.date to now avail.
How can I resolve the date conflict within the List Comprehension?
Any help or general feedback on code appreciated.
from datetime import date, timedelta, datetime
import inspect
from turtle import title
from typing import Iterator
import numpy as np
import json
import requests
from calendar import month, monthrange
import print
# Ask for a month and year input (set to September for quick testing)
monthInput = "09"
yearInput = "2022"
# Request from UK GOV and filter to England and Wales
holidaysJSON = requests.get("https://www.gov.uk/bank-holidays.json")
ukHolidaysJSON = json.loads(holidaysJSON.text)['england-and-wales']['events']
# List for all England and Wales holidays
ukHolidayList = []
eventIterator = 0
for events in ukHolidaysJSON:
ukHolidayDate = list(ukHolidaysJSON[eventIterator].values())[1]
ukHolidayList.append(ukHolidayDate)
eventIterator += 1
# Calculate days in the month
daysInMonth = monthrange(int(yearInput), int(monthInput))[1] # Extract the number of days in the month
# Define start and end dates
sdate = date(int(yearInput), int(monthInput), 1) # start date
edate = date(int(yearInput), int(monthInput), int(daysInMonth)) # end date
# Calculate delta
delta = edate - sdate
# Find all of the business days in the month
numberOfWorkingDays = 0
for i in range(delta.days + 1): # Look through all days in the month
day = sdate + timedelta(days=i)
if np.is_busday([day]) and str(day) not in ukHolidayList: # Determine if it's a business day
print("- " + str(day))
numberOfWorkingDays += 1
# Count all of the UK holidays
numberOfHolidays = 0
for i in range(delta.days + 1): # Look through all days in the month
day = sdate + timedelta(days=i)
if str(day) in ukHolidayList: # Determine if it's a uk holiday
numberOfHolidays += 1
# Strip the 0 from the month input
month = months[monthInput.lstrip('0')]
# for x in ukHolidaysJSON:
# pprint.pprint(x["title"])
# This is where I've gotten to
hols = [ title["title"] for title in ukHolidaysJSON if title["date"] < sdate and title["date"] > edate ]
print(hols)
I got this to work. You can used the datetime module to parse the string format but then you need to convert that result into a Date to compare to the Date objects you have already.
hols = [ title["title"] for title in ukHolidaysJSON if datetime.strptime(title["date"], '%Y-%m-%d').date() < sdate and datetime.strptime(title["date"], "%Y-%m-%d").date() > edate ]
First use strptime and then convert the datetime object to date. I'm not sure if there's a more straightforward way but this seems to work:
hols = [title["title"] for title in ukHolidaysJSON
if datetime.strptime(title["date"], "%Y-%M-%d").date() < sdate and
datetime.strptime(title["date"], "%Y-%M-%d").date() > edate]

I'm trying to make a function that filters the last working day of the month, I'm having a problem

This function is taking the last working day of the previous month. Assuming that when the last business day of the previous month falls on a Friday, I need to fall on the next business day, which in this case would be a Monday. But it's falling on a Saturday. How do I integrate so it only drops on weekdays? And how can I integrate the holidays?
I already have an existing holiday table
import calendar
from datetime import date
import pandas as pd
from datetime import timedelta
def BsDay():
today = date.today()
last_day = max(calendar.monthcalendar(today.year, today.month)[-1:][0][:5])
validDay = (today.year, today.month-1,
max(calendar.monthcalendar(today.year, today.month-1)[-1:][0][:5]))
if today.month == 1 and today.day <= last_day:
validDay = (today.year-1, today.month+11,
max(calendar.monthcalendar(today.year, today.month-1)[-1:][0][:5]))
elif today.day <= last_day:
validDay
else:
validDay = (today.year, today.month, max(
calendar.monthcalendar(today.year, today.month)[-1:][0][:5]))
return validDay
I tried to do this calculation but it is not working.
BusinessDay = ''.join(map(str, BsDay()))
BusinessDay = BusinessDay[0:4] + '-0' + BusinessDay[4]+'-'+BusinessDay[5:7]
dateD='2022-07-29'
dateD=pd.to_datetime(dateD)
dateF=pd.to_datetime(BusinessDay)
LastDayNotWeekend=dateD+timedelta(days=1)
print('LastDayNotWeekend not Weekend',LastDayNotWeekend)
LastDayInWeekend=dateF+timedelta(days=1)
print('LastDayInWeekend in Weekend->',LastDayInWeekend)
Output of the variable where the last working day of the month does not fall on a weekend.
LastDayNotWeekend not Weekend 2022-07-30 00:00:00
Output of the variable where the last working day of the month falls on a weekend.
LastDayInWeekend in Weekend-> 2022-09-01 00:00:00

Calculate the N th day of the previous year

I have a date in the format "YYYY-MM-DD", for example "2022-03-09". It is the 68th day of the year 2022. Is there a way to get the 68th day of 2021? More generally, if I have a date in "YYYY-MM-DD" format, which is the N th day of the year, is there a quick way to calculate the N th day of the previous year?
You could do:
from datetime import datetime, timedelta
date_string = '2020-03-09'
# Parse the string to a datetime.date object
date = datetime.strptime(date_string, '%Y-%m-%d')
# Get the number N of the day in the year
N = date.timetuple().tm_yday
# Take 1st of january of last year and add N-1 days
N_last_year = (
date.replace(year=date.year-1, month=1, day=1) +
timedelta(days=N-1)
)
print(N_last_year.date())
Or, another funny solution based on leap years. It is based on the fact that the Nth day of last year is the same as this year's, except if the date is after 29th february and there is a leap year somewhere.
from datetime import datetime, timedelta
def leap(year: int) -> bool:
# Returns True if year is a leap year, False otherwise
return (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)
date_string = '2020-03-09'
# Parse the string to a datetime.date object
date = datetime.strptime(date_string, '%Y-%m-%d')
# Get the number N of the day in the year
N = date.timetuple().tm_yday
date_last_year = date.replace(year=date.year-1)
# Do the cool thing
if date.month >= 3:
if leap(date.year):
date_last_year += timedelta(days=1)
elif leap(date.year-1):
date_last_year += timedelta(days=-1)
print(date_last_year.date())
The datetime objects will you with that (https://docs.python.org/3/library/datetime.html).
Indeed, you can compute the time lap between your date and the beginning of the year. Then you can add this difference to the beginning of the next year.
import datetime
date_string = "2022-03-09"
date_object = datetime.datetime.strptime(date_string, "%Y-%m-%d").date()
beginning_of_year = datetime.date(date_object.year, 1, 1)
time_difference = date_object - beginning_of_year
beginning_of_last_year = datetime.date(date_object.year - 1, 1, 1)
same_day_last_year = beginning_of_last_year + time_difference
print(same_day_last_year)
However, the result is disappointing, since it is in fact the same date...
I think you can do something like
from datetime import timedelta, date
year = timedelta(days=365)
today = date.fromisoformat("2022-03-09")
print(today - year)

How to add business days in date excluding holidays

I have a dataframe (df) with start_date column's and add_days column's (=10). I want to create target_date (=start_date + add_days) excluding week-end and holidays (holidays as dataframe).
I do some research and I try this.
from datetime import date, timedelta
import datetime as dt
df["star_date"] = pd.to_datetime(df["star_date"])
Holidays['Date_holi'] = pd.to_datetime(Holidays['Date_holi'])
def date_by_adding_business_days(from_date, add_days, holidays):
business_days_to_add = add_days
current_date = from_date
while business_days_to_add > 0:
current_date += datetime.timedelta(days=1)
weekday = current_date.weekday()
if weekday >= 5: # sunday = 6
continue
if current_date in holidays:
continue
business_days_to_add -= 1
return current_date
#demo:
base["Target_date"]=date_by_adding_business_days(df["start_date"], 10, Holidays['Date_holi'])
but i get this error:
AttributeError: 'Series' object has no attribute 'weekday'
Thanks you for your help.
The comments by ALollz are very valid; customizing your date during creation to only keep what is defined as business day for your problem would be optimal.
However, I assume that you cannot define the business day beforehand and that you need to solve the problem with the data frame constructed as is.
Here is one possible solution:
import pandas as pd
import numpy as np
from datetime import timedelta
# Goal is to offset a start date by N business days (weekday + not a holiday)
# Here we fake the dataset as it was not provided
num_row = 1000
df = pd.DataFrame()
df['start_date'] = pd.date_range(start='1/1/1979', periods=num_row, freq='D')
df['add_days'] = pd.Series([10]*num_row)
# Define what is a week day
week_day = [0,1,2,3,4] # Monday to Friday
# Define what is a holiday with month and day without year (you can add more)
holidays = ['10-30','12-24']
def add_days_to_business_day(df, week_day, holidays, increment=10):
'''
modify the dataframe to increment only the days that are part of a weekday
and not part of a pre-defined holiday
>>> add_days_to_business_day(df, [0,1,2,3,4], ['10-31','12-31'])
this will increment by 10 the days from Monday to Friday excluding Halloween and new year-eve
'''
# Increment everything that is in a business day
df.loc[df['start_date'].dt.dayofweek.isin(week_day),'target_date'] = df['start_date'] + timedelta(days=increment)
# Remove every increment done on a holiday
df.loc[df['start_date'].dt.strftime('%m-%d').isin(holidays), 'target_date'] = np.datetime64('NaT')
add_days_to_business_day(df, week_day, holidays)
df
To Note: I'm not using the 'add_days' column since its just a repeated value. I am instead using a parameter for my function increment which will increment by N number of days (with a default of N = 10).
Hope it helps!

How do I divide a date range into months in Python?

I have the following date range:
begin: 2018-02-15
end: 2018-04-23
I want to achieve the following:
["2018-02-15 - 2018-02-28", "2018-03-01 - 2018-03-31", "2018-04-01 - 2018-04-23"]
Essentially, I want to divide a given date range into months. I can't think of a way to accomplish this in Python.
I have considered the solution here, however, this splits the date range based on a specified interval. I want to be able to split a date range dynamically.
Hence, given a date range from 15 February 2018 to 23 April 2018, I want to be able to get the individual months in the range, like so:
15 February 2018 to 28 February 2018
01 March 2018 to 31 March 2018
01 April 2018 to 23 April 2018
In a loop; starting at the first day continually add one day till you get to the end date; whenever the month changes save the dates.
import datetime
begin = '2018-02-15'
end = '2018-04-23'
dt_start = datetime.datetime.strptime(begin, '%Y-%m-%d')
dt_end = datetime.datetime.strptime(end, '%Y-%m-%d')
one_day = datetime.timedelta(1)
start_dates = [dt_start]
end_dates = []
today = dt_start
while today <= dt_end:
#print(today)
tomorrow = today + one_day
if tomorrow.month != today.month:
start_dates.append(tomorrow)
end_dates.append(today)
today = tomorrow
end_dates.append(dt_end)
out_fmt = '%d %B %Y'
for start, end in zip(start_dates,end_dates):
print('{} to {}'.format(start.strftime(out_fmt), end.strftime(out_fmt)))
Result:
>>>
15 February 2018 to 28 February 2018
01 March 2018 to 31 March 2018
01 April 2018 to 23 April 2018
>>>
You could probably figure out a way to get a range of months between the start and end dates; create a datetime object for the first day of each of those months store them and the days just prior to them. Dates spanning a change of year might be problematic though.
To work with convenient date objects, always use the standard module datetime. This wraps your string formatted dates, and allows easier calculations as well as tailored output formatting.
Unfortunately, it seems to miss one important piece of information: the last day of each month, given a year (which is necessary for Februari). There is an additional module calendar which returns the last day for a month, but since this is all you need of it and there is a simple datetime based function that does the same thing, I chose the latter.
With that, you can set any begin date and append it to your list, together with its last day of that month, then set begin to the next month's 1st and continue until you pass end.
A caveat/finetuning: I realized it would not work if both begin and end fall inside the same month. That needs an interim check, so I changed my initial while begin < end to while True and moved the check for crossing the end date into a separate line.
Also, to cross a year needs a separate test again, because else the statement month+1 will fail on December.
import datetime
# borrowed from https://stackoverflow.com/a/13565185
# as noted there, the calendar module has a function of its own
def last_day_of_month(any_day):
next_month = any_day.replace(day=28) + datetime.timedelta(days=4) # this will never fail
return next_month - datetime.timedelta(days=next_month.day)
begin = "2018-02-15"
end = "2018-04-23"
def monthlist(begin,end):
begin = datetime.datetime.strptime(begin, "%Y-%m-%d")
end = datetime.datetime.strptime(end, "%Y-%m-%d")
result = []
while True:
if begin.month == 12:
next_month = begin.replace(year=begin.year+1,month=1, day=1)
else:
next_month = begin.replace(month=begin.month+1, day=1)
if next_month > end:
break
result.append ([begin.strftime("%Y-%m-%d"),last_day_of_month(begin).strftime("%Y-%m-%d")])
begin = next_month
result.append ([begin.strftime("%Y-%m-%d"),end.strftime("%Y-%m-%d")])
return result
date_list = monthlist(begin,end)
print (date_list)
results in
[ ['2018-02-15', '2018-02-28'],
['2018-03-01', '2018-03-31'],
['2018-04-01', '2018-04-23'] ]
(slightly formatted for readability only)
If you don't mind using pandas, there's a nice helper date_range that will achieve what you want:
import pandas as pd
start = pd.Timestamp('20180215')
end = pd.Timestamp('20180423')
parts = list(pd.date_range(start, end, freq='M'))
# parts = [Timestamp('2018-02-28 00:00:00', freq='M'), Timestamp('2018-03-31 00:00:00', freq='M')]
if start != parts[0]:
parts.insert(0, start)
if end != parts[-1]:
parts.append(end)
parts[0] -= pd.Timedelta('1d') # we add back one day later
pairs = zip(map(lambda d: d + pd.Timedelta('1d'), parts[:-1]), parts[1:])
pairs_str = list(map(lambda t: t[0].strftime('%Y-%m-%d') + ' - ' + t[1].strftime('%Y-%m-%d'), pairs))
# pairs_str = ['2018-02-15 - 2018-02-28', '2018-03-01 - 2018-03-31', '2018-04-01 - 2018-04-23']
Using python calendar and accounting for change of the year
import calendar
from datetime import datetime
begin = '2018-02-15'
end= '2018-04-23'
begin_year, begin_month, begin_date = [int(i) for i in begin.split("-")]
end_year, end_month, end_date = [int(i) for i in end.split("-")]
years = end_year - begin_year
# if date range contains more than single year, we calculate total months
if years:
months = (12 - begin_month) + end_month + (12 * (years - 1))
else:
months = end_month - begin_month
dates = []
month = begin_month
year = begin_year
def create_datetime_object(y, m, d):
return datetime.strptime('{}-{}-{}'.format(y, m, d), '%Y-%m-%d')
# append the first date
dates.append(create_datetime_object(begin_year, begin_month, begin_date))
for i in range(months+1):
days_in_month = calendar.monthrange(year, month)[-1]
if month == begin_month and year == begin_year:
dates.append(create_datetime_object(begin_year, begin_month, days_in_month))
elif month == end_month and year == end_year:
dates.append(create_datetime_object(end_year, end_month, 1))
else:
dates.append(create_datetime_object(year, month, 1))
dates.append(create_datetime_object(year, month, days_in_month))
if month == 12:
month = 0
year += 1
month += 1
# append the last date
dates.append(create_datetime_object(end_year, end_month, end_date))
And to get a list in the question, we could do something like -
dates = [datetime.strftime(dt, '%Y-%m-%d') for dt in dates]
I had to do a similar manipulation and ended up building this function. I tested it on different use cases (different years, same month...) and it's working well.
It is inspired from S.Lott answer here
Creating a range of dates in Python
import datetime
def get_segments(start_date, end_date):
"""
Divides input date range into associated months periods
Example:
Input: start_date = 2018-02-15
end_date = 2018-04-23
Output:
["2018-02-15 - 2018-02-28",
"2018-03-01 - 2018-03-31",
"2018-04-01 - 2018-04-23"]
"""
curr_date = start_date
curr_month = start_date.strftime("%m")
segments = []
loop = (curr_date!=end_date)
days_increment = 1
while loop:
# Get incremented date with 1 day
curr_date = start_date + datetime.timedelta(days=days_increment)
# Get associated month
prev_month = curr_month
curr_month = curr_date.strftime("%m")
# Add to segments if new month
if prev_month!=curr_month:
# get start of segment
if not segments:
start_segment = start_date
else:
start_segment = segments[-1][1] + datetime.timedelta(days=1)
# get end of segment
end_segment = curr_date - datetime.timedelta(days=1)
# define and add segment
segment = [start_segment, end_segment]
segments.append(segment)
# stop if last day reached
loop = (curr_date!=end_date)
# increment added days
days_increment += 1
if not segments or segments[-1][1]!=end_date:
if not segments:
start_last_segment = start_date
else:
start_last_segment = segments[-1][1] + datetime.timedelta(days=1)
last_segment = [start_last_segment, end_date]
segments.append(last_segment)
for i in range(len(segments)):
segments[i][0] = segments[i][0].strftime("%Y-%m-%d")
segments[i][1] = segments[i][1].strftime("%Y-%m-%d")
return segments
Here is an example:
start_date = datetime.datetime(2020, 5, 27)
end_date = datetime.datetime(2021, 3, 1)
segments = get_segments(start_date, end_date)
for seg in segments:
print(seg)
Output:
['2020-05-27', '2020-05-31']
['2020-06-01', '2020-06-30']
['2020-07-01', '2020-07-31']
['2020-08-01', '2020-08-31']
['2020-09-01', '2020-09-30']
['2020-10-01', '2020-10-31']
['2020-11-01', '2020-11-30']
['2020-12-01', '2020-12-31']
['2021-01-01', '2021-01-31']
['2021-02-01', '2021-02-28']
['2021-03-01', '2021-03-01']
I extend the solution by #wwii
Now you will not have duplicate start and/or end dates
def date_range_split_monthly(begin, end):
dt_start = datetime.strptime(begin, '%Y-%m-%d')
dt_end = datetime.strptime(end, '%Y-%m-%d')
one_day = timedelta(1)
start_dates = [dt_start]
end_dates = []
today = dt_start
while today <= dt_end:
# print(today)
tomorrow = today + one_day
if tomorrow.month != today.month:
if tomorrow <= dt_end:
start_dates.append(tomorrow)
end_dates.append(today)
today = tomorrow
end_dates.append(dt_end)
return start_dates, end_dates
For people using Pendulum :
import pendulum
start = pendulum.now().subtract(months=6)
end = pendulum.today()
period = pendulum.period(start, end)
time_ranges = list(period.range("months"))
arr = []
for index, dt in enumerate(time_ranges):
if index < len(time_ranges) - 1:
start_range = time_ranges[index].format("YYYY-MM-D")
end_range = time_ranges[index + 1].format("YYYY-MM-D")
litt = F"{start_range} - {end_range}"
print(litt)
arr.append(litt)
print(arr)
More about period here
i'm quoting the comment of Kiran Subbaraman, just with addition of the exact keyword (otherwise, whole months will be returned even if ranges fall beyond the start or the end).
#!pip install arrow
from arrow import Arrow
Arrow.span_range('month', start, end, exact=True)

Categories