How to filter two datetime indices? - python

I have two datetime indices - one being a date_range of business days and the other being a list of holidays.
I filter the holiday list by a start and end date. But now I need to join them and drop any duplicates (holidays and trading days both exist).
Finally I need to convert the daterange into a list of formatted strings ie: yyyy_mm_dd that I can iterate through later.
Here is my code so far:
import datetime
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday rules used here as the US trading-close calendar."""

    # Fixed-date holidays use nearest_workday: a holiday falling on a
    # Saturday is observed on the Friday before, one falling on a Sunday
    # on the Monday after.
    rules = [
        Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
    ]


def get_trading_close_holidays(year):
    """Return the holidays of ``USTradingCalendar`` for ``year``.

    The window runs from 31 December of the previous year to 31 December of
    ``year`` -- presumably so that a New Year's Day observed one day early
    (1 January on a Saturday) is still included.

    Returns a DatetimeIndex of holiday dates.
    """
    inst = USTradingCalendar()
    return inst.holidays(datetime.datetime(year - 1, 12, 31),
                         datetime.datetime(year, 12, 31))
# Date window, given as yyyy_mm_dd strings.
start_date = "2017_07_01"
end_date = "2017_08_31"
# Parse the strings into datetime.date objects.
start_date = datetime.datetime.strptime(start_date,"%Y_%m_%d").date()
end_date = datetime.datetime.strptime(end_date,"%Y_%m_%d").date()
# Business days in the window; name= only labels the resulting index.
date_range = pd.bdate_range(start = start_date, end = end_date, name =
"trading_days")
# Holidays for the start year, then only those strictly inside the window:
# where() blanks out the non-matching entries and dropna() removes them.
holidays = get_trading_close_holidays(start_date.year)
holidays = holidays.where((holidays.date > start_date) &
(holidays.date < end_date))
holidays = holidays.dropna(how = 'any')
# NOTE(review): this is the line the question is about. The index is only
# *named* "trading_days"; it has no attribute of that name, so
# date_range.trading_days presumably raises AttributeError -- confirm.
date_range = date_range.where(~(date_range.trading_days.isin(holidays)))

Consider filtering by boolean condition:
# Keep only the business days that are not holidays. isin() tests membership
# against the whole holiday index, so it works for any number of holidays;
# an elementwise != between the two date arrays only works when exactly one
# holiday is left after filtering.
date_range = date_range[~date_range.isin(holidays)]
print(date_range) # ONE HOLIDAY 2017-07-04 DOES NOT APPEAR
# DatetimeIndex(['2017-07-03', '2017-07-05', '2017-07-06', '2017-07-07',
# '2017-07-10', '2017-07-11', '2017-07-12', '2017-07-13',
# '2017-07-14', '2017-07-17', '2017-07-18', '2017-07-19',
# '2017-07-20', '2017-07-21', '2017-07-24', '2017-07-25',
# '2017-07-26', '2017-07-27', '2017-07-28', '2017-07-31',
# '2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
# '2017-08-07', '2017-08-08', '2017-08-09', '2017-08-10',
# '2017-08-11', '2017-08-14', '2017-08-15', '2017-08-16',
# '2017-08-17', '2017-08-18', '2017-08-21', '2017-08-22',
# '2017-08-23', '2017-08-24', '2017-08-25', '2017-08-28',
# '2017-08-29', '2017-08-30', '2017-08-31'],
# dtype='datetime64[ns]', name='trading_days', freq=None)
Then convert the datetime index to an array of strings (e.g. with astype() or strftime()) and call tolist() to get a plain list:
# strftime() produces the requested yyyy_mm_dd layout directly;
# astype('str') on the dates would give yyyy-mm-dd instead.
strdates = date_range.strftime('%Y_%m_%d').tolist()
print(strdates)
# ['2017_07_03', '2017_07_05', '2017_07_06', '2017_07_07', '2017_07_10',
# '2017_07_11', '2017_07_12', '2017_07_13', '2017_07_14', '2017_07_17',
# '2017_07_18', '2017_07_19', '2017_07_20', '2017_07_21', '2017_07_24',
# '2017_07_25', '2017_07_26', '2017_07_27', '2017_07_28', '2017_07_31',
# '2017_08_01', '2017_08_02', '2017_08_03', '2017_08_04', '2017_08_07',
# '2017_08_08', '2017_08_09', '2017_08_10', '2017_08_11', '2017_08_14',
# '2017_08_15', '2017_08_16', '2017_08_17', '2017_08_18', '2017_08_21',
# '2017_08_22', '2017_08_23', '2017_08_24', '2017_08_25', '2017_08_28',
# '2017_08_29', '2017_08_30', '2017_08_31']

Related

pandas Calendar issue

This code for CustomBusinessDay() works fine:
from datetime import datetime
from pandas.tseries.offsets import CustomBusinessDay
# Reference day for the example.
runday = datetime(2021, 12, 30).date()
# Default custom business day: no holiday calendar attached.
one_business_day = CustomBusinessDay()
# Step forward one business day and drop the time component again.
nextday = (runday + one_business_day).date()
output 1:
In [26]: nextday
Out[26]: datetime.date(2021, 12, 31)
However, when adding an optional calendar, as described under date functionality, the result skips ahead to the following business day, even though today's date (Dec 31, 2021) is not meant to be a holiday in the calendar specified below:
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \
USMemorialDay, USMartinLutherKingJr, USPresidentsDay, GoodFriday, \
USLaborDay, USThanksgivingDay, nearest_workday
class NYSECalendar(AbstractHolidayCalendar):
    ''' NYSE holiday calendar via pandas '''

    # nearest_workday moves a Saturday holiday to the preceding Friday and a
    # Sunday holiday to the following Monday. For New Year's Day this means
    # 1 January 2022 (a Saturday) is observed on 31 December 2021.
    rules = [
        Holiday('New Years Day', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
    ]
# Same one-business-day step, now with NYSECalendar supplied as the calendar.
nextday = (runday + CustomBusinessDay(calendar=NYSECalendar())).date()
output2:
In [27]: nextday
Out[27]: datetime.date(2022, 1, 3)
This line is the location of your problem:
Holiday('New Years Day', month=1, day=1, observance=nearest_workday)
If you take a look at the source code, nearest_workday means that the holiday is observed on a Friday if it falls on a Saturday, and on a Monday if the holiday falls on a Sunday. Since New Year's Day 2022 falls on a Saturday, it is observed today (12/31/2021) according to your calendar.
Removing the observance parameter will lead to an output of 2021-12-31.

Select specific dates from a data frame in python using MODIS data in NETCDF4

I am not well-versed in python, and I'm sure there is a simple solution to this (although, I have looked). I got this code from an lpdaac tutorial.
My input is a NETCDF4 file downloaded from MODIS satellite. Printing the metadata of the file returns the variables
# Open the first file in file_list read-only ('r') with format NETCDF4.
file_in = Dataset(file_list[0], 'r', format = 'NETCDF4')
# List the names of the variables stored in the file.
list(file_in.variables)
Out[19]: ['crs', 'time', 'lat', 'lon', '_1_km_16_days_EVI', '_1_km_16_days_VI_Quality']
I want to convert the time variable to date format, and then only select 1 date from each year. Here is the code to convert to date format:
from netCDF4 import num2date
# The "time" variable of the dataset; times.units gives the units of its values.
times = file_in.variables["time"]
# Decode the raw time values into date objects.
dates = num2date(times[:], times.units)
# Format every decoded date as a 'YYYY-MM-DD' string.
dates = [stamp.strftime("%Y-%m-%d") for stamp in dates]
print(dates)
The dates are as follows:
['2000-06-25', '2000-07-11', '2000-07-27', '2000-08-12', '2000-08-28', '2000-09-13', '2000-09-29', '2000-10-15', '2000-10-31', '2000-11-16', '2000-12-02', '2000-12-18', '2001-01-01', '2001-01-17', '2001-02-02', '2001-02-18', '2001-03-06', '2001-03-22', '2001-04-07', '2001-04-23', '2001-05-09', '2001-05-25', '2001-06-10', '2001-06-26', '2001-07-12', '2001-07-28', '2001-08-13', '2001-08-29', '2001-09-14', '2001-09-30', '2001-10-16', '2001-11-01', '2001-11-17', '2001-12-03', '2001-12-19', '2002-01-01', '2002-01-17', '2002-02-02', '2002-02-18', '2002-03-06', '2002-03-22', '2002-04-07', '2002-04-23', '2002-05-09', '2002-05-25', '2002-06-10', '2002-06-26', '2002-07-12', '2002-07-28', '2002-08-13', '2002-08-29', '2002-09-14', '2002-09-30', '2002-10-16', '2002-11-01', '2002-11-17', '2002-12-03', '2002-12-19', '2003-01-01', '2003-01-17', '2003-02-02', '2003-02-18', '2003-03-06', '2003-03-22', '2003-04-07', '2003-04-23', '2003-05-09', '2003-05-25', '2003-06-10', '2003-06-26', '2003-07-12', '2003-07-28', '2003-08-13', '2003-08-29', '2003-09-14', '2003-09-30', '2003-10-16', '2003-11-01', '2003-11-17', '2003-12-03', '2003-12-19', '2004-01-01', '2004-01-17', '2004-02-02', '2004-02-18', '2004-03-05', '2004-03-21', '2004-04-06', '2004-04-22', '2004-05-08', '2004-05-24', '2004-06-09', '2004-06-25', '2004-07-11', '2004-07-27', '2004-08-12', '2004-08-28', '2004-09-13', '2004-09-29', '2004-10-15', '2004-10-31', '2004-11-16', '2004-12-02', '2004-12-18', '2005-01-01', '2005-01-17', '2005-02-02', '2005-02-18', '2005-03-06', '2005-03-22', '2005-04-07', '2005-04-23', '2005-05-09', '2005-05-25', '2005-06-10', '2005-06-26', '2005-07-12', '2005-07-28', '2005-08-13', '2005-08-29', '2005-09-14', '2005-09-30', '2005-10-16', '2005-11-01', '2005-11-17', '2005-12-03', '2005-12-19', '2006-01-01', '2006-01-17', '2006-02-02', '2006-02-18', '2006-03-06', '2006-03-22', '2006-04-07', '2006-04-23', '2006-05-09', '2006-05-25', '2006-06-10', '2006-06-26', '2006-07-12', '2006-07-28', '2006-08-13', 
'2006-08-29', '2006-09-14', '2006-09-30', '2006-10-16', '2006-11-01', '2006-11-17', '2006-12-03', '2006-12-19', '2007-01-01', '2007-01-17', '2007-02-02', '2007-02-18', '2007-03-06', '2007-03-22', '2007-04-07', '2007-04-23', '2007-05-09', '2007-05-25', '2007-06-10', '2007-06-26', '2007-07-12', '2007-07-28', '2007-08-13', '2007-08-29', '2007-09-14', '2007-09-30', '2007-10-16', '2007-11-01', '2007-11-17', '2007-12-03', '2007-12-19', '2008-01-01', '2008-01-17', '2008-02-02', '2008-02-18', '2008-03-05', '2008-03-21', '2008-04-06', '2008-04-22', '2008-05-08', '2008-05-24', '2008-06-09', '2008-06-25', '2008-07-11', '2008-07-27', '2008-08-12', '2008-08-28', '2008-09-13', '2008-09-29', '2008-10-15', '2008-10-31', '2008-11-16', '2008-12-02', '2008-12-18', '2009-01-01', '2009-01-17', '2009-02-02', '2009-02-18', '2009-03-06', '2009-03-22', '2009-04-07', '2009-04-23', '2009-05-09', '2009-05-25', '2009-06-10', '2009-06-26', '2009-07-12', '2009-07-28', '2009-08-13', '2009-08-29', '2009-09-14', '2009-09-30', '2009-10-16', '2009-11-01', '2009-11-17', '2009-12-03', '2009-12-19', '2010-01-01', '2010-01-17', '2010-02-02', '2010-02-18', '2010-03-06', '2010-03-22', '2010-04-07', '2010-04-23', '2010-05-09', '2010-05-25', '2010-06-10', '2010-06-26', '2010-07-12', '2010-07-28', '2010-08-13', '2010-08-29', '2010-09-14', '2010-09-30', '2010-10-16', '2010-11-01', '2010-11-17', '2010-12-03', '2010-12-19', '2011-01-01', '2011-01-17', '2011-02-02', '2011-02-18', '2011-03-06', '2011-03-22', '2011-04-07', '2011-04-23', '2011-05-09', '2011-05-25', '2011-06-10', '2011-06-26', '2011-07-12', '2011-07-28', '2011-08-13', '2011-08-29', '2011-09-14', '2011-09-30', '2011-10-16', '2011-11-01', '2011-11-17', '2011-12-03', '2011-12-19', '2012-01-01', '2012-01-17', '2012-02-02', '2012-02-18', '2012-03-05', '2012-03-21', '2012-04-06', '2012-04-22', '2012-05-08', '2012-05-24', '2012-06-09', '2012-06-25', '2012-07-11', '2012-07-27', '2012-08-12', '2012-08-28', '2012-09-13', '2012-09-29', '2012-10-15', 
'2012-10-31', '2012-11-16', '2012-12-02', '2012-12-18', '2013-01-01', '2013-01-17', '2013-02-02', '2013-02-18', '2013-03-06', '2013-03-22', '2013-04-07', '2013-04-23', '2013-05-09', '2013-05-25', '2013-06-10', '2013-06-26', '2013-07-12', '2013-07-28', '2013-08-13', '2013-08-29', '2013-09-14', '2013-09-30', '2013-10-16', '2013-11-01', '2013-11-17', '2013-12-03', '2013-12-19', '2014-01-01', '2014-01-17', '2014-02-02', '2014-02-18', '2014-03-06', '2014-03-22', '2014-04-07', '2014-04-23', '2014-05-09', '2014-05-25', '2014-06-10', '2014-06-26', '2014-07-12', '2014-07-28', '2014-08-13', '2014-08-29', '2014-09-14', '2014-09-30', '2014-10-16', '2014-11-01', '2014-11-17', '2014-12-03', '2014-12-19', '2015-01-01', '2015-01-17', '2015-02-02', '2015-02-18', '2015-03-06', '2015-03-22', '2015-04-07', '2015-04-23', '2015-05-09', '2015-05-25', '2015-06-10', '2015-06-26', '2015-07-12', '2015-07-28', '2015-08-13', '2015-08-29', '2015-09-14', '2015-09-30', '2015-10-16', '2015-11-01', '2015-11-17', '2015-12-03', '2015-12-19', '2016-01-01', '2016-01-17', '2016-02-02', '2016-02-18', '2016-03-05', '2016-03-21', '2016-04-06', '2016-04-22', '2016-05-08', '2016-05-24', '2016-06-09', '2016-06-25', '2016-07-11', '2016-07-27', '2016-08-12', '2016-08-28', '2016-09-13', '2016-09-29', '2016-10-15', '2016-10-31', '2016-11-16', '2016-12-02', '2016-12-18', '2017-01-01', '2017-01-17', '2017-02-02', '2017-02-18', '2017-03-06', '2017-03-22', '2017-04-07', '2017-04-23', '2017-05-09', '2017-05-25', '2017-06-10', '2017-06-26', '2017-07-12', '2017-07-28', '2017-08-13', '2017-08-29', '2017-09-14', '2017-09-30', '2017-10-16', '2017-11-01', '2017-11-17', '2017-12-03', '2017-12-19', '2018-01-01', '2018-01-17', '2018-02-02', '2018-02-18', '2018-03-06', '2018-03-22', '2018-04-07', '2018-04-23', '2018-05-09', '2018-05-25', '2018-06-10', '2018-06-26', '2018-07-12', '2018-07-28', '2018-08-13', '2018-08-29', '2018-09-14', '2018-09-30', '2018-10-16', '2018-11-01', '2018-11-17', '2018-12-03', '2018-12-19', 
'2019-01-01', '2019-01-17', '2019-02-02', '2019-02-18', '2019-03-06', '2019-03-22', '2019-04-07', '2019-04-23', '2019-05-09', '2019-05-25', '2019-06-10', '2019-06-26', '2019-07-12', '2019-07-28', '2019-08-13', '2019-08-29', '2019-09-14', '2019-09-30', '2019-10-16', '2019-11-01', '2019-11-17', '2019-12-03', '2019-12-19', '2020-01-01', '2020-01-17', '2020-02-02', '2020-02-18', '2020-03-05', '2020-03-21', '2020-04-06', '2020-04-22', '2020-05-08', '2020-05-24', '2020-06-09', '2020-06-25', '2020-07-11', '2020-07-27']
And these are the dates I want in the data frame:
dates = ['2000-07-11', '2001-07-12', '2002-07-12', '2003-07-12', '2004-07-11',
'2005-07-12', '2006-07-12', '2007-07-12', '2008-07-11', '2009-07-12',
'2010-07-12', '2011-07-12', '2012-07-11', '2013-07-12', '2014-07-12',
'2015-07-12', '2016-07-11', '2017-07-12', '2018-07-12', '2019-07-12',
'2020-07-11']
I tried just defining a new dates data frame, but I think that caused problems for me later in the code, so I would like to just subset the first dates data frame if there is an easy way to do it.
Thank you for your help

Transform data to growth rates in Python

I have two variables and I want to express one of them (monetary_base) in terms of monthly growth.
How can I do that? In R you would first convert the data into a time series; is that also the case in Python?
# Fetch the series we are looking for
inflacion = llamada_api('https://api.estadisticasbcra.com/inflacion_mensual_oficial')
base_monetaria = llamada_api('https://api.estadisticasbcra.com/base')
# Build the DataFrames
df = pd.DataFrame(inflacion)
df_bm = pd.DataFrame(base_monetaria)
# Rename the columns
df = df.rename(columns={'d': 'Fecha',
                        'v': 'IPC'})
df_bm = df_bm.rename(columns={'d': 'Fecha',
                              'v': 'base_monetaria'})
# Fix the data types
df['Fecha'] = pd.to_datetime(df['Fecha'])
df_bm['Fecha'] = pd.to_datetime(df_bm['Fecha'])
# Check that the dates are now datetime values
df['Fecha'].dtype
df_bm['Fecha'].dtype
# Filter. .copy() gives independent frames, so columns can be added to them
# later without pandas warning about writing to a slice of df / df_bm.
df_ipc = df[df['Fecha'] > '2002-12-31'].copy()
df_bm_filter = df_bm[df_bm['Fecha'] > '2002-12-31'].copy()
# Plot. DataFrame.plot() opens its own figure unless it is handed an axes,
# so create the sized figure first and draw into it; a bare
# plt.figure(figsize=...) would be left empty.
fig, ax = plt.subplots(figsize=(14, 12))
df_ipc.plot(x='Fecha', y='IPC', ax=ax)
plt.title('IPC-Mensual', fontdict={'fontsize': 20})
plt.ylabel('IPC')
plt.xticks(rotation=45)
plt.show()
The data looks like this
Fecha base_monetaria
1748 2003-01-02 29302
1749 2003-01-03 29360
1750 2003-01-06 29524
1751 2003-01-07 29867
1752 2003-01-08 29957
... ...
5966 2020-02-18 1941302
5967 2020-02-19 1941904
5968 2020-02-20 1887975
5969 2020-02-21 1855477
5970 2020-02-26 1807042
The idea is to take the data for the last day of the month and calculate the growth rate with the data for the last day of the previous month.
You can try something like this
from pandas.tseries.offsets import MonthEnd
import pandas as pd
df = pd.DataFrame({'Fecha': ['2020-01-31', '2020-02-29', '2020-03-31', '2020-05-31', '2020-04-30', '2020-07-31', '2020-06-30', '2020-08-31', '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
                   'price': ['32132', '54321', '3213121', '432123', '32132', '54321', '32132', '54321', '3213121', '432123', '32132', '54321']})
df['Fecha'] = pd.to_datetime(df['Fecha'])
df['is_month_end'] = df['Fecha'].dt.is_month_end
# Keep month-end rows only, in chronological order, with a fresh 0..n-1 index
# so that "index - 1" in change() always points at the previous month-end row.
# Building a new frame avoids in-place edits on a filtered slice.
df = df[df['is_month_end']].sort_values('Fecha').reset_index(drop=True)


def change(x, y):
    """Return price ``x`` divided by the price of the row before date ``y``.

    Reads the module-level ``df``. ``y`` is looked up in ``df['Fecha']`` and
    the price of the preceding row is used as the divisor.

    Returns 0 when no ratio can be computed: the date is missing or not
    unique, there is no preceding row, or a price is not a usable number.
    """
    try:
        index = df[df['Fecha'] == y].index.item()
        # Look the price up by column label; integer access such as row[1]
        # on a label-indexed row relies on a deprecated positional fallback.
        last = df.at[index - 1, 'price']
        return float(x) / float(last)
    except (KeyError, ValueError, TypeError, ZeroDivisionError):
        return 0


df['new_column'] = df.apply(lambda row: change(row['price'], row['Fecha']), axis=1)
df.head(12)
Assuming base_monetaria is a monthly cumulative value, then:
df = pd.DataFrame({'Fecha': ['2020-01-31', '2020-02-29', '2020-03-31', '2020-05-31', '2020-04-30', '2020-07-31', '2020-06-30', '2020-08-31', '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31'],
                   'price': [32132, 54321, 3213121, 432123, 32132, 54321, 32132, 54321, 3213121, 432123, 32132, 54321]})
df['Fecha'] = pd.to_datetime(df['Fecha'])
# Sort chronologically first: the sample rows are not in date order, and
# shift(1) below must see the previous month, not the previous input row.
df = df.set_index('Fecha').sort_index()
# Last available row of each calendar month. Grouping by monthly period
# avoids the "M" offset alias, which newer pandas deprecates in favour of "ME".
new_df = df.groupby(df.index.to_period('M')).tail(1).reset_index()
# Growth rate relative to the previous month (NaN for the first month).
new_df['rate'] = (new_df['price'] - new_df['price'].shift(1)) / new_df['price'].shift(1)
The new_df['rate'] will give you the growth rate the way you explained in the comment below
The problem can be solved by creating a column with the lagged values of base_monetaria:
# Take the last *available* observation of every month. Filtering on
# dt.is_month_end would drop any month whose final calendar day has no row,
# and the sample data does skip dates (e.g. 2003-01-03 is followed by
# 2003-01-06).
bm_sorted = df_bm_filter.sort_values('Fecha')
# .copy() so the new columns are written to an independent frame.
df_last_date = bm_sorted.groupby(bm_sorted['Fecha'].dt.to_period('M')).tail(1).copy()
# Previous month's value, then the month-over-month growth rate.
df_last_date['base_monetaria_lag'] = df_last_date['base_monetaria'].shift(1)
df_last_date['bm_growth'] = (df_last_date['base_monetaria'] - df_last_date['base_monetaria_lag']) / df_last_date['base_monetaria_lag']

python last working day of month (with CustomBusinessDay)?

I'd like to calculate the last working day before or after a specific date (taking holidays into account, not just weekends).
import datetime as dt
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday rules used here as the US trading-close calendar."""

    # nearest_workday: a Saturday holiday is observed on the Friday before,
    # a Sunday holiday on the Monday after.
    rules = [
        Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
    ]


def get_trading_close_holidays(fromyear, toyear):
    """Return the ``USTradingCalendar`` holidays for ``fromyear``..``toyear``.

    The window runs from 31 December of the year before ``fromyear`` to
    31 December of ``toyear``. Returns a DatetimeIndex of holiday dates.
    """
    inst = USTradingCalendar()
    return inst.holidays(dt.datetime(fromyear - 1, 12, 31), dt.datetime(toyear, 12, 31))
print(get_trading_close_holidays(2018,2018))
>> DatetimeIndex(['2018-01-01', '2018-01-15', '2018-02-19', '2018-03-30', '2018-05-28', '2018-07-04', '2018-09-03', '2018-11-22', '2018-12-25'], dtype='datetime64[ns]', freq=None)
import datetime as dt
from pandas.tseries.holiday import USFederalHolidayCalendar
# NOTE(review): a DatetimeIndex of holiday dates is passed as `calendar=`
# here; the answer below traces the unexpected result to this argument.
bday_us = CustomBusinessDay(calendar=get_trading_close_holidays(2000,2050))
d = dt.datetime(2018, 3, 31)
# Step back one custom business day from 2018-03-31.
d - bday_us
>> Timestamp('2018-03-30 00:00:00')
The result falls on Good Friday, which is a holiday (as shown above), so it should be one day earlier: 2018-03-29.
What's the issue?
I was able to reproduce the problem and after some testing I've narrowed it down to using a DatetimeIndex as the input of the calendar parameter in CustomBusinessDay.
You can skip that and use the calendar instance directly:
import datetime as dt
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
USLaborDay, USThanksgivingDay
from pandas.tseries.offsets import CustomBusinessDay, BDay
class USTradingCalendar(AbstractHolidayCalendar):
    """Holiday rules used here as the US trading-close calendar."""

    # nearest_workday: a Saturday holiday is observed on the Friday before,
    # a Sunday holiday on the Monday after.
    rules = [
        Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
    ]


# Hand the calendar instance itself to CustomBusinessDay, rather than a
# DatetimeIndex of its holidays.
bday_us = CustomBusinessDay(calendar=USTradingCalendar())
d = dt.datetime(2018, 3, 31)
# One custom business day back from 2018-03-31.
c = d - bday_us
print(c)
The output:
2018-03-29 00:00:00

Changing pandas class variables in python

I'm making use of the pandas.tseries.holiday module and have had an issue with the AbstractHolidayCalendar class within this module. The use is to obtain a calendar that accounts for all bank holidays/weekends in the UK when rolling forward business days.
My existing code (taken from another user) is:
class EnglandAndWalesHolidayCalendar(AbstractHolidayCalendar):
    """Holiday rules for the England and Wales bank holidays listed below."""

    rules = [
        Holiday('New Years Day', month=1, day=1, observance=next_monday),
        GoodFriday,
        EasterMonday,
        # First Monday on or after 1 May.
        Holiday('Early May bank holiday',
                month=5, day=1, offset=DateOffset(weekday=MO(1))),
        # Last Monday on or before 31 May.
        Holiday('Spring bank holiday',
                month=5, day=31, offset=DateOffset(weekday=MO(-1))),
        # Last Monday on or before 31 August.
        Holiday('Summer bank holiday',
                month=8, day=31, offset=DateOffset(weekday=MO(-1))),
        Holiday('Christmas Day', month=12, day=25, observance=next_monday),
        Holiday('Boxing Day',
                month=12, day=26, observance=next_monday_or_tuesday),
    ]
In the pandas module mentioned, I see the following:
class AbstractHolidayCalendar(object):
"""
Abstract interface to create holidays following certain rules.
"""
__metaclass__ = HolidayCalendarMetaClass
rules = []
start_date = Timestamp(datetime(1970, 1, 1))
end_date = Timestamp(datetime(2030, 12, 31))
_cache = None
However when adding
end_date = Timestamp(datetime(2080, 12, 31))
To my defined class it doesn't seem to work. Does anyone know how to adjust the end date without directly changing the pandas module?
Thanks

Categories