Pandas date_range starting from the end date to start date - python

I am trying to generate a range of semi-annual dates using Python. Pandas provides a function, pd.date_range, to help with this; however, I would like my date range to start from the end date and iterate backwards.
For instance given the input:
start = datetime.datetime(2016 ,2, 8)
end = datetime.datetime(2018 , 6, 1)
pd.date_range(start, end, freq='6m')
The result is:
DatetimeIndex(['2016-02-29', '2016-08-31', '2017-02-28', '2017-08-31',
'2018-02-28'])
How can I generate the following:
DatetimeIndex(['2016-02-08', '2016-06-01', '2016-12-01', '2017-06-01',
'2017-12-01', '2018-06-01'])

With the updated output (from the edit you made) you can do something like the following:
import datetime

import pandas as pd
from pandas.tseries.offsets import DateOffset

end = datetime.datetime(2018, 6, 1)
start = datetime.datetime(2016, 2, 8)

# The frequency of periods, in months.
period = 6

# Walk backwards from `end` in `period`-month jumps for as long as the date is
# still after `start`. Each jump is computed from `end` itself (not from the
# previous date) so that month-end clamping cannot accumulate drift.
anchored = []
jumps = 0
current = pd.Timestamp(end)
while current > start:
    anchored.append(current)
    jumps += 1
    current = pd.Timestamp(end) - DateOffset(months=jumps * period)

# `start` always opens the index; the anchored dates follow in ascending order.
result = pd.DatetimeIndex([start] + anchored[::-1])
print(result)
This is a fairly concise solution, though I didn't compare runtimes so I'm not sure how fast it is.
Basically this is just creating the dates you need as a list, and then converting it to a datetime index.

This can be done without pandas, using dateutil instead. However, it is more involved than it perhaps should be:
from datetime import date
import math
from dateutil.relativedelta import relativedelta

# set up key dates
start = date(2016, 2, 8)
end = date(2018, 6, 1)
# spacing between consecutive dates, in months
step_months = 6

# Walk backwards from `end` in `step_months` jumps while the date is still
# after `start`. Anchoring on `end` (instead of estimating a period count from
# the number of days) guarantees the sequence finishes exactly on `end`, never
# overshoots it, and never repeats `start`.
anchored = []
jumps = 0
current = end
while current > start:
    anchored.append(current)
    jumps += 1
    # Always offset from `end` itself so month-end clamping cannot drift.
    current = end - relativedelta(months=step_months * jumps)

# `start` opens the list; the anchored dates follow in ascending order.
arr = [start.isoformat()] + [d.isoformat() for d in reversed(anchored)]

# display results
print(arr)

Related

Choosing a date randomly in a period?

I want to randomly choose a date from 2021/1/1 to 2021/12/31, the process might include as follows:
generate a date list from 2021/1/1 to 2021/12/31, totally 365 elements;
randomly choose a date from the list.
Thanks!
As you tagged the question pandas, here is a pandas way:
# Pick one random day of 2021 and return it as a 'YYYY/MM/DD' string.
out = (pd.date_range('2021/1/1', '2021/12/31') # every day from the start date to the end date
.strftime('%Y/%m/%d') # format as string
.to_series().sample(n=1) # select 1 random date
.squeeze() # unwrap the one-element Series into a plain string
)
variant, setting the start date and number of days
# Same as above, but the range is given as a start date plus a number of days.
out = (pd.date_range('2021/1/1', periods=365) # 365 consecutive days starting at the start date
.strftime('%Y/%m/%d') # format as string
.to_series().sample(n=1) # select 1 random date
.squeeze() # unwrap the one-element Series into a plain string
)
example output: '2021/10/09'
list of random dates
You can easily adapt to generate several dates:
# Draw 10 random days of 2021 and return them as a list of 'YYYY/MM/DD' strings.
out = (pd
.date_range('2021/1/1', periods=365) # 365 consecutive days starting at the start date
.strftime('%Y/%m/%d').to_series() # format as strings, wrapped in a Series so .sample is available
.sample(n=10) # 10 random rows (pandas samples without replacement by default)
.to_list() # plain Python list of strings
)
example output:
['2021/04/06', '2021/09/11', '2021/08/02', '2021/09/17', '2021/12/30',
'2021/10/27', '2021/03/09', '2021/02/27', '2021/11/28', '2021/01/18']
Here's another way, drawing random integers between the epoch timestamps of the two dates:
import pandas as pd
import numpy as np


def random_dates(start, end, n):
    """Return *n* random calendar dates d with start <= d <= end.

    Both endpoints are inclusive. Dates are drawn with replacement, so the
    same date may appear more than once.

    Args:
        start: First selectable date (anything ``pd.Timestamp`` accepts).
        end: Last selectable date (anything ``pd.Timestamp`` accepts).
        n: Number of dates to draw.

    Returns:
        A ``DatetimeIndex`` of length *n* whose entries are all at midnight.

    Raises:
        ValueError: If *end* is earlier than *start*.
    """
    first = pd.Timestamp(start).normalize()
    last = pd.Timestamp(end).normalize()
    if last < first:
        raise ValueError("end must not be earlier than start")
    seconds_per_day = 24 * 60 * 60
    low = int(first.timestamp())
    # randint excludes its upper bound, so extend it by one full day;
    # otherwise the last date could never be drawn.
    high = int(last.timestamp()) + seconds_per_day
    # Explicit int64 keeps epoch seconds safe where the default int is 32-bit.
    seconds = np.random.randint(low, high, n, dtype=np.int64)
    # Truncate every random instant to midnight to obtain plain dates.
    return pd.to_datetime(seconds, unit="s").normalize()


date1 = pd.Timestamp("2021/01/01")
date2 = pd.Timestamp("2021/12/31")
print(date1.timestamp())
print(date2.timestamp())
n = 3  # How many samples to take
out = random_dates(date1, date2, n)
print(out)
Output:
1609459200.0
1640908800.0
DatetimeIndex(['2021-04-13', '2021-01-17', '2021-08-24'], dtype='datetime64[ns]', freq=None)
import datetime
from datetime import date, timedelta
from random import sample

start = date(2021, 1, 1)
end = date(2021, 12, 31)

# One entry per calendar day from `start` through `end`, both included.
span_in_days = (end - start).days
dates = [start + timedelta(days=offset) for offset in range(span_in_days + 1)]

# Draw a single date; `sample` returns it wrapped in a one-element list.
sample(dates, 1)

Getting list of months in between two dates according to specific format

start = "Nov20"
end = "Jan21"
# Expected output:
["Nov20", "Dec20", "Jan21"]
What I've tried so far is the following but am looking for more elegant way.
from calendar import month_abbr
from time import strptime
def get_range(a, b):
    """Print and return every month label from *a* through *b*, inclusive.

    Labels use the ``'%b%y'`` form, e.g. ``'Nov20'``. The range may lie inside
    a single year or span any number of years.

    Args:
        a: First month label, e.g. ``'Nov20'``.
        b: Last month label, e.g. ``'Jan21'``.

    Returns:
        The list of labels in chronological order; empty when *b* is earlier
        than *a*.

    Raises:
        ValueError: If either label does not match ``'%b%y'``.
    """
    start = strptime(a, '%b%y')
    end = strptime(b, '%b%y')
    # Flatten (year, month) into one running month number so that crossing a
    # year boundary is ordinary integer arithmetic.
    first = start.tm_year * 12 + (start.tm_mon - 1)
    last = end.tm_year * 12 + (end.tm_mon - 1)
    dates = []
    for month_number in range(first, last + 1):
        year, month_index = divmod(month_number, 12)
        # month_abbr is 1-based (index 0 is an empty string).
        dates.append('{}{:02d}'.format(month_abbr[month_index + 1], year % 100))
    print(dates)
    return dates
get_range('Nov20', 'Jan21')
Note: i don't want to use pandas as that's not logical to import such library for generating dates.
The date range may span different years so one way is to loop from the start date to end date and increment the month by 1 until end date is reached.
Try this:
from datetime import datetime
def get_range(a, b):
    """Return the '%b%y' labels of every month from *a* through *b* inclusive.

    Both arguments are labels such as ``'Nov20'``. The result is empty when
    *b* lies before *a*.
    """
    cursor = datetime.strptime(a, '%b%y')
    last = datetime.strptime(b, '%b%y')
    dates = []
    while not cursor > last:
        dates.append(cursor.strftime('%b%y'))
        # divmod turns December (12) into a carry of one year and month
        # index 0; every other month keeps its year and simply advances.
        year_carry, month_index = divmod(cursor.month, 12)
        cursor = cursor.replace(year=cursor.year + year_carry, month=month_index + 1)
    return dates
dates = get_range("Nov20", "Jan21")
print(dates)
Output:
['Nov20', 'Dec20', 'Jan21']
You can use timedelta to step one month (31 days) forward, but make sure you stay on the 1st of the month, otherwise the days might add up and eventually skip a month.
from datetime import datetime
from datetime import timedelta
def get_range(a, b):
    """Return the '%b%y' labels of every month from *a* through *b* inclusive.

    Both arguments are labels such as ``'Jan20'``. The result is empty when
    *b* lies before *a*.
    """
    # 31 days from the 1st of any month always lands inside the next month.
    month_hop = timedelta(days=31)
    current = datetime.strptime(a, '%b%y')
    stop = datetime.strptime(b, '%b%y')
    dates = []
    while not current > stop:
        dates.append(current.strftime('%b%y'))
        landed = current + month_hop
        # Snap back to the 1st so the extra days never pile up and skip a month.
        current = landed.replace(day=1)
    return dates
dates = get_range("Jan20", "Jan21")
print(dates)

Generate a random list of n dates in the iso 8601 format within a range in Python

I want to generate a random list of dates in the iso8601 format within the range from 2019-01-01 to 2019-12-31 n times.
from datetime import date
start_date = date(2019,1,1)
end_date = date(2019,12,31)
Other threads I've looked at simply give the list of all dates within that range, but that's not what I need. I also need the dates to be in the iso8601 format. What is the best way to achieve this?
You can use random.sample to sample without replacement or random.choices to sample with replacement after generating a list of all the dates in the range.
If you don't want to store the list you could also generate N random numbers from 1 through 365, then convert those to the appropriate dates.
import random
from datetime import date, timedelta

first_day = date(2019, 1, 1)
end_date = date(2019, 12, 31)
n = 3

# Enumerate every calendar day from first_day through end_date, inclusive.
day_count = (end_date - first_day).days + 1
dates = [first_day + timedelta(days=offset) for offset in range(day_count)]

# random.choices samples with replacement, so a date may appear more than once.
random_dates = random.choices(dates, k=n)
print([d.isoformat() for d in random_dates])
You can do something like this
import datetime
import random


def pick_random_date(start, end):
    """Return a random date d with start <= d <= end (both ends inclusive).

    Args:
        start: First selectable ``datetime.date``.
        end: Last selectable ``datetime.date``.

    Raises:
        ValueError: If *end* is earlier than *start*.
    """
    days_between_dates = (end - start).days
    # randrange excludes its stop value; without the +1 the end date itself
    # could never be returned.
    random_number_of_days = random.randrange(days_between_dates + 1)
    return start + datetime.timedelta(days=random_number_of_days)


# startdate
start_date = datetime.date(2019, 1, 1)
# enddate
end_date = datetime.date(2019, 12, 31)

random_date = pick_random_date(start_date, end_date)
# date.isoformat() yields the ISO 8601 form YYYY-MM-DD.
print(random_date.isoformat())
Which gave the following result when I ran it
2019-06-07
A similar question has been asked here
Python - Generate random dates to create Gantt sequenced tasks?
Most of the code is from there except the last loop
I create a DataFrame with a DatetimeIndex built from two ISO 8601 timestamp values. I then resample the DataFrame's index to 30-minute intervals and randomly choose 3 items from that index.
# NOTE(review): this snippet uses `pd`, `random` and `datetime.strptime` without
# showing their imports — presumably `import pandas as pd`, `import random` and
# `from datetime import datetime`; confirm before running.
# Two-row frame holding the first and last timestamp of the range as strings.
df=pd.DataFrame({'timestamp':['2019-01-01T00:00:00.000Z','2019-12-31T23:59:59.300Z']})
# Parse each string into a datetime; the trailing 'Z' is consumed by %z.
df['timestamp']=df['timestamp'].apply(lambda timestamp: datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f%z'))
print(df['timestamp'])
# Move the parsed timestamps into the index so the frame can be resampled.
df=df.set_index('timestamp')
# Resample into 30-minute bins between the two timestamps.
dates = df.resample('30Min').max().dropna()
#print(dates)
# Draw 3 bin timestamps; random.choices samples with replacement.
random_dates = random.choices(dates.index, k=3)
print(random_dates)
output:
[Timestamp('2019-08-29 16:30:00+0000', tz='UTC', freq='30T'), Timestamp('2019-11-09 03:30:00+0000', tz='UTC', freq='30T'), Timestamp('2019-08-02 12:00:00+0000', tz='UTC', freq='30T')]

Dates/months calculation

My below working code calculates date/month ranges, but I am using the Pandas library, which I want to get rid of.
import pandas as pd
from pandas.tseries.offsets import MonthEnd

# First day of every month from November 2019 through February 2020.
# freq='MS' means "month start".
dates=pd.date_range("2019-11","2020-02",freq='MS').strftime("%Y%m%d").tolist()
#print dates : ['20191101','20191201','20200101','20200201']
# MonthEnd(1) rolls each month-start date forward to the last day of its month.
df=(pd.to_datetime(dates,format="%Y%m%d") + MonthEnd(1)).strftime("%Y%m%d").tolist()
#print df : ['20191130','20191231','20200131','20200229']
How can I rewrite this code without using Pandas?
I don't want to use Pandas library as I am triggering my job through Oozie and we don't have Pandas installed on all our nodes.
Pandas offers some nice functionalities when using datetimes which the standard library datetime module does not have (like the frequency or the MonthEnd). You have to reproduce these yourself.
import datetime as DT
def next_first_of_the_month(dt):
    """Return a datetime for the first day of the month following *dt*.

    Only ``dt.year`` and ``dt.month`` are read; the result is always at
    midnight on day 1, and December rolls over into January of the next year.
    """
    # divmod maps month 12 to (1, 0): one year of carry and month index 0.
    # Months 1-11 map to (0, month), i.e. no carry.
    year_carry, month_index = divmod(dt.month, 12)
    return DT.datetime(dt.year + year_carry, month_index + 1, day=1)
# Parse the first and last month of the range.
start = DT.datetime.strptime("2019-11", "%Y-%m")
stop = DT.datetime.strptime("2020-02", "%Y-%m")

# Collect the first day of every month, beginning with `start` and continuing
# for as long as the next month start does not pass `stop`.
dates = [start]
while True:
    cd = next_first_of_the_month(dates[-1])
    if cd > stop:
        break
    dates.append(cd)

str_dates = [format(d, "%Y%m%d") for d in dates]
print(str_dates)
# prints: ['20191101', '20191201', '20200101', '20200201']

# The last day of a month is one day before the first day of the next month.
one_day = DT.timedelta(days=1)
end_dates = [next_first_of_the_month(d) - one_day for d in dates]
str_end_dates = [format(d, "%Y%m%d") for d in end_dates]
print(str_end_dates)
# prints ['20191130', '20191231', '20200131', '20200229']
Here I used a function that returns the datetime of the first day of the month following the input datetime. Sadly, timedelta does not work with months, and adding 30 days is of course not feasible (not all months have 30 days).
Then a while loop builds the sequence of first days of the month up to the stop date.
To get the end of each month, take the first day of the following month for each datetime in the list and subtract one day.

How to count days belonging to a given month in the range of two Python datetimes?

I have two Python datetime and I want to count the days between those dates, counting ONLY the days belonging to the month I choose. The range might overlap multiple months/years.
Example:
If I have 2017-10-29 & 2017-11-04 and I chose to count the days in October, I get 3 (29, 30 & 31 Oct.).
I can't find a straightforward way to do this so I think I'm going to iterate over the days using datetime.timedelta(days=1), and increment a count each time the day belongs to the month I chose.
Do you know a more performant method?
I'm using Python 2.7.10 with the Django framework.
Iterating over the days would be the most straightforward way to do it. Otherwise, you would need to know how many days are in a given month and you would need different code for different scenarios:
The given month is the month of the first date
The given month is the month of the second date
The given month is between the first and the second date (if dates span more than two months)
If you want to support dates spanning more than one year then you would need the input to include month and year.
Your example fits scenario #1, which I guess you could do like this:
>>> # Count the days from first_date through the end of its month, inclusive.
>>> from datetime import datetime, timedelta
>>>
>>> first_date = datetime(2017, 10, 29)
>>>
>>> # NOTE: for a December date, month + 1 is 13 and replace() raises ValueError,
>>> # so this shortcut only works for January through November.
>>> first_day_of_next_month = first_date.replace(month=first_date.month + 1, day=1)
>>> # One day before the 1st of next month is the last day of this month.
>>> last_day_of_this_month = first_day_of_next_month - timedelta(1)
>>> number_of_days_in_this_month = last_day_of_this_month.day
>>> # +1 because first_date itself is counted.
>>> number_of_days_in_this_month - first_date.day + 1
3
This is why I would suggest implementing it the way you originally intended and only turning to this if there's a performance concern.
You can get difference between two datetime objects by simply subtracting them.
So, we start by getting the difference between the two dates.
And then we generate all the dates between the two using
gen = (start_date + datetime.timedelta(days = e) for e in range(diff + 1))
And since we only want the dates between the specified ones, we apply a filter.
# Keep only the dates that fall in October; compare the month attribute,
# not the datetime object itself.
filter(lambda x: x.month == 10, gen)
Then we will sum them over.
And the final code is this:
import datetime


def count_days_in_month(start_date, end_date, month):
    """Count the days from *start_date* through *end_date* that fall in *month*.

    Both endpoints are inclusive. *month* is a month number (1-12); when the
    range spans several years, every occurrence of that month is counted.
    Returns 0 when *end_date* is earlier than *start_date*.
    """
    # Subtract in this order and take .days: range() needs a non-negative int,
    # not a timedelta.
    diff = (end_date - start_date).days
    gen = (start_date + datetime.timedelta(days=e) for e in range(diff + 1))
    return sum(1 for day in gen if day.month == month)


# Example from the question: 29, 30 and 31 October are counted.
start_date = datetime.datetime(2017, 10, 29)
end_date = datetime.datetime(2017, 11, 4)
count = count_days_in_month(start_date, end_date, 10)
You can also use reduce but sum() is a lot more readable.
A potential method of achieving this is to first compare whether your start or end dates you are comparing have the same month that you want to choose.
For example:
start = datetime(2017, 10, 29)
end = datetime(2017, 11, 4)
We create a function to compare the dates like so:
def daysofmonth(start, end, monthsel):
    """Count the days between *start* and *end* that fall in month *monthsel*.

    Both endpoints are inclusive and only their calendar date is used (any
    time of day is ignored). When the range spans several years, every
    occurrence of the month is counted.

    Args:
        start: First day of the range (``date`` or ``datetime``).
        end: Last day of the range (``date`` or ``datetime``).
        monthsel: Month number to count, 1-12.

    Returns:
        The number of matching days; 0 when the month does not overlap the
        range or when *end* is earlier than *start*.
    """
    from calendar import monthrange
    from datetime import date

    first = date(start.year, start.month, start.day)
    last = date(end.year, end.month, end.day)
    days = 0
    for year in range(first.year, last.year + 1):
        month_start = date(year, monthsel, 1)
        # monthrange gives the month's length directly, so December needs no
        # "month + 1" special case and leap years are handled correctly.
        month_end = date(year, monthsel, monthrange(year, monthsel)[1])
        overlap_start = max(first, month_start)
        overlap_end = min(last, month_end)
        if overlap_start <= overlap_end:
            # +1 because both ends of the overlap are counted.
            days += (overlap_end - overlap_start).days + 1
    return days
So, in our example setting monthsel gives:
>>> daysofmonth(start, end, 10)
>>> 3
Using pandas with your dates:
import pandas as pd
from datetime import datetime
# Count, per calendar month, the days in the range starting at first_date.
first_date = datetime(2017, 10, 29)
second_date = datetime(2017, 11, 4)
# Whole days between the two dates (6 for the values above).
days_count = (second_date - first_date).days
# Label of the month of interest, e.g. "2017-10".
month_date = first_date.strftime("%Y-%m")
# NOTE: periods=days_count yields days_count consecutive days starting at
# first_date, so the last generated day is the day before second_date;
# second_date itself is not counted.
values = pd.date_range(start=first_date,periods=days_count,freq='D').to_period('M').value_counts()
print(values)
# Look up the count for the chosen month by its "YYYY-MM" label.
print(values[month_date])
outputs
2017-10 3
2017-11 3
3

Categories