How to replace timestamp across the columns using pandas - python

# Sample data: both timestamp columns start out as plain strings.
records = {
    'subject_id': [1, 1, 2, 2],
    'time_1': [
        '2173/04/11 12:35:00',
        '2173/04/12 12:50:00',
        '2173/04/11 12:59:00',
        '2173/04/12 13:14:00',
    ],
    'time_2': [
        '2173/04/12 16:35:00',
        '2173/04/13 18:50:00',
        '2173/04/13 22:59:00',
        '2173/04/21 17:14:00',
    ],
    'val': [5, 5, 40, 40],
    'iid': [12, 12, 12, 12],
}
df = pd.DataFrame(records)

# Parse the two string columns into datetime values.
for column in ('time_1', 'time_2'):
    df[column] = pd.to_datetime(df[column])

# Day of the month, taken from the first timestamp column.
df['day'] = df['time_1'].dt.day
Currently my dataframe looks like as shown below
I would like to replace the timestamp in time_1 column to 00:00:00 and time_2 column to 23:59:00
This is what I tried but it doesn't work
df.groupby(df['subject_id'])['time_1'].apply(lambda x: pd.datetime.strftime(x, "%H:%M:%S") == "00:00:00") #approach 1
df.groupby(df['subject_id'])['time_1'].apply(lambda x: pd.pd.Timestamp(hour = '00', second = '00')) #approach 2
I expect my output dataframe to be like as shown below

In pandas, if all datetimes in a column have the time 00:00:00, the time part is not displayed.
Use Series.dt.floor or Series.dt.normalize to remove the times, and for the second column add a DateOffset:
# Truncate time_1 to midnight of the same day.
start_of_day = pd.to_datetime(df['time_1']).dt.floor('d')
df['time_1'] = start_of_day
# Alternative that gives the same result:
# df['time_1'] = pd.to_datetime(df['time_1']).dt.normalize()

# Truncate time_2 to midnight as well, then shift it forward to 23:59:00.
end_of_day_shift = pd.DateOffset(hours=23, minutes=59)
df['time_2'] = pd.to_datetime(df['time_2']).dt.floor('d') + end_of_day_shift

# Day of the month from the (now truncated) first timestamp column.
df['day'] = start_of_day.dt.day
print(df)
subject_id time_1 time_2 val iid day
0 1 2173-04-11 2173-04-12 23:59:00 5 12 11
1 1 2173-04-12 2173-04-13 23:59:00 5 12 12
2 2 2173-04-11 2173-04-13 23:59:00 40 12 11
3 2 2173-04-12 2173-04-21 23:59:00 40 12 12

Related

How do I specify certain date as the first week and calculate the week number in pandas?

how to convert time to week number
year_start = '2019-05-21'
year_end = '2020-02-22'
How do I get the week number based on the date that I set as first week?
For example 2019-05-21 should be Week 1 instead of 2019-01-01
If you do not have dates outside of year_start/year_end, use isocalendar().week and perform a simple subtraction with modulo:
# The date whose ISO week should be counted as week 1.
year_start = pd.to_datetime('2019-05-21')
#year_end = pd.to_datetime('2020-02-22')

# Sample dates, one every 30 days across the period.
sample_dates = pd.date_range('2019-05-21', '2020-02-22', freq='30D')
df = pd.DataFrame({'date': sample_dates})

# Shift the ISO week numbers so the starting week becomes 0; the modulo wraps
# the weeks that fall in the following calendar year, and +1 makes it 1-based.
iso_week = df['date'].dt.isocalendar().week.astype(int)
first_iso_week = year_start.isocalendar()[1]
week_offset = (iso_week - first_iso_week) % 52
df['week'] = week_offset + 1
Output:
date week
0 2019-05-21 1
1 2019-06-20 5
2 2019-07-20 9
3 2019-08-19 14
4 2019-09-18 18
5 2019-10-18 22
6 2019-11-17 26
7 2019-12-17 31
8 2020-01-16 35
9 2020-02-15 39
Try the following code.
import numpy as np
import pandas as pd
year_start = '2019-05-21'
year_end = '2020-02-22'
# Create a sample dataframe
df = pd.DataFrame(pd.date_range(year_start, year_end, freq='D'), columns=['date'])
# Whole days elapsed since the custom first day. Subtracting datetimes and
# reading .dt.days avoids the deprecated Series.view(np.int64) and does not
# depend on the column being stored with nanosecond resolution.
days_since_start = (df['date'] - pd.to_datetime(year_start)).dt.days
# Add the week number: subtracting the weekday (Monday=0) moves every date back
# to the Monday of its week, so the number increases on each Monday; the first,
# possibly partial, week is week 1.
df['week_number'] = (
    (days_since_start - df['date'].dt.day_of_week + 7) // 7 + 1
).astype(np.int64)
date
week_number
2019-05-21
1
2019-05-22
1
2019-05-23
1
2019-05-24
1
2019-05-25
1
2019-05-26
1
2019-05-27
2
2019-05-28
2
2020-02-18
40
2020-02-19
40
2020-02-20
40
2020-02-21
40
2020-02-22
40
If you just need a function to calculate week no, based on given start and end date:
import pandas as pd
import numpy as np
start_date = "2019-05-21"
end_date = "2020-02-22"
start_datetime = pd.to_datetime(start_date)
end_datetime = pd.to_datetime(end_date)


def get_week_no(date):
    """Return the 1-based week number of *date*, counted from ``start_date``.

    Weeks are consecutive 7-day spans starting on ``start_date`` itself, so
    ``start_date`` and the six days after it are week 1.

    Args:
        date: Anything ``pd.to_datetime`` can parse into a single timestamp.

    Returns:
        int: The week number, starting at 1.

    Raises:
        ValueError: If *date* is before ``start_date`` or after ``end_date``.
    """
    given_datetime = pd.to_datetime(date)
    # Reject anything outside the configured range.
    if not (start_datetime <= given_datetime <= end_datetime):
        raise ValueError(f"Date is not in range {start_date} - {end_date}")
    elapsed = given_datetime - start_datetime
    # Whole weeks elapsed; adding 1 because the first week would otherwise be 0.
    return elapsed.days // 7 + 1
print(get_week_no("2019-05-21"))
In the function, we are calculating week no by finding difference between given date and start date in weeks.

I want percentage change in two time column (column format is hh:mm:ss) in pandas

import pandas as pd
import numpy as np
# Sample input; a bare '0' stands in for a missing time.
data = {
    'Name': ['Si', 'Ov', 'Sp', 'Sa', 'An'],
    'Time1': ['02:00:00', '03:02:00', '04:00:30', '01:02:30', '0'],
    'Time2': ['03:00:00', '0', '05:00:30', '02:02:30', '02:00:00'],
}
# Build the frame from the column dictionary and show it.
df = pd.DataFrame(data)
print(df)
Output
Name Time1 Time2
0 Siya 02:00:00 03:00:00
1 Ovi 03:02:00 0
2 Spruha 04:00:30 05:00:30
3 Saanvi 01:02:30 02:02:30
4 Ansh 0 02:00:00
I want to add one more column and apply the formula
Time3=(Time1-Time2)/Time2
There is 0 or nan value also.
Use to_timedelta to convert the times to timedeltas:
# Parse both columns as timedeltas so they support arithmetic.
t1 = pd.to_timedelta(df['Time1'])
t2 = pd.to_timedelta(df['Time2'])
# Relative change: (Time1 - Time2) / Time2.
difference = t1 - t2
df['Time3'] = difference / t2
print(df)
Name Time1 Time2 Time3
0 Si 02:00:00 03:00:00 -0.333333
1 Ov 03:02:00 0 inf
2 Sp 04:00:30 05:00:30 -0.199667
3 Sa 01:02:30 02:02:30 -0.489796
4 An 0 02:00:00 -1.000000
EDIT:
To add a new row and column, use:
def format_timedelta(x):
    """Format a timedelta as an ``H:MM:SS`` string.

    Hours are not zero-padded and are not capped at 24, so a duration longer
    than a day comes out as e.g. ``'30:00:00'``. Fractional seconds are
    truncated.

    Args:
        x: Any object with a ``total_seconds()`` method, such as
            ``datetime.timedelta`` or ``pandas.Timedelta``.

    Returns:
        str: The duration formatted as hours, minutes and seconds.
    """
    ts = x.total_seconds()
    hours, remainder = divmod(ts, 3600)
    minutes, seconds = divmod(remainder, 60)
    return ('{}:{:02d}:{:02d}').format(int(hours), int(minutes), int(seconds))
# Convert the time strings to timedeltas and compute the relative change
# (Time1 - Time2) / Time2 for every row.
t1 = pd.to_timedelta(df['Time1'])
t2 = pd.to_timedelta(df['Time2'])
df['Time3'] = t1.sub(t2).div(t2)
# Label for the new row: the current row count
# (presumably the frame still has its default 0..n-1 index; confirm).
idx = len(df)
# New row: the Time1 and Time2 column totals, turned back into text by
# format_timedelta.
df.loc[idx] = (pd.concat([t1, t2], axis=1)
.sum()
.apply(format_timedelta))
# Fill in the two remaining cells of the new row: the 'Total' label and the sum
# of Time3 with infinite ratios masked out first.
df.loc[idx, ['Name','Time3']] = ['Total', df['Time3'].mask(np.isinf(df['Time3'])).sum()]
print (df)
Name Time1 Time2 Time3
0 Si 02:00:00 03:00:00 -0.333333
1 Ov 03:02:00 0 inf
2 Sp 04:00:30 05:00:30 -0.199667
3 Sa 01:02:30 02:02:30 -0.489796
4 An 0 02:00:00 -1.000000
5 Total 10:05:00 12:03:00 -2.022796

Days before end of month in pandas

I would like to get the number of days before the end of the month, from a string column representing a date.
I have the following pandas dataframe :
df = pd.DataFrame({'date':['2019-11-22','2019-11-08','2019-11-30']})
df
date
0 2019-11-22
1 2019-11-08
2 2019-11-30
I would like the following output :
df
date days_end_month
0 2019-11-22 8
1 2019-11-08 22
2 2019-11-30 0
The package pd.tseries.MonthEnd with rollforward seemed a good pick, but I can't figure out how to use it to transform a whole column.
Subtract all days of month created by Series.dt.daysinmonth with days extracted by Series.dt.day:
# Parse the string column so the datetime accessor can be used on it.
df['date'] = pd.to_datetime(df['date'])
# Days left in the month = length of the month minus the day of the month.
date_parts = df['date'].dt
df['days_end_month'] = date_parts.daysinmonth - date_parts.day
Or use offsets.MonthEnd, subtract and convert timedeltas to days by Series.dt.days:
df['days_end_month'] = (df['date'] + pd.offsets.MonthEnd(0) - df['date']).dt.days
print (df)
date days_end_month
0 2019-11-22 8
1 2019-11-08 22
2 2019-11-30 0

how to compare two dates columns with common category in pandas?

I have a data set with repeated categories. I want to compare the two date columns within the same category.
I want to check whether DATE1 is less than the values in DATE2 of the same CATEGORY, and find the earliest date that it is greater than.
I'm trying this but i'm not getting the results that I am looking for
df['test'] = np.where(m['DATE1'] < df['DATE2'], Y, N)
CATEGORY DATE1 DATE2 GREATERTHAN GREATERDATE
0 23 2015-01-18 2015-01-15 Y 2015-01-10
1 11 2015-02-18 2015-02-19 N 0
2 23 2015-03-18 2015-01-10 Y 2015-01-10
3 11 2015-04-18 2015-08-18 Y 2015-02-19
4 23 2015-05-18 2015-02-21 Y 2015-01-10
5 11 2015-06-18 2015-08-18 Y 2015-02-19
6 15 2015-07-18 2015-02-18 0 0
# Parse both date columns so they compare as dates rather than as strings.
df['DATE1'] = pd.to_datetime(df['DATE1'])
df['DATE2'] = pd.to_datetime(df['DATE2'])
# Row-wise flag: 'Y' where DATE1 is later than DATE2, otherwise 'N'.
df['GREATERTHAN'] = np.where(df['DATE1'] > df['DATE2'], 'Y', 'N')
## Getting the earliest date for which data is available, per category
# Series.append was removed in pandas 2.0; pd.concat stacks the two columns
# in the same way before the minimum is taken.
earliest_dates = df.groupby(['CATEGORY']).apply(
    lambda x: pd.concat([x['DATE1'], x['DATE2']]).min()
).to_frame()
## Merging to get the earliest date column per category
df.merge(earliest_dates, left_on='CATEGORY', right_on=earliest_dates.index, how='left')

Pandas add n number of new date rows to DataFrame

I want to add a number of months to the end of my dataframe.
What is the best way to append another six (or 12) months to such a dataframe using dates?
0 2013-07-31
1 2013-08-31
2 2013-09-30
3 2013-10-31
4 2013-11-30
Thanks
Edit: I think you might want pd.date_range
df = pd.DataFrame({'date':['2010-01-31', '2010-02-28'], 'x':[1,2]})
df['date'] = pd.to_datetime(df.date)
date x
0 2010-01-31 1
1 2010-02-28 2
Then
# Month-end dates following the last existing one. The range starts at the last
# existing date and inclusive='right' drops that starting point, so 5 new dates
# remain out of the 6 periods. (`closed=` was replaced by `inclusive=` and
# removed in pandas 2.0.)
new_dates = pd.date_range(start=df.date.iloc[-1], periods=6, freq='M', inclusive='right')
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result,
# keeping the original index labels and leaving NaN in the columns the new rows lack.
pd.concat([df, pd.DataFrame({'date': new_dates})])
date x
0 2010-01-31 1.0
1 2010-02-28 2.0
0 2010-03-31 NaN
1 2010-04-30 NaN
2 2010-05-31 NaN
3 2010-06-30 NaN
4 2010-07-31 NaN
After looking into append and other loop sort of options I created this:
# Number of rows currently in the frame.
length = df.shape[0]
# How many extra months to generate.
add = 12
# First month in the existing data; the new range starts from here.
start = df['month'].iloc[0]
# Total number of month-end periods to produce.
count = int(length + add)
dt = pd.date_range(start, periods=count, freq='M')
this is the dt I get. It gives the proper ending month days.
DatetimeIndex(['2013-07-31', '2013-08-31', '2013-09-30', '2013-10-31',
'2013-11-30', '2013-12-31', '2014-01-31', '2014-02-28',
'2014-03-31', '2014-04-30', '2014-05-31', '2014-06-30'],
dtype='datetime64[ns]', freq='M')
now I just have to change from the DatetimeIndex.
I hope this is good code. Cheers.

Categories