How to convert a Pandas Timestamp to UTC seconds as an int? - python

I tried to convert it like this, but it still doesn't work as intended.
# Attempt from the question: tag the naive timestamp as UTC and subtract it
# from the current UTC time.
# NOTE: the results are the seconds elapsed between each timestamp and "now",
# not the number of seconds since the Unix epoch.
ts = pd.Timestamp('2022-01-02T12')
ts_utc = ts.replace(tzinfo=timezone.utc)
# Timestamp.utcnow() is deprecated in recent pandas releases; now(tz="UTC")
# returns the same tz-aware "current time" value.
x = pd.Timestamp.now(tz="UTC")
ts_delta = x - ts_utc
ts_new = ts_delta.total_seconds()
# Same computation for the timestamp one day earlier.
time_yesterday = ts - pd.Timedelta(days=1)
ts_y_utc = time_yesterday.replace(tzinfo=timezone.utc)
ts_y_delta = x - ts_y_utc
ts_y_new = ts_y_delta.total_seconds()

This code works and returns UTC seconds from the Pandas Timestamp.
# Convert the naive timestamp to whole seconds since the Unix epoch,
# interpreting its wall-clock value as UTC.
# Localising directly avoids a round trip through the machine's local time
# zone (time.mktime / datetime.fromtimestamp), which can shift the result
# around DST transitions; int() yields the integer the question asks for.
ts = pd.Timestamp('2022-01-02T12')
timestamp = int(ts.tz_localize('UTC').timestamp())
# Despite its name, this value is one hour before ts, not one day.
time_yesterday = ts - pd.Timedelta(hours=1)
timestamp2 = int(time_yesterday.tz_localize('UTC').timestamp())

Related

How can I take the last value -1 pandas

I am trying to write a function that checks whether a date is in my Excel file and, if it is not, retrieves the closest earlier date instead.
I managed to do this for the closest later date, and here is my code.
It is only the earlier date that I really can't get to work.
I tried this for the day before:
def get_all_dates_between_2_dates_with_special_begin_substraction(Class, date_départ, date_de_fin, date_debut_analyse, exclus=False):
    """Return, for each periodic step date, the latest calendar date on or before it.

    Steps from ``date_départ`` to ``date_de_fin`` (inclusive) in increments
    selected by ``Class.F0``. For every step date, the latest value of the
    ``TARGETirs_holi`` column in the calendar workbook that is on or before
    the step date is looked up, and kept when it is not earlier than
    ``date_debut_analyse``.

    Args:
        Class: Object whose ``F0`` attribute names the step size: "mois",
            "trimestre", "semestre" or "année".
        date_départ: First step date, as a 'YYYY-MM-DD' string.
        date_de_fin: Upper bound for the step dates, as a 'YYYY-MM-DD' string.
        date_debut_analyse: Earliest date allowed in the result, as a
            'YYYY-MM-DD' string.
        exclus: When true, the first and last entries of the result are
            dropped.

    Returns:
        List of 'DD/MM/YYYY' strings, one per retained step date.

    Raises:
        ValueError: If ``Class.F0`` is not one of the supported step names.
    """
    steps = {
        "mois": relativedelta(months=1),
        "trimestre": relativedelta(months=3),
        "semestre": relativedelta(months=6),
        "année": relativedelta(years=1),
    }
    # Fail with a clear message instead of a TypeError in the loop below.
    if Class.F0 not in steps:
        raise ValueError(f"unsupported frequency: {Class.F0!r}")
    time_to_add = steps[Class.F0]

    inFile = "database/Calendar_US_Target.xlsx"
    inSheetName = "Sheet1"
    df = pd.read_excel(inFile, sheet_name=inSheetName)
    # NOTE(review): assumes this column was parsed as datetimes — confirm
    # against the workbook.
    calendar = df['TARGETirs_holi']

    date_depart = datetime.datetime.strptime(date_départ, '%Y-%m-%d')
    date_fin = datetime.datetime.strptime(date_de_fin, '%Y-%m-%d')
    date_calcul_depart = datetime.datetime.strptime(date_debut_analyse, '%Y-%m-%d')

    result_dates = []
    var_date_depart = date_depart
    while var_date_depart <= date_fin:
        # Keep only the calendar dates on or before the step date; the largest
        # of them is the match (max() does not rely on the sheet being sorted).
        earlier = calendar[calendar <= var_date_depart]
        if not earlier.empty:
            result = earlier.max()
            if result >= date_calcul_depart:
                result_dates.append(result.strftime('%d/%m/%Y'))
        var_date_depart = var_date_depart + time_to_add

    if exclus:
        result_dates = result_dates[1:-1]
    return result_dates
What I want is to build a boolean column (or DataFrame) that is True wherever the calendar date is on or before the date I pass in, and then take the last value that is True.
For example:
I have this array: [12-05-2022, 15-05-2022, 16-05-2022, 19-05-2022].
If I pass 15-05-2022, it should give me 15-05-2022, but if I pass 18-05-2022, it should give me 16-05-2022.
Thanks!

How to replace the day in a date with another date?

I'm trying to replace the day of my date inside an if statement, but I keep getting the wrong output for the year:
05/15/5 instead of 05/15/2020. The code is below:
# Code from the question, with its indentation restored.
today_date = datetime.datetime.now()
date = today_date.date()
formatted_date = datetime.date.strftime(date, "%m/%d/%Y")
mmonth = date.month
myear = date.year
mdate = date.day
# NOTE: formatted_date is a str, so the calls below are
# str.replace(old, new, count): they substitute the year text with the month
# number (giving e.g. "05/15/5") instead of changing the day. This is the
# behaviour the question is asking about.
if mdate < 7:
    m0weekend = formatted_date.replace(str(myear), str(mmonth), 1)
else:
    m0weekend = formatted_date.replace(str(myear), str(mmonth), 15)
It's easier to replace the day before converting to a string:
# Rebind `date` to a copy of itself whose day field is set to 1.
date = date.replace(day=1)
or, in your case:
# Operate on the date object itself (not on the formatted string), so that
# replace() swaps only the day field.
if mdate < 7:
    m0weekend = date.replace(day=1)
else:
    m0weekend = date.replace(day=15)
formatted_date is actually a string.
You are using the str.replace() method not the datetime.date.replace() method.
import datetime

# Work on the date object first; convert it to a string only at the very end.
today_date = datetime.datetime.now()
pre_formatted_date = today_date.date()
mdate = pre_formatted_date.day

# Days 1-6 snap to the 1st of the month; every other day snaps to the 15th.
if mdate < 7:
    pre_formatted_date = pre_formatted_date.replace(day=1)
else:
    pre_formatted_date = pre_formatted_date.replace(day=15)
print(pre_formatted_date)

formatted_date = pre_formatted_date.strftime("%m/%d/%Y")
print(formatted_date)
Which has the following output:
2020-05-15
05/15/2020
You can get today's datetime.date directly from datetime.date.today() rather than creating a datetime.datetime and converting it to a date. Once you have today's date, you can build the datetime.date you need and turn it into a str, i.e.:
import datetime

today = datetime.date.today()
# Snap to the 1st during the first six days of the month, otherwise to the 15th.
target_day = 1 if today.day < 7 else 15
date = today.replace(day=target_day)
formatted_date = date.strftime("%m/%d/%Y")
print(formatted_date)  # e.g. 05/15/2020

How to create a blank datetime object? So I can find the average of the time deltas

I have converted the current timestamps into datetime objects and I am able to subtract them. My problem is that I need to sum all of the deltas, but I am unable to create a "blank" datetime object, for example 0000-00-00 00:00:00.000.
# Code from the question, with its indentation restored.
# NOTE: the accumulators start as the int 0, which is what makes the
# `+=` lines below fail once a timedelta is added to them.
low_sum_time_alert_fired_to_closed = 0
low_sum_time_investigate_open_to_closed = 0
low_sum_time_alert_fired_to_investigation = 0
low_sum_time_alert_fired_to_first_assignment = 0

# Iterate through the row indexes.
for idx in range(amt_of_rows):
    # Index the row of the dataframe.
    row = df.iloc[idx]
    vendor_name = row[2]

    # Parse the four timestamp strings of the row.
    insert_at = datetime.strptime(row[3], "%Y-%m-%d %H:%M:%S.%f")
    first_assignment = datetime.strptime(row[4], "%Y-%m-%d %H:%M:%S.%f")
    first_investigated = datetime.strptime(row[5], "%Y-%m-%d %H:%M:%S.%f")
    time_closed = datetime.strptime(row[6], "%Y-%m-%d %H:%M:%S.%f")

    if row[0] == "LOW":
        num_low += 1
        # This is where things break: the difference of two datetimes is a
        # timedelta, and it is being added to an int accumulator.
        low_sum_time_alert_fired_to_closed += time_closed - insert_at
        low_sum_time_investigate_open_to_closed += time_closed - first_investigated
        low_sum_time_alert_fired_to_investigation += first_investigated - insert_at
        low_sum_time_alert_fired_to_first_assignment += first_assignment - insert_at

# Turn the sums into averages.
# NOTE(review): the sums only cover "LOW" rows but are divided by the total
# row count; presumably num_low is the intended divisor — confirm.
low_sum_time_alert_fired_to_closed = low_sum_time_alert_fired_to_closed/amt_of_rows
low_sum_time_investigate_open_to_closed = low_sum_time_investigate_open_to_closed/amt_of_rows
low_sum_time_alert_fired_to_investigation = low_sum_time_alert_fired_to_investigation/amt_of_rows
low_sum_time_alert_fired_to_first_assignment = low_sum_time_alert_fired_to_first_assignment/amt_of_rows
To sum all the timedeltas you need to start from a blank timedelta rather than a datetime.
Just start from datetime.timedelta(0).
A simple example:
import datetime

# The accumulator starts as a zero-length duration so timedeltas can be added to it.
td0 = datetime.timedelta(0)
td1 = datetime.timedelta(hours=1, minutes=1, seconds=1)
td2 = datetime.timedelta(hours=2, minutes=2, seconds=2)
td3 = datetime.timedelta(hours=3, minutes=3, seconds=3)
for delta in (td1, td2, td3):
    td0 += delta
print(td0)
Output:
6:06:06

Pandas date formatting

I have the below two functions :
def create_base_df(start_date, end_date):
    """Build a daily calendar frame with date-part key columns.

    Args:
        start_date: First day of the range; any value ``pd.date_range``
            accepts.
        end_date: Last day of the range (inclusive).

    Returns:
        DataFrame with one row per day and the columns ``dt`` (datetime),
        ``dt_num_key`` (int, YYYYMMDD), ``cal_yr_nkey`` (str, YYYY),
        ``cal_mon_ofyr_nkey`` (str, zero-padded month),
        ``cal_qtr_ofyr_nkey`` (str, zero-padded quarter) and
        ``cal_wk_ofyr_nkey`` (str, ISO week number, not padded).
    """
    base_df = pd.DataFrame({"dt": pd.date_range(start_date, end_date)})
    dates = base_df["dt"].dt
    base_df["dt_num_key"] = dates.strftime("%Y%m%d").astype(int)
    base_df["cal_yr_nkey"] = dates.strftime("%Y")
    base_df["cal_mon_ofyr_nkey"] = dates.strftime("%m")
    base_df["cal_qtr_ofyr_nkey"] = dates.quarter.astype(str).str.zfill(2)
    # Series.dt.week was removed in pandas 2.0; isocalendar().week is its
    # replacement and yields the same ISO week numbers.
    base_df["cal_wk_ofyr_nkey"] = dates.isocalendar().week.astype(str)
    return base_df
def month_operations(df):
    """Add month-level name, key and boundary-date columns to a calendar frame.

    Args:
        df: DataFrame with a datetime column ``dt`` and the string columns
            ``cal_yr_nkey`` and ``cal_mon_ofyr_nkey`` (the columns that
            ``create_base_df`` produces). It is modified in place.

    Returns:
        The same DataFrame, with the added columns. Columns whose name ends
        in ``_nkey`` hold strings; ``mon_seq_id`` holds ints; the remaining
        added columns hold datetimes.
    """
    dates = df["dt"]
    one_month = pd.DateOffset(months=1)

    df["cal_mon_nm"] = dates.dt.strftime("%B")
    df["cal_mon_shrt_nm"] = dates.dt.strftime("%b")
    df["cal_yr_mon_nkey"] = df["cal_yr_nkey"] + df["cal_mon_ofyr_nkey"]
    # Dense rank numbers the distinct year-months 1, 2, 3, ... Ranking the
    # column directly keeps each row aligned with its own month even when the
    # frame is unsorted or does not have a default index.
    df["mon_seq_id"] = df["cal_yr_mon_nkey"].rank(method="dense").astype(int)

    # Truncate every date to the first day of its month.
    first_of_month = dates.dt.to_period("M").dt.to_timestamp()
    df["dt_frst_dayof_mon"] = first_of_month
    df["dt_frst_dayof_mon_nkey"] = first_of_month.dt.strftime("%Y%m%d")
    # The last day of the month is the day before the first day of the next one.
    df["dt_lst_dayof_mon"] = first_of_month + one_month - pd.DateOffset(days=1)
    df["dt_lst_dayof_mon_nkey"] = df["dt_lst_dayof_mon"].dt.strftime("%Y%m%d")

    df["dt_frst_dayof_lst_mon"] = first_of_month - one_month
    df["dt_frst_dayof_lst_mon_nkey"] = df["dt_frst_dayof_lst_mon"].dt.strftime("%Y%m%d")
    df["dt_lst_mon"] = dates - one_month
    df["dt_lst_mon_nkey"] = df["dt_lst_mon"].dt.strftime("%Y%m%d")
    df["dt_lst_yr_lst_mon"] = df["dt_lst_mon"] - pd.DateOffset(years=1)
    df["dt_lst_yr_lst_mon_nkey"] = df["dt_lst_yr_lst_mon"].dt.strftime("%Y%m%d")
    return df
The columns dt_lst_yr_lst_mon_nkey, dt_lst_mon_nkey and dt_frst_dayof_lst_mon_nkey are returning values in datetime format ('1899-12-01 00:00:00'), and I can't seem to figure out why. All the other *key columns return integers as expected.
My main looks like this:
# Build the base calendar, then add the month-level columns to it.
# NOTE(review): "01/12/1900" is presumably parsed month-first (12 January
# 1900) — confirm that this is the intended end date.
base_df = create_base_df(start_date="01/01/1900", end_date="01/12/1900")
month_df = month_operations(base_df)
The expected output: if the value of dt_lst_yr_lst_mon is "1900-12-01 00:00:00", then dt_lst_yr_lst_mon_nkey should be "19001201".
Any pointers on where I am going wrong are appreciated.
Thanks.

I want to create a time series of monthly means in Pandas

I have a dataframe that consists of hourly data for a whole year. I want to calculate the monthly means and show them in a time series plot. I have one variable which is NO2 values.
# Cleaning data
ck_2000 = pd.read_csv(
    '2000-CamdenKerbside.csv',
    header=0,
    skiprows=4,
    usecols=range(0, 3),
    skipfooter=1,
    na_values='No data',
    engine='python',
)
colnames = ['Date', 'Time', 'NO2']
ck_2000.columns = colnames

# Reformat date/time
# A time of '24:00:00' denotes the end of the day, i.e. 00:00:00 on the
# following day, so those rows are rolled forward by one day after parsing.
# The Date/Time text columns are not adjusted; the index is authoritative.
ends_at_midnight = ck_2000['Time'] == '24:00:00'
ck_2000['Time'] = ck_2000['Time'].replace('24:00:00', '00:00:00')
dtw = pd.to_datetime(ck_2000['Date'] + ck_2000['Time'], format='%d/%m/%Y%H:%M:%S')
dtw = dtw + pd.to_timedelta(ends_at_midnight.astype(int), unit='D')
ck_2000.index = dtw

# Index dataframe by date
# Reindexing onto a complete hourly range inserts NaN rows for missing hours.
firstDate = ck_2000.index[0]
lastDate = ck_2000.index[-1]
hourly_index = pd.date_range(start=firstDate, end=lastDate, freq=pd.Timedelta(hours=1))
ck2000 = ck_2000.reindex(index=hourly_index)

# Change data type to float
ck2000['NO2'] = ck2000['NO2'].astype('float64')

# Interpolation
# Interpolate the gap-filled hourly frame, and only its numeric column (the
# Date/Time text columns duplicate the index and cannot be interpolated).
ck_2000_int = ck2000[['NO2']].interpolate()

# df's for all months
# Rows are selected with .loc; df['2000-01'] would look for a column.
ck_2000_jan = ck_2000_int.loc['2000-01']
ck_2000_feb = ck_2000_int.loc['2000-02']
ck_2000_mar = ck_2000_int.loc['2000-03']
ck_2000_apr = ck_2000_int.loc['2000-04']
ck_2000_may = ck_2000_int.loc['2000-05']
ck_2000_jun = ck_2000_int.loc['2000-06']
ck_2000_jul = ck_2000_int.loc['2000-07']
ck_2000_aug = ck_2000_int.loc['2000-08']
ck_2000_sept = ck_2000_int.loc['2000-09']
ck_2000_oct = ck_2000_int.loc['2000-10']
ck_2000_nov = ck_2000_int.loc['2000-11']
ck_2000_dec = ck_2000_int.loc['2000-12']
You should be able to use resample.
Consider the following example:
# Hourly index covering the whole of 2000, filled with random stand-in data.
# Offset objects are used instead of the 'H' and 'M' string aliases, which
# are deprecated in recent pandas releases.
tidx = pd.date_range('2000-01-01', '2000-12-31 23:00', freq=pd.Timedelta(hours=1))
ck_2000_int = pd.DataFrame(dict(NO2=np.random.randn(len(tidx))), tidx)
# One mean per calendar month, labelled with the month end, then plotted.
ck_2000_int.resample(pd.offsets.MonthEnd()).mean().plot()

Categories