Python datetime max and min using numpy showing unexpected result - python

I have graph-generating code in tkinter. On the x-axis I need to show the merged values of the Date and Time columns of the dataframe, but the x-axis does not show proper labels on its grid.
Here is my code, along with the Excel data and the graph image I am getting:
import tkinter as tk
import os
from tkinter import filedialog
from matplotlib.figure import Figure
import pandas as pd
import numpy as np
import time
import tkinter.messagebox
from matplotlib.font_manager import FontProperties
from pathlib import Path
fontP = FontProperties()
fontP.set_size('x-small')
root= tk.Tk()
canvas1 = tk.Canvas(root, width = 800, height = 300)
canvas1.pack()
label1 = tk.Label(root, text='Data Analyser')
label1.config(font=('Arial', 20))
date1=time.strftime("%H-%M-%S,%d-%m-%Y")
# Button callback: ask the user for an Excel file, plot the column selected in
# the drop-down (clicked1) against the combined Time/Date labels, save the
# figure as a PNG and show a confirmation message box.
def get1lineGraph ():
global df
import_file_path = filedialog.askopenfilename(parent=root)
# The output folder is named after the file's stem (a relative path) and is
# created if it does not exist yet.
filename=Path(import_file_path).stem
isExist = os.path.exists(filename)
if not isExist:
os.makedirs(filename)
df = pd.read_excel (import_file_path)
df['Date'] = pd.to_datetime(df['Date'])
# One x label per row: the time, a newline, then the date.
daytime=df['Time'].astype(str) + '\n' + df['Date'].astype(str)
# NOTE: len(df.index+1) is the same as len(df.index); the +1 is added to the
# index values, not to the length. Ticks start at 0 and step by a quarter
# (major) / an eighth (minor) of the row count.
# NOTE(review): x1 is a Series of strings, so matplotlib presumably places the
# points at integer positions 0..len-1, while these tick positions are
# generally not integers - confirm against the rendered x labels.
major_ticks_x1 = np.arange(0, len(df.index+1)+3.9, len(df.index+1)/4)
minor_ticks_x1 = np.arange(0, len(df.index+1)+3.9, len(df.index+1)/8)
# For a constant column the step (max-min)/4 below would be 0, so fixed ticks
# are used instead.
if(df[clicked1.get()].max()==df[clicked1.get()].min()):
major_ticks_y11 = np.arange(0,14,2)
minor_ticks_y11 = np.arange(0,14,4)
else:
major_ticks_y11 = np.arange(df[clicked1.get()].min(), df[clicked1.get()].max()+3.9, (df[clicked1.get()].max()-df[clicked1.get()].min())/4)
minor_ticks_y11 = np.arange(df[clicked1.get()].min(), df[clicked1.get()].max()+3.9, (df[clicked1.get()].max()-df[clicked1.get()].min())/8)
x1 = daytime
y11 = df[clicked1.get()].astype(float)
figure11 = Figure(figsize=(11,4), dpi=100)
subplot11 = figure11.add_subplot(1,1,1)
t11, = subplot11.plot(x1, y11, color='blue', linestyle='solid', linewidth = 2, label=clicked1.get())
subplot11.set_ylabel(clicked1.get())
subplot11.set_xlabel('Date & Time')
subplot11.set_xticks(major_ticks_x1)
subplot11.set_xticks(minor_ticks_x1, minor=True)
subplot11.set_yticks(major_ticks_y11)
subplot11.set_yticks(minor_ticks_y11, minor=True)
# Draw both grids, then make the minor grid fainter than the major one.
subplot11.grid(which='both')
subplot11.grid(which='minor', alpha=0.2)
subplot11.grid(which='major', alpha=0.5)
subplot11.set_title(clicked1.get()+' VS Time')
# The legend is anchored to the upper-right corner of the axes, outside the plot area.
subplot11.legend(loc = 'upper left', bbox_to_anchor=(1, 1), prop=fontP)
figure11.tight_layout()
# Saved as <folder>/Bldg1_<column>_<date1>.png; date1 is computed once at
# module level, not per call.
figure11.savefig(filename+'/Bldg1_'+clicked1.get()+'_'+date1+'.png')
tkinter.messagebox.showinfo("Message from Bldg 1.", "Graph is saved in .png file format in your PC.",parent=root)
options1 = [
"P1",
"P2",
"P3",
"P4"
]
clicked1 = tk.StringVar()
clicked1.set( "Select Y-axis Parameter" )
drop1 = tk.OptionMenu( root , clicked1 , *options1 )
canvas1.create_window(250, 100, window=drop1)
browseButton_Excel1line = tk.Button(root, text='Generate graph with selected Y-axis', command=get1lineGraph, bg='green', fg='white', font=('helvetica', 12, 'bold'))
canvas1.create_window(500, 100, window=browseButton_Excel1line)
button1 = tk.Button (root, text='Exit from Building 1!', command=root.destroy, bg='green', fg='white', font=('helvetica', 11, 'bold'))
canvas1.create_window(400, 180, window=button1)
root.mainloop()
Here is the Excel data, so that the code can be run as-is.
Date Time P1 P2 P3 P4
1-9-2019 9:57:00 114.33 21.07 21.01 0
1-9-2019 9:58:00 114.33 21.07 21.01 0
1-9-2019 9:59:00 6.6 4.63 21.01 0
1-9-2019 10:00:00 5.72 4.05 21.01 0
1-9-2019 10:01:00 5.28 4.05 21.01 0
1-9-2019 10:02:00 5.28 4.05 21.01 0
1-9-2019 10:03:00 5.72 4.05 21.01 0
1-9-2019 10:04:00 5.72 3.76 21.01 0
1-9-2019 10:05:00 5.72 4.05 21.01 0
1-9-2019 10:06:00 5.72 4.05 21.01 0
1-9-2019 10:07:00 5.72 4.05 21.01 0
1-9-2019 10:08:00 5.72 4.05 21.01 0
1-9-2019 10:09:00 5.72 4.05 21.01 0
1-9-2019 10:10:00 5.72 4.34 16.82 0
1-9-2019 10:11:00 5.72 4.34 16.79 0
1-9-2019 10:12:00 5.72 4.34 16.7 0
1-9-2019 10:13:00 5.72 4.34 16.79 0
Here is the graph I am getting:
As you can see, the y-axis works perfectly fine, but the x-axis does not give the desired ticks. The last grid line of the x-axis should show the last date-time of the dataframe.
Just as the y-axis grid starts at the min value and ends at the max value, I want the x-axis to start at the first index position (which is achieved) and end at the last index position (which still needs to be achieved).
Please help. I have been trying this for the last couple of days but couldn't find the answer.

Related

Animate px.line line with plotly express

I am learning plotly line animation and came across this question.
My df:
Date
1Mo
2Mo
3Mo
6Mo
1Yr
2Yr
0
2023-02-12
4.66
4.77
4.79
4.89
4.50
4.19
1
2023-02-11
4.66
4.77
4.77
4.90
4.88
4.49
2
2023-02-10
4.64
4.69
4.72
4.88
4.88
4.79
3
2023-02-09
4.62
4.68
4.71
4.82
4.88
4.89
4
2023-02-08
4.60
4.61
4.72
4.83
4.89
4.89
How do I animate this dataframe so the frame has
x = [1Mo, 2Mo, 3Mo, 6Mo, 1Yr, 2Yr], and
y = the actual value on a date, eg y=df[df['Date']=="2023-02-08"], animation_frame = df['Date']?
I tried
plot = px.line(df, x=df.columns[1:], y=df['Date'], title="Treasury Yields", animation_frame=df_treasuries_yield['Date'])
No joy :(
I think the problem is you cannot pass multiple columns to the animation_frame parameter. But we can get around this by converting your df from wide to long format using pd.melt – for your data, we will want to take all of the values from [1Mo, 2Mo, 3Mo, 6Mo, 1Yr, 2Yr] and put them in a new column called "value", and we will have a column called "variable" to tell us which column each value came from.
df_long = pd.melt(df, id_vars=['Date'], value_vars=['1Mo', '2Mo', '3Mo', '6Mo', '1Yr', '2Yr'])
This will look like the following:
Date variable value
0 2023-02-12 1Mo 4.66
1 2023-02-11 1Mo 4.66
2 2023-02-10 1Mo 4.64
3 2023-02-09 1Mo 4.62
4 2023-02-08 1Mo 4.60
...
28 2023-02-09 2Yr 4.89
29 2023-02-08 2Yr 4.89
Now you can pass the argument animation_frame='Date' to px.line:
fig = px.line(df_long, x="variable", y="value", animation_frame="Date", title="Yields")

How to separate plots from subplots and plot correctly?

I am trying to plot the following sample table:
Time Period
HR
Legal
Fin
Leadership
Market
UX
CX
01/04/2021
6.39
5.81
7.53
7.16
6.78
7.25
7.40
7.47
6.20
01/07/2021
6.95
6.25
7.46
7.16
7.05
7.51
7.70
7.83
6.69
01/10/2021
7.41
6.43
7.65
7.50
7.25
7.74
8.00
8.04
6.90
01/01/2022
7.51
6.51
7.74
7.52
8.00
7.84
8.10
8.04
7.05
01/04/2022
7.70
6.91
7.86
7.59
7.69
7.81
8.13
8.47
7.30
01/07/2022
7.80
6.60
7.50
7.50
7.80
7.50
7.70
7.90
7.15
(Please note there are 11 columns, but not all of them could be included here)
This is the code I am using:
import pandas as pd
from datetime import date, timedelta
import datetime
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import warnings
from plotly.offline import download_plotlyjs, init_notebook_mode, plot
from plotly.graph_objs import *
init_notebook_mode()
'set filepath'
data = pd.read_csv(inputfilepath, parse_dates=["Time Period"], index_col = "Time Period")
print(data.shape)
'Convert column from object to datetime'
#data["Time Period"] = pd.to_datetime(data['Time Period'], format="%d/%m/%Y")
data
# Create a 3x4 grid of axes and decorate one axes per loop iteration.
# NOTE(review): the grid has 12 axes; data.columns[i] raises IndexError if
# `data` has fewer than 12 columns.
fig, axes = plt.subplots(nrows=3, ncols=4, dpi=110, figsize=(10,6))
#axes.axis("off")
for i, ax in enumerate(axes.flatten()):
# NOTE: macro_data (the i-th column) is selected here but not used below;
# ax.plot receives the whole `data` frame, so every subplot draws all columns.
macro_data = data[data.columns[i]]
ax.plot(data, color='red', linewidth=1)
# Decorations
ax.set_title(data.columns[i], fontsize = 10)
ax.xaxis.set_ticks_position('none')
ax.yaxis.set_ticks_position('none')
ax.spines["top"].set_alpha(0)
ax.tick_params(labelsize=6)
plt.tight_layout();
I end up with 11 duplicated subplots, and each subplot has 11 lines across it. I want one line per subplot so that I can see each plot individually and compare them. Also, the plot is definitely wrong, since the lines do not change their gradients between time periods. Here is what I see:
And here is what I would (ideally) want:
For reference, I am using the guide found here:
https://github.com/nachi-hebbar/Multivariate-Time-Series-Forecasting/blob/main/VAR_Model%20(1).ipynb

How can I append to an existing Pandas DataFrame

This is what I have so far to retrieve data from polygon.io. For each sym I would like to add it to df
# For each ticker, request daily aggregates from polygon.io and append the
# last row of the returned results to df.
# `fromdate` and `to` are defined outside this snippet.
sym = ['OCGN', 'TKAT', 'MMAT', 'MDIA', 'PHUN']
df = pd.DataFrame()
i=0
for i in range(len(sym)):
stock = sym[i]
# NOTE(review): the URL literal is split across two lines here, which a
# single-quoted f-string does not allow - presumably a paste artifact.
# The trailing .format() is called with no arguments.
url = f'https://api.polygon.io/v2/aggs/ticker/{stock}/range/1/day/{fromdate}/{to}?
adjusted=true&sort=asc&limit=50000&apiKey=Demo'.format()
tick = requests.get(url)
tick =pd.json_normalize(tick.json()["results"])
# iloc[[-1]] (list indexer) keeps the last row as a one-row DataFrame.
daa = (tick.iloc[[-1]])
data = pd.DataFrame(daa)
# NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is the
# replacement - confirm the pandas version in use.
df = df.append(data, ignore_index=True)
print(df)
output
v vw o c h l t
0 15426806.0 6.2736 6.31 6.03 6.66 6.030 1638334800000
1 464144.0 4.9949 5.16 4.73 5.28 4.640 1638334800000
2 8101699.0 3.5164 3.75 3.36 3.82 3.300 1638334800000
3 109407.0 5.0286 4.90 4.77 5.28 4.654 1638334800000
4 45679175.0 3.7679 3.01 3.25 3.26 2.780 1638334800000

Plotting data from different datasets using plotnine and pandas

First off, I think it would be helpful to offer some background about what I want to do. I have a time-series dataset that describes air quality in a region, with hour resolution. Each row is an observation, each column is a different parameter (eg. Temperature, Pressure, Particulate matter, etc.) I want to take an average of observations for each hour in the day, across the entire five year dataset. However, I first need to distinguish between summer and winter observations. Here are a few rows for reference:
Date Time WSA WSV WDV WSM SGT T2M T10M DELTA_T PBAR SRAD RH PM25 AQI
0 2015-01-01 00:00:00 0.9 0.2 334 3.2 70.9 29.2 29.1 -0.1 740.4 8 102.5 69.0 157.970495
1 2015-01-01 01:00:00 1.5 0.7 129 4.0 58.8 29.6 29.2 -0.4 740.2 8 102.5 23.5 74.974249
2 2015-01-01 02:00:00 0.8 0.8 70 2.7 18.0 28.7 28.3 -0.4 740.3 7 102.2 40.1 112.326633
3 2015-01-01 03:00:00 1.1 1.0 82 3.4 21.8 28.2 27.8 -0.4 740.1 6 102.0 31.1 90.957082
4 2015-01-01 04:00:00 1.0 0.8 65 4.7 34.3 27.3 27.2 -0.2 739.7 6 101.7 13.7 54.364807
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43175 2016-12-30 19:00:00 1.7 0.7 268 4.1 63.6 33.8 34.1 0.3 738.8 8 100.7 38.4 108.140704
43176 2016-12-30 20:00:00 1.5 0.1 169 3.3 77.5 33.2 33.7 0.5 738.7 9 101.0 27.2 82.755365
43177 2016-12-30 21:00:00 1.4 0.5 278 4.0 65.7 32.5 32.8 0.3 738.6 9 101.4 42.5 118.236181
43178 2016-12-30 22:00:00 2.8 2.7 277 6.5 16.7 33.2 33.3 0.1 738.6 9 101.6 25.2 78.549356
43179 2016-12-30 23:00:00 1.9 0.3 241 4.2 74.2 31.0 31.6 0.6 738.4 9 100.4 18.7 64.879828
[43180 rows x 15 columns]
I have tried splitting the dataset into two based on season, and plotting each separately. This works, but I cannot manage to make the plot display a legend.
# Rows whose month is 4 through 9 count as summer; all other rows as winter.
mask = (df['Date'].dt.month > 3) & (df['Date'].dt.month < 10)
summer = df[mask]
winter = df[~mask]
# Average the columns per hour of day, separately for each season.
summer = summer.groupby(summer['Time'].dt.hour).mean().reset_index()
winter = winter.groupby(winter['Time'].dt.hour).mean().reset_index()
# Each layer gets its own data frame and a fixed colour passed outside aes();
# NOTE(review): colours set this way are presumably why no legend is drawn.
p = (
ggplot(mapping=aes( x='Time', y='PM25')) +
geom_point(data=summer, color='red')+
geom_point(data=winter, color='blue')
)
print(p)
Plotting with separate dataframes:
[1]: https://i.stack.imgur.com/W75kk.png
I did some more research, and learned that plotnine/ggplot can color-code data points based on one of their attributes. This approach requires the data to be a single dataset, so I added a parameter specifying the season. However, when I group by hour, this 'Season' attribute is removed. I assume it is because you cannot take the mean of non-numeric data. As such, I find myself in a bit of a paradox.
Here is my attempt at keeping the data together and adding a 'Season' column:
# Attempt to tag every row with a season and plot from a single frame.
df.insert(0,'Season', 0)
summer = (df['Date'].dt.month > 3) & (df['Date'].dt.month < 10)
# NOTE(review): df.where(...) returns a frame with every column of df, not a
# single label column; the second assignment also overwrites the first.
df['Season'] = df.where(summer, other='w')
df['Season'] = df.where(~summer, other='s')
# Grouping is by hour only: 'Season' is not one of the grouping keys.
df = df.groupby(df['Time'].dt.hour).mean()
print(df)
p = (
ggplot(data = df, mapping=aes( x='Time', y='PM25', color='Season')) +
geom_point()
)
print(p)
When I try to run this, it raises the following, and if I inspect the dataframe all non-numeric parameters have been removed:
plotnine.exceptions.PlotnineError: "Could not evaluate the 'color' mapping: 'Season' (original error: name 'Season' is not defined)"
Any suggestions would be hugely appreciated.
The data provided has been saved to airq.csv. Besides the Season column, an Hour column has been added. The code provided has been used. Both 'Hour' and 'Season' have to be passed to the groupby function. Two plotnine.ggplot possibilities are provided: the first using geom_point, and the second adding facet_wrap. Theme customization has been included for each case.
from plotnine import *
import pandas as pd
# Load the air-quality data; parse_dates=[0,1] parses the first two columns
# as datetimes (presumably Date and Time - confirm against airq.csv).
df = pd.read_csv('airq.csv', parse_dates=[0,1])
# Months 4 through 8 are labelled 'Summer'; every other month 'Winter'.
summer = (df['Date'].dt.month > 3) & (df['Date'].dt.month < 9)
# Build the label column from the boolean mask itself. DataFrame.where returns
# a whole frame (every column of df), which cannot be stored in the single
# 'Season' column, and a second where-assignment would overwrite the first.
df.insert(0, 'Season', summer.map({True: 'Summer', False: 'Winter'}))
df['Hour'] = df['Time'].dt.hour
# Group on both keys so that 'Season' survives the aggregation as a column
# that the plots below can map to colour or facets.
df = df.groupby(['Hour', 'Season']).mean().reset_index()
custom_axis = theme(axis_text_x = element_text(color="grey", size=6, angle=90, hjust=.3),
axis_text_y = element_text(color="grey", size=6),
plot_title = element_text(size = 25, face = "bold"),
axis_title = element_text(size = 10)
)
(
ggplot(data = df, mapping = aes(x='Hour', y='PM25',
color='Season')) + geom_point() +
custom_axis + ylab("Particulate matter 2.5 micrometres") + xlab("Hour") + labs(title="PM air quality report")
)
custom_axis = theme(axis_text_x = element_text(color="grey", size=6, angle=90, hjust=.3),
axis_text_y = element_text(color="grey", size=6),
plot_title = element_text(size = 25, face = "bold"),
axis_title = element_text(size = 10),
panel_spacing_y=.4,
figure_size=(8, 4)
)
(
ggplot(data = df, mapping = aes(x='Hour', y='PM25')) + geom_point(alpha=1) + facet_wrap('Season') +
custom_axis + ylab("Particulate matter 2.5 micrometres") + xlab("Hour") + labs(title="PM air quality report")
)

How to make axis tick labels visible on the other side of the plot in gridspec?

Plotting my favourite example dataframe, which looks like this:
x val1 val2 val3
0 0.0 10.0 NaN NaN
1 0.5 10.5 NaN NaN
2 1.0 11.0 NaN NaN
3 1.5 11.5 NaN 11.60
4 2.0 12.0 NaN 12.08
5 2.5 12.5 12.2 12.56
6 3.0 13.0 19.8 13.04
7 3.5 13.5 13.3 13.52
8 4.0 14.0 19.8 14.00
9 4.5 14.5 14.4 14.48
10 5.0 NaN 19.8 14.96
11 5.5 15.5 15.5 15.44
12 6.0 16.0 19.8 15.92
13 6.5 16.5 16.6 16.40
14 7.0 17.0 19.8 18.00
15 7.5 17.5 17.7 NaN
16 8.0 18.0 19.8 NaN
17 8.5 18.5 18.8 NaN
18 9.0 19.0 19.8 NaN
19 9.5 19.5 19.9 NaN
20 10.0 20.0 19.8 NaN
I have two subplots, for some other reasons it is best for me to use gridspec. The plotting code is as follows (it is quite comprehensive, so I would like to avoid major changes in the code that otherwise works perfectly and just doesn't do one unimportant detail):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib as mpl
df = pd.read_csv('H:/DocumentsRedir/pokus/dataframe.csv', delimiter=',')
# setting limits for x and y
ylimit=(0,10)
yticks1=np.arange(0,11,1)
xlimit1=(10,20)
xticks1 = np.arange(10,21,1)
# general plot formatting (axes colour, background etc.)
plt.style.use('ggplot')
plt.rc('axes',edgecolor='black')
plt.rc('axes', facecolor = 'white')
plt.rc('grid', color = 'grey')
plt.rc('grid', alpha = 0.3) # alpha is percentage of transparency
colours = ['g','b','r']
title1 = 'The plot'
# GRIDSPEC INTRO - rows, cols, distance of individual plots
fig = plt.figure(figsize=(6,4))
gs=gridspec.GridSpec(1,2, hspace=0.15, wspace=0.08,width_ratios=[1,1])
## SUBPLOT of GRIDSPEC with lines
# the first plot
axes1 = plt.subplot(gs[0,0])
for count, vals in enumerate(df.columns.values[1:]):
X = np.asarray(df[vals])
h = vals
p1 = plt.plot(X,df.index,color=colours[count],linestyle='-',linewidth=1.5,label=h)
# formatting
p1 = plt.ylim(ylimit)
p1 = plt.yticks(yticks1, yticks1, rotation=0)
p1 = axes1.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.1))
p1 = plt.setp(axes1.get_yticklabels(),fontsize=8)
p1 = plt.gca().invert_yaxis()
p1 = plt.ylabel('x [unit]', fontsize=14)
p1 = plt.xlabel("Value [unit]", fontsize=14)
p1 = plt.tick_params('both', length=5, width=1, which='minor', direction = 'in')
p1 = axes1.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.1))
p1 = plt.xlim(xlimit1)
p1 = plt.xticks(xticks1, xticks1, rotation=0)
p1 = plt.setp(axes1.get_xticklabels(),fontsize=8)
p1 = plt.legend(loc='best',fontsize = 8, ncol=2) #
# the second plot (something random)
axes2 = plt.subplot(gs[0,1])
# Each value column has its NaNs dropped, is shifted by 0.5 and is plotted
# against the remaining row index.
for count, vals in enumerate(df.columns.values[1:]):
nonans = df[vals].dropna()
result=nonans-0.5
p2 = plt.plot(result,nonans.index,color=colours[count],linestyle='-',linewidth=1.5)
# Same limits, ticks and inverted y axis as set for the first subplot.
p2 = plt.ylim(ylimit)
p2 = plt.yticks(yticks1, yticks1, rotation=0)
p2 = axes2.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.1))
p2 = plt.gca().invert_yaxis()
p2 = plt.xlim(xlimit1)
p2 = plt.xticks(xticks1, xticks1, rotation=0)
p2 = axes2.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(0.1))
p2 = plt.setp(axes2.get_xticklabels(),fontsize=8)
p2 = plt.xlabel("Other value [unit]", fontsize=14)
p2 = plt.tick_params('x', length=5, width=1, which='minor', direction = 'in')
# The y tick labels of this subplot are hidden here.
p2 = plt.setp(axes2.get_yticklabels(), visible=False)
fig.suptitle(title1, size=16)
plt.show()
However, is it possible to show the y tick labels of the second subplot on the right hand side? The current code produces this:
And I would like to know if there is an easy way to get this:
No, ok, found out it is precisely what I wanted.
I want the TICKS to be on BOTH sides, just the LABELS to be on the right. The solution above removes my ticks from the left side of the subplot, which doesn't look good. However, this answer seems to get the right solution :)
To sum up:
to get the ticks on both sides and labels on the right, this is what fixes it:
axes2.yaxis.tick_right()
axes2.yaxis.set_ticks_position('both')
And if you need the same for the x axis, it's axes2.xaxis.tick_top()
try something like
axes2.yaxis.tick_right()
Just look around Python Matplotlib Y-Axis ticks on Right Side of Plot.

Categories