How to fix Matplotlib plotting Pandas Series blank data - python

I am plotting a csv read in by pandas using matplotlib and the following code.
Image of CSV data:
fig, ax = plt.subplots(figsize=(10, 10))
plt.plot(dat['Forecast Hour'].iloc[0:45], dat['Forecasted End Time'].iloc[0:45],'b', marker='o')
plt.plot(dat['Forecast Hour'].iloc[0:46], dat['Forecasted Start Time'].iloc[0:46], 'r', marker='o')
bar = plt.bar(dat['Forecast Hour'].iloc[8:46], dat['Forecasted Event Length'].iloc[8:46], width=.8, color='gainsboro')
ax.tick_params(which='major',labelsize='12')
ax.grid(which='major', color='#CCCCCC', linestyle='-')
plt.xticks(rotation='90')
plt.xlabel('Forecast Run')
plt.ylabel('Forecasted Start/End Time')
plt.legend()
ax3 = ax.twinx()
mn, mx = ax.get_ylim()
ax3.set_ylim(0, 12)
ax3.set_ylabel('Forecasted Event Length')
When I try to run the code above, I receive the error message:
ValueError: could not convert string to float: '11:00 PM'
When I convert the Nan values to blank spaces using:
dat = dat.replace(np.nan, '', regex=True)
The data will plot but also include the blank space data, like so (space between 9:00 pm and x axis):
Image of Graphed data
Ultimately, how do I a) stop matplotlib from plotting this "blank data" or b) make 9:00 pm the 0 point for my graph axes?
Any help is very much appreciated!

Related

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
dates = []
values = []
fig, ax = plt.subplots()
hours = mdates.HourLocator(interval=2)
d_fmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)
ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages')
ax.tick_params(axis='y', colors=Commands.chartColor)
ax.tick_params(axis='x', colors=Commands.chartColor)
ax.tick_params(which='minor', colors=Commands.chartColor)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
ax.spines["bottom"].set_color(Commands.chartColor)
ax.spines["left"].set_color(Commands.chartColor)
ax.spines["top"].set_color(Commands.chartColor)
ax.spines["right"].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.
Since I don't have your data, I created some simple sample data and made a graph from it. The 0:00 time at the end of the timeline is a challenge, so I needed to be creative: I replaced the last 0:00 with 24:00. Then I set the interval value to 48 as the tick interval on the X axis. In your code, it will be every 2 hours. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
# Colour palette for the dark-themed chart (names kept exactly as in the question).
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
# Sample data: hourly timestamps from 2020-12-01 00:00 through 2020-12-02 00:00
# (25 points), reduced to 'HH:MM' strings, plus 25 random integers in [0, 25).
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
dates = date_rng.strftime('%H:%M').tolist()
values = np.random.randint(0,25, size=25)
# The closing midnight would otherwise repeat the '00:00' label; relabel it '24:00'.
dates[-1] = '24:00'
fig, ax = plt.subplots(figsize=(12,9))
# NOTE(review): the x values are plain strings here, not datetimes, so this
# locator is not operating on real dates. The answer text says interval=48 was
# chosen as the tick spacing for this axis -- confirm against the rendered figure.
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
# ax.fill(dates, values)
ax.plot(dates, values, color=lineColor)
# No second y argument is given, so the area is filled down to fill_between's
# default baseline of 0.
ax.fill_between(dates, values,)
# region ChartDesign
# Title, tick labels, axis label and spines all use chartColor so they stay
# readable on the dark background.
ax.set_title('Amount of Messages', color=chartColor)
ax.tick_params(axis='y', colors=chartColor)
ax.tick_params(axis='x', colors=chartColor)
# ax.tick_params(which='major', colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
ax.spines["bottom"].set_color(chartColor)
ax.spines["left"].set_color(chartColor)
ax.spines["top"].set_color(chartColor)
ax.spines["right"].set_color(chartColor)
# Paint the figure area outside the axes with the same background colour.
fig.set_facecolor(backgroundColor)
fig.tight_layout()
# Rotate and right-align the x tick labels.
fig.autofmt_xdate()
plt.show()

Adding a shaded box to a plot in python

I am looking to add a shaded box to my plot below. I want the box to go from Aug 25-Aug 30 and to run the length of the Y axis.
The following is my code for the two plots I have made...
df = pd.read_excel('salinity_temp.xlsx')
dates = df['Date']
sal = df['Salinity']
temp = df['Temperature']
fig, axes = plt.subplots(2, 1, figsize=(8,8), sharex=True)
axes[0].plot(dates, sal, lw=5, color="red")
axes[0].set_ylabel('Salinity (PSU)')
axes[0].set_title('Salinity', fontsize=14)
axes[1].set_title('Temperature', fontsize=14)
axes[1].plot(dates, temp, lw=5, color="blue")
axes[1].set_ylabel('Temperature (C)')
axes[1].set_xlabel('Dates, 2017', fontsize=12)
axes[1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
axes[0].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
axes[1].xaxis_date()
axes[0].xaxis_date()
I want the shaded box to highlight when Hurricane Harvey hit Houston, Texas (Aug 25- Aug 30). My data looks like:
Date Salinity Temperature
20-Aug 15.88144647 31.64707184
21-Aug 18.83088846 31.43848419
22-Aug 19.51015264 31.47655487
23-Aug 23.41655369 31.198349
24-Aug 25.16410124 30.63014984
25-Aug 25.2273574 28.8677597
26-Aug 28.35557667 27.49458313
27-Aug 18.52829235 25.92834473
28-Aug 7.423231661 24.06635284
29-Aug 0.520394177 23.47881317
30-Aug 0.238508327 23.90857697
31-Aug 0.143210364 24.30892944
1-Sep 0.206473387 25.20442963
2-Sep 0.241343182 26.32663727
3-Sep 0.58000503 26.93431854
4-Sep 1.182055098 27.8212738
5-Sep 3.632014919 28.23947906
6-Sep 4.672006985 27.29686737
7-Sep 5.938766377 26.8693161
8-Sep 9.107671159 26.48963928
9-Sep 8.180587303 26.05213165
10-Sep 6.200532091 25.73104858
11-Sep 5.144526191 25.60035706
12-Sep 5.106032451 25.73139191
13-Sep 4.279492562 26.06132507
14-Sep 5.255868992 26.74919128
15-Sep 8.026764063 27.23724365
I have tried using the rectangle function in this link (https://discuss.analyticsvidhya.com/t/how-to-add-a-patch-in-a-plot-in-python/5518) however can't seem to get it to work properly.
Independent of your specific data, it sounds like you need axvspan. Try running this after your plotting code:
# Shade Aug 25 - Aug 30 on every subplot; axvspan covers the full height of
# each axes by default, so no y limits have to be supplied.
for ax in axes:
    ax.axvspan('2017-08-25', '2017-08-30', color='black', alpha=0.5)
This will work if dates = df['Date'] is stored as type datetime64. It might not work with other datetime data types, and it won't work if dates contains date strings.

pandas dataframe recession highlighting plot

I have a pandas dataframe, shown in the table below, whose index is in yyyy-mm format
and which holds a US recession indicator (USREC) and a time-series variable M1. Please see the table below:
Date USREC M1
2000-12 1088.4
2001-01 1095.08
2001-02 1100.58
2001-03 1108.1
2001-04 1 1116.36
2001-05 1 1117.8
2001-06 1 1125.45
2001-07 1 1137.46
2001-08 1 1147.7
2001-09 1 1207.6
2001-10 1 1166.64
2001-11 1 1169.7
2001-12 1182.46
2002-01 1190.82
2002-02 1190.43
2002-03 1194.85
2002-04 1186.82
2002-05 1186.9
2002-06 1194.55
2002-07 1199.26
2002-08 1183.7
2002-09 1197.1
2002-10 1203.47
I want to plot a chart in Python that looks like the attached chart, which was created in Excel.
I have searched for various examples online, but none of them show a chart like the one below. Can you please help? Thank you.
I would also appreciate a pointer to any plotting library that needs few inputs and is easy to use for most plots, similar to the charts Excel provides.
EDIT:
I checked out the example in the page https://matplotlib.org/examples/pylab_examples/axhspan_demo.html. The code I have used is below.
fig, axes = plt.subplots()
df['M1'].plot(ax=axes)
ax.axvspan(['USREC'],color='grey',alpha=0.5)
I didn't see any example on the matplotlib.org webpage where I can pass another column as the axvspan range. With my code above I get the error
TypeError: axvspan() missing 1 required positional argument: 'xmax'
I figured it out. I created secondary Y axis for USREC and hid the axis label just like I wanted to, but it also hid the USREC from the legend. But that is a minor thing.
def plot_var(y1):
    """Plot ``y1`` as a blue line with the ``USREC`` column drawn as a grey area.

    ``y1`` is drawn on the primary axes. ``USREC`` is read from the
    module-level ``df`` (it is not a parameter) and drawn as a translucent
    area plot on a twin axes, then the figure is shown and closed.
    """
    fig0, ax0 = plt.subplots()
    # Second y axis sharing the same x axis, used only for the shaded area.
    ax1 = ax0.twinx()
    y1.plot(kind='line', stacked=False, ax=ax0, color='blue')
    df['USREC'].plot(kind='area', secondary_y=True, ax=ax1, alpha=.2, color='grey')
    ax0.legend(loc='upper left')
    ax1.legend(loc='upper left')
    # The pyplot calls below act on the current axes, not necessarily ax0.
    plt.ylim(ymax=0.8)
    plt.axis('off')
    plt.xlabel('Date')
    plt.show()
    plt.close()
plot_var(df['M1'])
There is a problem with Zenvega's answer: The recession lines are not vertical, as they should be. What exactly goes wrong, I am not entirely sure, but I show below how to get vertical lines.
My answer uses the following syntax ax.fill_between(date_index, y1=ymin, y2=ymax, where=True/False), where I compute the y1 and y2 arguments manually from the axis object and where the where argument takes the recession data as a boolean of True or False values.
import pandas as pd
import matplotlib.pyplot as plt
# get data: see further down for `string_data`
df = pd.read_csv(string_data, skipinitialspace=True)
df['Date'] = pd.to_datetime(df['Date'])
# convenience function
def plot_series(ax, df, index='Date', cols=None, area='USREC'):
    """Draw ``cols`` as lines on ``ax`` and shade the rows where ``area`` is set.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    df : DataFrame containing the ``index``, ``cols`` and ``area`` columns.
        It is copied before being changed, so the caller's frame is untouched.
    index : name of the date column used for the x axis.
    cols : column name(s) to plot as lines; ``None`` means ``['M1']``.
    area : name of the 0/1 indicator column marking the periods to shade.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    if cols is None:
        cols = ['M1']
    # work on a copy so the boolean conversion does not leak back to the caller
    df = df.copy()
    # convert area variable to boolean
    df[area] = df[area].astype(int).astype(bool)
    # set up an index based on date
    df = df.set_index(keys=index, drop=False)
    # line plot
    df.plot(ax=ax, x=index, y=cols, color='blue')
    # extract limits
    y1, y2 = ax.get_ylim()
    ax.fill_between(df.index, y1=y1, y2=y2, where=df[area], facecolor='grey', alpha=0.4)
    # fill_between re-triggers autoscaling, which would add margins around the
    # band; pin the limits so the shading spans the full height of the axes
    ax.set_ylim(y1, y2)
    return ax
# set up figure, axis
f, ax = plt.subplots()
plot_series(ax, df)
ax.grid(True)
plt.show()
# copy-pasted data from OP
from io import StringIO
string_data=StringIO("""
Date,USREC,M1
2000-12,0,1088.4
2001-01,0,1095.08
2001-02,0,1100.58
2001-03,0,1108.1
2001-04,1,1116.36
2001-05,1,1117.8
2001-06,1,1125.45
2001-07,1,1137.46
2001-08,1,1147.7
2001-09,1,1207.6
2001-10,1,1166.64
2001-11,1,1169.7
2001-12,0,1182.46
2002-01,0,1190.82
2002-02,0,1190.43
2002-03,0,1194.85
2002-04,0,1186.82
2002-05,0,1186.9
2002-06,0,1194.55
2002-07,0,1199.26
2002-08,0,1183.7
2002-09,0,1197.1
2002-10,0,1203.47""")
# after formatting, the data would look like this:
>>> df.head(2)
Date USREC M1
Date
2000-12-01 2000-12-01 False 1088.40
2001-01-01 2001-01-01 False 1095.08
See how the lines are vertical:
An alternative approach would be to use plt.axvspan(), which spans the full height of the axes by default, so the y1 and y2 values do not have to be computed.

Shared x axes in Pandas Python

Usually I always get an answer to my questions here, so here is a new one. I'm working on some data analysis where I import different csv files, set index and then I try to plot it.
Here is the code. Please be aware that I use obdobje and -obdobje because the index comes from different files but the format is the same:
#to start plotting
fig, axes = plt.subplots(nrows=2, ncols=1)
#first dataframe
df1_D1[obdobje:].plot(ax=axes[0], linewidth=2, color='b', linestyle='solid')
#second dataframe
df2_D1[obdobje:].plot(ax=axes[0], linewidth=2, color='b',linestyle='dashed')
#third data frame
df_index[:-obdobje].plot(ax=axes[1])
plt.show()
Here is data that is imported in the dataframe:
Adj Close
Date
2015-12-01 73912.6016
2015-11-02 75638.3984
2015-10-01 79409.0000
2015-09-01 74205.5000
2015-08-03 75210.3984
Location CLI
TIME
1957-12-01 GBR 98.06755
1958-01-01 GBR 98.09290
1958-02-01 GBR 98.16694
1958-03-01 GBR 98.27734
1958-04-01 GBR 98.40984
And the output that I get is:
So, the problem is that the X axes are not shared. They are close, but not shared. Any suggestions on how to solve this? I tried sharex=True, but Python crashed every time.
Thanks in advance guys.
Best regards, David
You may want to reindex your final dataframe to a union of all data frames. matplotlib takes the x-axis of the last subplot as the axis of the entire plot when enabling sharex=True. This should get you along,
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Two stacked subplots that share one x axis.
fig, axes = plt.subplots(nrows=2,
ncols=1,
sharex=True)
# Three random single-column frames whose daily date ranges start on
# different days and only partly overlap.
df1 = pd.DataFrame(
data = np.random.rand(25, 1),
index=pd.date_range('2015-05-05', periods=25),
columns=['DF1']
)
df2 = pd.DataFrame(
data = np.random.rand(25, 1),
index=pd.date_range('2015-04-10', periods=25),
columns=['DF2']
)
df3 = pd.DataFrame(
data = np.random.rand(50, 1),
index=pd.date_range('2015-03-20', periods=50),
columns=['DF3']
)
# Extend df3's index to the union of all three indexes (dates df3 lacked become
# NaN rows), so the frame plotted on the last subplot covers the whole range.
df3 = df3.reindex(index=df3.index.union(df2.index).union(df1.index))
# df1 and df2 share the top subplot; df3 goes on the bottom one.
df1.plot(ax=axes[0], linewidth=2, color='b', linestyle='solid')
df2.plot(ax=axes[0], linewidth=2, color='b', linestyle='dashed')
df3.plot(ax=axes[1])
plt.show()
Produces this,
As you can see, the axes are now aligned.

Changing axis on scatterplot to fixed intervals involving time

I have the following code. My problem is that I want to set the range of the y axis from 0:00 to 12:00 and have it equally spaced in increments of one hour, e.g. 0:00, 1:00, 2:00, etc. Any suggestions on how I would go about doing this?
I also want to get rid of the extra :00 at the end of each label. Right now it reads 00:00:00, 01:00:00 and so on, when I only want it to read 0:00, 1:00. Any ideas how I can go about doing this? Here is the code I have so far.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
data = pd.read_csv('data.csv', sep=',', header=None)
print (data)
ints = data[data[1]=='INT']
exts = data[data[1]=='EXT']
int_times = [datetime.datetime.time(datetime.datetime.strptime(t, '%H:%M')) for t in ints[4]]
ext_times = [datetime.datetime.time(datetime.datetime.strptime(t, '%H:%M')) for t in exts[4]]
int_dist = [d for d in ints[3]]
ext_dist = [d for d in exts[3]]
fig, ax = plt.subplots()
ax.scatter(int_dist, int_times, c='red', s=80)
ax.scatter(ext_dist, ext_times, c='blue', s=80)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
plt.ylim(0,45000)
plt.show()
Well, it's possible to generate a list of times containing only the minutes and seconds. You need to change the format to '%M:%S'.
Next, you need to change the labels using plt.xticks(). I changed them for the x axis.
Here is a sample
from datetime import date, datetime, time, timedelta

# Build nine tick labels, 7 seconds apart, formatted as minutes:seconds.
start = datetime.combine(date.today(), time(0, 0))
axis_times = []
y_values = []
for i in range(9):
    # advance before formatting, so the first label is 00:07 rather than 00:00
    start += timedelta(seconds=7)
    axis_times.append(start.strftime("%M:%S"))
    y_values.append(i)
fig, ax = plt.subplots()
# Plot against integer positions; the time strings are attached as tick labels below.
ax.scatter(range(len(axis_times)), y_values, c='red', s=80)
ax.scatter(range(len(axis_times)), y_values, c='blue', s=20)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
# One tick per point, labelled with the matching formatted time.
plt.xticks(range(len(axis_times)), axis_times, size='small')
plt.show()
You can manually specify ticks to whatever you need. I can't run your example without the csv data but you can do,
import numpy as np
import pylab as plt
import datetime
#Some arbitrary data
x = np.linspace(0.,12.,100)
fig, ax = plt.subplots(1, 1)
ax.plot(x,np.sin(x)+6.)
#Put a tick on every integer from 0 to 12 (13 ticks in total)
ax.set_yticks(range(13))
#Relabel the ticks as needed: 0 -> "0:00", 1 -> "1:00", ..., 12 -> "12:00"
locs, labels = plt.yticks()
new_labels = [str(time) + ":00" for time in range(0,13)]
plt.yticks(locs, new_labels)
plt.show()
You can replace the new labels with datetime values or formatted strings which you obtain from your data (e.g. convert to string and remove the last 0)...

Categories