How to fix Matplotlib plotting Pandas Series blank data

How to fix Matplotlib plotting Pandas Series blank data - python

I am plotting a csv read in by pandas using matplotlib and the following code.
Image of CSV data:
# Draw the forecasted end/start times as lines and the event length as bars
# on one axes, then add a twin y-axis labelled for the event length.
fig, ax = plt.subplots(figsize=(10, 10))
# NOTE(review): the two line plots slice different row ranges (0:45 vs 0:46),
# and the bars start at row 8 — confirm these ranges are intended.
plt.plot(dat['Forecast Hour'].iloc[0:45], dat['Forecasted End Time'].iloc[0:45],'b', marker='o')
plt.plot(dat['Forecast Hour'].iloc[0:46], dat['Forecasted Start Time'].iloc[0:46], 'r', marker='o')
bar = plt.bar(dat['Forecast Hour'].iloc[8:46], dat['Forecasted Event Length'].iloc[8:46], width=.8, color='gainsboro')
ax.tick_params(which='major',labelsize='12')
ax.grid(which='major', color='#CCCCCC', linestyle='-')
plt.xticks(rotation='90')
plt.xlabel('Forecast Run')
plt.ylabel('Forecasted Start/End Time')
# NOTE(review): none of the plot/bar calls above passes label=, so the legend
# has no entries to show.
plt.legend()
# Twin axis sharing x with `ax`; only its limits and label are set here —
# the bars above were drawn on `ax`, not on this axis.
ax3 = ax.twinx()
# NOTE(review): mn and mx are not used in the lines shown.
mn, mx = ax.get_ylim()
ax3.set_ylim(0, 12)
ax3.set_ylabel('Forecasted Event Length')
When I try to run the code above I receive the error message:
ValueError: could not convert string to float: '11:00 PM'
When I convert the NaN values to blank strings using:
dat = dat.replace(np.nan, '', regex=True)
The data will plot but also include the blank space data, like so (space between 9:00 pm and x axis):
Image of Graphed data
Ultimately, how do I a) stop matplotlib from plotting this "blank data" or b) make 9:00 pm the 0 point for my graph axes?
Any help is very much appreciated!

Related

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
# Colours for the line, the axis decorations, the background and the grid.
# NOTE(review): they are defined as bare names here but read as
# Commands.<name> below — presumably these lines live inside a class named
# Commands; confirm.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
# NOTE(review): both lists are empty at this point in the snippet; the real
# data is presumably filled in elsewhere.
dates = []
values = []
fig, ax = plt.subplots()
# Major ticks every 2 hours labelled HH:MM, minor ticks every hour.
hours = mdates.HourLocator(interval=2)
d_fmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)
# NOTE(review): the data is filled twice (ax.fill here, plt.fill_between
# below) in addition to the line plot — confirm both fills are wanted.
ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
# NOTE(review): the x-limits are given as 'HH:MM' strings, not datetimes.
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages')
ax.tick_params(axis='y', colors=Commands.chartColor)
ax.tick_params(axis='x', colors=Commands.chartColor)
ax.tick_params(which='minor', colors=Commands.chartColor)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
# All four spines get the same colour as the ticks and labels.
ax.spines["bottom"].set_color(Commands.chartColor)
ax.spines["left"].set_color(Commands.chartColor)
ax.spines["top"].set_color(Commands.chartColor)
ax.spines["right"].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.

Since I don't have any sample data, I created some simple data and plotted it. The 0:00 time on the timeline is a challenge, so I needed to be creative: I replaced the last 0:00 with 24:00. Then I set the time interval value to 48 as the interval on the X axis. In your code, it will be every 2 hours. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
# Colours for the line, the axis decorations, the background and the grid.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
# Sample data: hourly stamps from 2020-12-01 00:00 to 2020-12-02 00:00
# rendered as 'HH:MM' strings, plus 25 random integers drawn from [0, 25).
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
dates = date_rng.strftime('%H:%M').tolist()
values = np.random.randint(0,25, size=25)
# The final stamp is the second midnight; relabel it so it does not repeat
# the first '00:00' entry.
dates[-1] = '24:00'
fig, ax = plt.subplots(figsize=(12,9))
# NOTE(review): a date locator is installed although the x values are plain
# strings; 48 is the interval value the author settled on — confirm the
# resulting tick spacing on your matplotlib version.
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
# ax.fill(dates, values)
ax.plot(dates, values, color=lineColor)
ax.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages', color=chartColor)
ax.tick_params(axis='y', colors=chartColor)
ax.tick_params(axis='x', colors=chartColor)
# ax.tick_params(which='major', colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
# All four spines get the same colour as the ticks and labels.
ax.spines["bottom"].set_color(chartColor)
ax.spines["left"].set_color(chartColor)
ax.spines["top"].set_color(chartColor)
ax.spines["right"].set_color(chartColor)
fig.set_facecolor(backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()

Adding a shaded box to a plot in python

I am looking to add a shaded box to my plot below. I want the box to go from Aug 25-Aug 30 and to run the length of the Y axis.
The following is my code for the two plots I have made...
# Load the salinity/temperature table and pull out the three columns used.
df = pd.read_excel('salinity_temp.xlsx')
dates = df['Date']
sal = df['Salinity']
temp = df['Temperature']
# Two stacked panels sharing one x-axis: salinity on top, temperature below.
fig, axes = plt.subplots(2, 1, figsize=(8,8), sharex=True)
axes[0].plot(dates, sal, lw=5, color="red")
axes[0].set_ylabel('Salinity (PSU)')
axes[0].set_title('Salinity', fontsize=14)
axes[1].set_title('Temperature', fontsize=14)
axes[1].plot(dates, temp, lw=5, color="blue")
axes[1].set_ylabel('Temperature (C)')
axes[1].set_xlabel('Dates, 2017', fontsize=12)
# Label date ticks as abbreviated month + day (the '%b %d' format).
# NOTE(review): `matplotlib.dates` is referenced but its import is not shown
# in this snippet.
axes[1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
axes[0].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b %d'))
axes[1].xaxis_date()
axes[0].xaxis_date()
I want the shaded box to highlight when Hurricane Harvey hit Houston, Texas (Aug 25- Aug 30). My data looks like:
Date Salinity Temperature
20-Aug 15.88144647 31.64707184
21-Aug 18.83088846 31.43848419
22-Aug 19.51015264 31.47655487
23-Aug 23.41655369 31.198349
24-Aug 25.16410124 30.63014984
25-Aug 25.2273574 28.8677597
26-Aug 28.35557667 27.49458313
27-Aug 18.52829235 25.92834473
28-Aug 7.423231661 24.06635284
29-Aug 0.520394177 23.47881317
30-Aug 0.238508327 23.90857697
31-Aug 0.143210364 24.30892944
1-Sep 0.206473387 25.20442963
2-Sep 0.241343182 26.32663727
3-Sep 0.58000503 26.93431854
4-Sep 1.182055098 27.8212738
5-Sep 3.632014919 28.23947906
6-Sep 4.672006985 27.29686737
7-Sep 5.938766377 26.8693161
8-Sep 9.107671159 26.48963928
9-Sep 8.180587303 26.05213165
10-Sep 6.200532091 25.73104858
11-Sep 5.144526191 25.60035706
12-Sep 5.106032451 25.73139191
13-Sep 4.279492562 26.06132507
14-Sep 5.255868992 26.74919128
15-Sep 8.026764063 27.23724365
I have tried using the rectangle function in this link (https://discuss.analyticsvidhya.com/t/how-to-add-a-patch-in-a-plot-in-python/5518) however can't seem to get it to work properly.

Independent of your specific data, it sounds like you need axvspan. Try running this after your plotting code:
# Shade 25-30 Aug 2017 on every subplot. axvspan covers the full height of
# the axes by default, so only the x-range has to be given. The bounds are
# built as explicit timestamps instead of relying on matplotlib converting
# bare date strings.
span_start = pd.Timestamp('2017-08-25')
span_end = pd.Timestamp('2017-08-30')
for ax in axes:
    ax.axvspan(span_start, span_end, color='black', alpha=0.5)
This will work if dates = df['Date'] is stored as type datetime64. It might not work with other datetime data types, and it won't work if dates contains date strings.

pandas dataframe recession highlighting plot

I have a pandas dataframe, as shown in the table below, which has an index in yyyy-mm format,
a US recession period indicator (USREC) and a time series variable M1. Please see the table below.
Date USREC M1
2000-12 1088.4
2001-01 1095.08
2001-02 1100.58
2001-03 1108.1
2001-04 1 1116.36
2001-05 1 1117.8
2001-06 1 1125.45
2001-07 1 1137.46
2001-08 1 1147.7
2001-09 1 1207.6
2001-10 1 1166.64
2001-11 1 1169.7
2001-12 1182.46
2002-01 1190.82
2002-02 1190.43
2002-03 1194.85
2002-04 1186.82
2002-05 1186.9
2002-06 1194.55
2002-07 1199.26
2002-08 1183.7
2002-09 1197.1
2002-10 1203.47
I want to plot a chart in python that looks like the attached chart which was created in excel..
I have searched for various examples online, but none are able to show the chart like below. Can you please help? Thank you.
I would also appreciate hearing about any plotting library that is easier to use: one that needs few inputs but handles most of the plot types Excel provides.
EDIT:
I checked out the example in the page https://matplotlib.org/examples/pylab_examples/axhspan_demo.html. The code I have used is below.
# Plot M1, then try to shade the recession months.
fig, axes = plt.subplots()
df['M1'].plot(ax=axes)
# NOTE(review): the axes object above is bound to `axes`, but this call uses
# `ax`. axvspan is also given a single positional argument (a list holding
# the column name) where it needs a start and an end value, which is what
# the TypeError about the missing 'xmax' reports.
ax.axvspan(['USREC'],color='grey',alpha=0.5)
So I didn't see any example on the matplotlib.org webpage where I can pass another column as the axvspan range. With my code above I get the error
TypeError: axvspan() missing 1 required positional argument: 'xmax'

I figured it out. I created secondary Y axis for USREC and hid the axis label just like I wanted to, but it also hid the USREC from the legend. But that is a minor thing.
def plot_var(y1):
    """Plot ``y1`` as a blue line with ``df['USREC']`` as a grey area behind it.

    Args:
        y1: Object with a pandas-style ``.plot`` method; the example call
            passes ``df['M1']``.

    Reads ``df`` from the enclosing (module) scope. Shows the figure and then
    closes it. Returns nothing.
    """
    fig0, ax0 = plt.subplots()
    ax1 = ax0.twinx()
    y1.plot(kind='line', stacked=False, ax=ax0, color='blue')
    # NOTE(review): ax1 is already a twin of ax0 and secondary_y=True is
    # passed as well — confirm both are needed.
    df['USREC'].plot(kind='area', secondary_y=True, ax=ax1, alpha=.2, color='grey')
    ax0.legend(loc='upper left')
    ax1.legend(loc='upper left')
    # The pyplot calls below act on whichever axes is current at this point.
    plt.ylim(ymax=0.8)
    plt.axis('off')
    plt.xlabel('Date')
    plt.show()
    plt.close()
plot_var(df['M1'])

There is a problem with Zenvega's answer: The recession lines are not vertical, as they should be. What exactly goes wrong, I am not entirely sure, but I show below how to get vertical lines.
My answer uses the following syntax ax.fill_between(date_index, y1=ymin, y2=ymax, where=True/False), where I compute the y1 and y2 arguments manually from the axis object and where the where argument takes the recession data as a boolean of True or False values.
import pandas as pd
import matplotlib.pyplot as plt
# get data: see further down for `string_data`
df = pd.read_csv(string_data, skipinitialspace=True)
df['Date'] = pd.to_datetime(df['Date'])
# convenience function
def plot_series(ax, df, index='Date', cols=['M1'], area='USREC'):
    """Draw ``cols`` as lines over ``index`` and shade rows where ``area`` is set.

    Args:
        ax: Matplotlib axes to draw on.
        df: DataFrame holding the ``index``, ``cols`` and ``area`` columns.
            It is not modified; the function works on a copy.
        index: Name of the date column used for the x-axis.
        cols: Column name(s) to draw as lines. The default list is only
            read, never mutated.
        area: Name of the 0/1 indicator column; rows where it is non-zero
            are shaded grey.

    Returns:
        The ``ax`` that was passed in.
    """
    # work on a copy so the caller's frame keeps its original column dtype and index
    df = df.copy()
    # convert area variable to boolean; blank (NaN) cells count as "not flagged"
    df[area] = df[area].fillna(0).astype(int).astype(bool)
    # set up an index based on date
    df = df.set_index(keys=index, drop=False)
    # line plot
    df.plot(ax=ax, x=index, y=cols, color='blue')
    # extract limits
    y1, y2 = ax.get_ylim()
    ax.fill_between(df[index].index, y1=y1, y2=y2, where=df[area], facecolor='grey', alpha=0.4)
    # pin the limits that the band was sized to; otherwise autoscaling adds
    # fresh margins around the band and it no longer reaches the axes edges
    ax.set_ylim(y1, y2)
    return ax
# set up figure, axis
f, ax = plt.subplots()
plot_series(ax, df)
ax.grid(True)
plt.show()
# copy-pasted data from OP
from io import StringIO
string_data=StringIO("""
Date,USREC,M1
2000-12,0,1088.4
2001-01,0,1095.08
2001-02,0,1100.58
2001-03,0,1108.1
2001-04,1,1116.36
2001-05,1,1117.8
2001-06,1,1125.45
2001-07,1,1137.46
2001-08,1,1147.7
2001-09,1,1207.6
2001-10,1,1166.64
2001-11,1,1169.7
2001-12,0,1182.46
2002-01,0,1190.82
2002-02,0,1190.43
2002-03,0,1194.85
2002-04,0,1186.82
2002-05,0,1186.9
2002-06,0,1194.55
2002-07,0,1199.26
2002-08,0,1183.7
2002-09,0,1197.1
2002-10,0,1203.47""")
# after formatting, the data would look like this:
>>> df.head(2)
Date USREC M1
Date
2000-12-01 2000-12-01 False 1088.40
2001-01-01 2001-01-01 False 1095.08
See how the lines are vertical:
An alternative approach would be to use plt.axvspan(), which would automatically calculate the y1 and y2 values.

Shared x axes in Pandas Python

Usually I always get an answer to my questions here, so here is a new one. I'm working on some data analysis where I import different csv files, set index and then I try to plot it.
Here is the code. Please be aware that I use obdobje and -obdobje because the index comes from different files but the format is the same:
#to start plotting
fig, axes = plt.subplots(nrows=2, ncols=1)
#first dataframe
df1_D1[obdobje:].plot(ax=axes[0], linewidth=2, color='b', linestyle='solid')
#second dataframe
df2_D1[obdobje:].plot(ax=axes[0], linewidth=2, color='b',linestyle='dashed')
#third data frame
df_index[:-obdobje].plot(ax=axes[1])
plt.show()
Here is data that is imported in the dataframe:
Adj Close
Date
2015-12-01 73912.6016
2015-11-02 75638.3984
2015-10-01 79409.0000
2015-09-01 74205.5000
2015-08-03 75210.3984
Location CLI
TIME
1957-12-01 GBR 98.06755
1958-01-01 GBR 98.09290
1958-02-01 GBR 98.16694
1958-03-01 GBR 98.27734
1958-04-01 GBR 98.40984
And the output that I get is:
So, the problem is that the X axes are not shared. They are close, but not shared. Any suggestions on how to solve this? I tried sharex=True, but Python crashed every time.
Thanks in advance guys.
Best regards, David

You may want to reindex your final dataframe to a union of all data frames. matplotlib takes the x-axis of the last subplot as the axis of the entire plot when enabling sharex=True. This should get you along,
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Two stacked panels whose x-axes are linked.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)

# Three single-column frames of random values covering different,
# overlapping date ranges.
df1 = pd.DataFrame(
    data=np.random.rand(25, 1),
    index=pd.date_range('2015-05-05', periods=25),
    columns=['DF1'],
)
df2 = pd.DataFrame(
    data=np.random.rand(25, 1),
    index=pd.date_range('2015-04-10', periods=25),
    columns=['DF2'],
)
df3 = pd.DataFrame(
    data=np.random.rand(50, 1),
    index=pd.date_range('2015-03-20', periods=50),
    columns=['DF3'],
)

# Stretch the frame drawn on the last panel over every date that appears in
# any of the three frames, so its x-range covers all of them.
full_index = df3.index.union(df2.index).union(df1.index)
df3 = df3.reindex(index=full_index)

upper, lower = axes
df1.plot(ax=upper, linewidth=2, color='b', linestyle='solid')
df2.plot(ax=upper, linewidth=2, color='b', linestyle='dashed')
df3.plot(ax=lower)
plt.show()
Produces this,
As you can see, the axes are now aligned.

Changing axis on scatterplot to fixed intervals involving time

I have the following code. My problem is that I want to set the range of the y axis from 0:00 to 12:00 and have it equally spaced in increments of one hour, e.g. 0:00, 1:00, 2:00, etc. Any suggestions on how I would go about doing this?
I also want to get rid of the extra :00 at the end of each label. Right now it reads 00:00:00, 01:00:00 and so on, when I only want it to read 0:00, 1:00. Any ideas how I can go about doing this? Here is the code I have so far.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
# Read the headerless CSV; columns are addressed by position below.
data = pd.read_csv('data.csv', sep=',', header=None)
print (data)
# Split the rows by the label found in column 1.
ints = data[data[1]=='INT']
exts = data[data[1]=='EXT']
# Parse the 'HH:MM' strings in column 4 into datetime.time objects.
int_times = [datetime.datetime.time(datetime.datetime.strptime(t, '%H:%M')) for t in ints[4]]
ext_times = [datetime.datetime.time(datetime.datetime.strptime(t, '%H:%M')) for t in exts[4]]
# Column 3 supplies the x values (the axis is labelled 'Distance' below).
int_dist = [d for d in ints[3]]
ext_dist = [d for d in exts[3]]
fig, ax = plt.subplots()
ax.scatter(int_dist, int_times, c='red', s=80)
ax.scatter(ext_dist, ext_times, c='blue', s=80)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
# NOTE(review): the y-limits are plain numbers (0 to 45000) while the y data
# are datetime.time objects — confirm which unit the axis ends up using.
plt.ylim(0,45000)
plt.show()

Well, it's possible to generate a list of times containing only the minutes and seconds. You need to change the format to '%M:%S'.
Next, you need to change the tick labels using plt.xticks(). I did this for the x axis.
Here is a sample
from datetime import date, datetime, time, timedelta

# Build nine 'MM:SS' tick labels, 7 seconds apart, starting 7 s after
# midnight today, with 0..8 as the matching y values.
start = datetime.combine(date.today(), time(0, 0))
axis_times = []
y_values = []
for i in range(9):
    start += timedelta(seconds=7)
    axis_times.append(start.strftime("%M:%S"))
    y_values.append(i)

# Plot against integer positions and relabel those positions with the
# formatted times afterwards.
positions = range(len(axis_times))
fig, ax = plt.subplots()
ax.scatter(positions, y_values, c='red', s=80)
ax.scatter(positions, y_values, c='blue', s=20)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
plt.xticks(positions, axis_times, size='small')
plt.show()

You can manually specify ticks to whatever you need. I can't run your example without the csv data but you can do,
import numpy as np
import pylab as plt
import datetime
#Some arbitrary data
# Arbitrary sample curve over 0..12
x = np.linspace(0., 12., 100)
fig, ax = plt.subplots(1, 1)
ax.plot(x, np.sin(x) + 6.)
# One tick per whole number from 0 to 12 (13 ticks in total)
hour_ticks = range(13)
ax.set_yticks(hour_ticks)
# Read the tick positions back and relabel each one as "<hour>:00"
locs, labels = plt.yticks()
new_labels = [f"{hour}:00" for hour in hour_ticks]
plt.yticks(locs, new_labels)
plt.show()
You can replace the new labels with datetime values or formatted strings which you obtain from your data (e.g. convert to string and remove the last 0)...

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to fix Matplotlib plotting Pandas Series blank data - python

Related

Matplotlib - 24h Timeline graph

Adding a shaded box to a plot in python

pandas dataframe recession highlighting plot

Shared x axes in Pandas Python

Changing axis on scatterplot to fixed intervals involving time

Categories

Resources