Plotting superimposed charts (line and bar) with pandas and matplotlib - python

I am testing the capabilities of pandas to plot financial data (price and volume) on the same chart. If I try to render both data as lines, it works fine:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
a = pd.date_range('2019-01-01', '2019-06-01',freq = 'D')
b = np.random.normal(size = len(a), loc = 50)
v = np.random.normal(size = len(a), loc = 1000)
c = pd.DataFrame(index = a, data = zip(b,v), columns = ['price', 'volume'])
fig, ax = plt.subplots(figsize = (15,8))
bx = ax.twinx()
c.price.plot.line(ax = ax, color = 'r')
c.volume.plot.line(ax = bx, color = 'g', alpha = .2)
plt.show()
This gives:
However if I try to render one as a line and the other as a bar chart, by replacing the 3 last lines by:
c.price.plot.line(ax = ax, color = 'r')
c.volume.plot.bar(ax = bx, color = 'g', alpha = .2)
plt.show()
This gives the wrong result:
Would anybody know how to make the above code work with line + bar ??

Use Matplotlib plotting library.
Matplotlib's function pyplot has functions bar and plot. You can use them to display data on the same chart.
Example

Related

Plotting a scatterplot gif from a dataframe

I have a Dataframe with 6 rows of data and 4 columns. Is there any way to generate a gif scatterplot (y which are the 4 columns in different color versus x which are the index rows) plot in which in every frame of the gif, first data point of the Column 1 and its first respective row data is plotted in different color versus the shared x axis which are the indexes, at the same time, column 2, 3 and 4 first data points are plotted, and this goes progressively until the last 6th point is plotted for all of the columns? If a gif is not possible at all, is there any other way to generate at least movie so that I can include in my ppt slide? I appreciate any feedback you might have! The error I am getting is generating an empty plot and saying: TypeError: cannot unpack non-iterable AxesSubplot object. But I am not sure if this is preventing the result from the plotting.
This is a sample of my data and code effort:
import pandas as pd
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import random
from itertools import count
from IPython import display
row_data = np.arange(0, 6)
column_X = np.random.rand(6,)
column_Y = np.random.rand(6,)
column_Z = np.random.rand(6,)
column_K = np.random.rand(6,)
my_df = pd.DataFrame()
my_df['column_X'] = column_X
my_df['column_Y'] = column_Y
my_df['column_Z'] = column_Z
my_df['column_K'] = column_K
my_df.index = row_data
my_df['index'] = row_data
def animate(j):
fig, ax = plt.subplot(sharex= True)
ax[1]=my_df['column_X', color = 'blue']
ax[2]=my_df['column_Y', color = 'red']
ax[3]=my_df['column_Z', color = 'brown']
ax[4]=my_df['column_K', color = 'green']
y=my_df['index']
x.append()
y.append()
plt.xlabel(color = 'blue')
plt.ylabel(color = 'red')
ax.set_ylabel("progressive sales through time")
ax.set_xlabel("progressive time")
plt.plot(x,y)
animation_1 = animation.FuncAnimation(plt.gcf(),animate,interval=1000)
plt.show()
# Inside Jupyter:
video_1 = animation_1.to_html5_video()
html_code_1 = display.HTML(video_1)
display.display(html_code_1)
plt.tight_layout()
plt.show()
Good question! matplotlib animations can be tricky. I struggled a bit with this one, mainly because you want different colors for the different columns. You need 4 different Line2D objects to do this.
# VSCode notebook magic
%matplotlib widget
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
my_df = pd.DataFrame()
my_df["column_X"] = np.random.rand(6)
my_df["column_Y"] = np.random.rand(6)
my_df["column_Z"] = np.random.rand(6)
my_df["column_K"] = np.random.rand(6)
fig, ax = plt.subplots()
# four y-data lists, x-data is shared
xdata, y1, y2, y3, y4 = [], [], [], [], []
# four Line3D objects with different colors
graph1, = ax.plot([], [], 'ro-')
graph2, = ax.plot([], [], 'go-')
graph3, = ax.plot([], [], 'bo-')
graph4, = ax.plot([], [], 'ko-')
# set up the plot
plt.xlim(-1, 6)
plt.xlabel('Time')
plt.ylim(0, 1)
plt.ylabel('Price')
# animation function
def animate(i):
xdata.append(i)
y1.append(my_df.iloc[i,0])
y2.append(my_df.iloc[i,1])
y3.append(my_df.iloc[i,2])
y4.append(my_df.iloc[i,3])
graph1.set_data(xdata, y1)
graph2.set_data(xdata, y2)
graph3.set_data(xdata, y3)
graph4.set_data(xdata, y4)
return (graph1,graph2,graph3,graph4,)
anim = animation.FuncAnimation(fig, animate, frames=6, interval=500, blit=True)
anim.save('test.mp4')
#plt.show()
Here's the resulting .gif (converted from .mp4 using Adobe Express):

Making a transparent area within a stacked area chart in Matplotlib

I am trying to build this type of chart: a mix between a line chart and a stacked area chart using Matplotlib and seaborn. I just want the white area below to be fully transparent. I tried changing the alpha parameter but it does not make the area transparent, just white at best. I am using the below code:
plt.plot(df.index,"5y Avg",data=df,
color=avg_color,
linestyle="dotted",
label= '5y Avg')
plt.stackplot(df.index,df["5Y Max"],color="#B1B3B6",labels= ['5y Range'])
plt.stackplot(df_test.index,df["5Y Min"],color="white",alpha=1)
You can get the effect you want simply by changing the approach to the problem: in place of making transparent the area of the bottom stackplot, you can color only the portion of the graph you want with matplotlib.axes.Axes.fill_between:
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')
Complete Code
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
df = pd.DataFrame()
df['index'] = np.arange(1, 53 + 1, 1)
df['5y Avg'] = 2000/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Max'] = 3200/53*df['index'] + 100*np.random.rand(len(df))
df['5Y Min'] = 1000/53*df['index'] + 100*np.random.rand(len(df))
avg_color = '#45A1A2'
df = df.set_index('index')
plt.style.use('seaborn-whitegrid')
fig, ax = plt.subplots()
ax.plot(df.index, df['5y Avg'],
color = avg_color,
linestyle = 'dotted',
label = '5y Avg')
ax.fill_between(x = df.index, y1 = df['5Y Min'], y2 = df['5Y Max'], color = '#B1B3B6', label = '5y Range')
ax.legend(frameon = True)
plt.show()
Plot

boxplot show max and min fliers results in TypeError: 'AxesSubplot' object is not subscriptable

I am preparing box plots with a whisker interval of [2,98]. The issue is that I am working with air quality data and have a large range of data points, so the outliers take up the entire figure and overshadow the boxplots. I would like to plot the max and min outliers only and have tried the method from Matplotlib boxplot show only max and min fliers, however, I get an error message that says TypeError: 'AxesSubplot' object is not subscriptable.
Here is my code:
fig,ax = plt.subplots(1, figsize=(8,6))
g = sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98))
fliers = g['fliers']
for fly in fliers:
fdata=fly.get_data
fly.set_data([fdata[0][0],fdata[0][-1],fdata[1][0],fdata[1][-1]])
xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(3), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)',fontsize=15)
ax.set_ylim(0,105)
plt.show()
Here's some sample data:
mda8 = pd.DataFrame({
'T1':[35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
'T2':[28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
'T3':[34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})
I need help with plotting the max and min outliers only and am open to doing another method besides the one that I tried here.
EDIT here's the link to my csv file https://drive.google.com/file/d/1E3A0UAYCbSN53JXtfsbrA4i_Phci_JWf/view?usp=sharing
A possible approach could be:
hide the outliers plotted by seaborn.boxplot by passing showfliers = False parameter:
sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98), showfliers = False)
get the list of outliers for each column, find maximum and minimum and plot only them:
outliers = {col: list(stat['fliers']) for col in mda8.columns for stat in boxplot_stats(mda8[col])}
min_max_outliers = {key: [np.min(value), np.max(value)] if value != [] else [] for key, value in outliers.items()}
i = 0
for key, value in min_max_outliers.items():
if value != []:
ax.scatter([i, i], value, marker = 'd', facecolor = 'black')
i += 1
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.cbook import boxplot_stats
mda8 = pd.DataFrame({'T1': [35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
'T2': [28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
'T3': [34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})
fig,ax = plt.subplots(1, figsize=(8,6))
sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98), showfliers = False)
outliers = {col: list(stat['fliers']) for col in mda8.columns for stat in boxplot_stats(mda8[col])}
min_max_outliers = {key: [np.min(value), np.max(value)] if value != [] else [] for key, value in outliers.items()}
i = 0
for key, value in min_max_outliers.items():
if value != []:
ax.scatter([i, i], value, marker = 'd', facecolor = 'black')
i += 1
xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(3), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)',fontsize=15)
ax.set_ylim(0,105)
plt.show()
EDIT
Working on the data your provided, if I plot them as they are:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
mda8 = pd.read_csv(r'data/MDA8_allregions.csv')
mda8 = mda8.drop(['date', 'date.1', 'date.2'], axis = 1)
fig, ax = plt.subplots(1, figsize = (8, 6))
sns.boxplot(data = mda8, orient = 'v', width = 0.7, whis = (2, 98), showfliers = True)
plt.show()
I get:
In the code above I change the parameter showfliers = False, in order to hide outliers.
Then, as suggested by JohanC in the comment, a simpler way to plot outliers is to plot min and max for each column:
for i, col in enumerate(mda8.columns, 0):
ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker = 'd', facecolor = 'black')
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
mda8 = pd.read_csv(r'data/MDA8_allregions.csv')
mda8 = mda8.drop(['date', 'date.1', 'date.2'], axis = 1)
fig, ax = plt.subplots(1, figsize = (8, 6))
sns.boxplot(data = mda8, orient = 'v', width = 0.7, whis = (2, 98), showfliers = False)
for i, col in enumerate(mda8.columns, 0):
ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker = 'd', facecolor = 'black')
plt.show()

How to change colors automatically once a parameter is changed

In the following code, the color of bars changes as the threshold is changed. Instead of using the threshold and plotting the horizontal line in the code, I want to use the y parameter in the OnMouseMove function so that the user can change the location of "threshold". Then, I want the colors to be updated as the y is changed.
I think what I need is called "observer pattern" or perhaps a trick using the animation tools but not sure how to implement it. I appreciate any insight on how to do this. Thanks
%matplotlib notebook
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.colors as mcol
import matplotlib.cm as cm
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(335,1500,300),
np.random.normal(410,900,300),
np.random.normal(410,1200,300),
np.random.normal(480,550,300)],
index=[1,2,3,4])
fig, ax = plt.subplots()
plt.show()
bars = plt.bar(range(df.shape[0]), df.mean(axis = 1), color = 'lightslategrey')
fig = plt.gcf()
threshold=420
plt.axhline(y = threshold, color = 'grey', alpha = 0.5)
cm1 = mcol.LinearSegmentedColormap.from_list("Test",["b", "white", "purple"])
cpick = cm.ScalarMappable(cmap=cm1)
cpick.set_array([])
percentages = []
for bar in bars:
percentage = (bar.get_height()-threshold)/bar.get_height()
if percentage>1: percentage = 1
if percentage<0: percentage=0
percentages.append(percentage)
cpick.to_rgba(percentages)
bars = plt.bar(range(df.shape[0]), df.mean(axis = 1), color = cpick.to_rgba(percentages))
plt.colorbar(cpick, orientation='horizontal')
def onMouseMove(event):
ax.lines = [ax.lines[0]]
plt.axhline(y=event.ydata, color="k")
fig.canvas.mpl_connect('motion_notify_event', onMouseMove)
plt.xticks(range(df.shape[0]), df.index, alpha = 0.8)
First you should use exactly one bar plot and exactly one axhline (using more will make everything chaotic). You can set the colors of the bars via
for bar in bars:
bar.set_color(..)
and you can update the axhline's position via line.set_ydata(position).
Now, for every mouse move event you need to update the axhline's position, calculate the percentages and apply a new colors to the bars. So those things should be done in a function, which is called every time the mouse move event is triggered. After those settings have been applied the canvas needs to be drawn for them to become visible.
Here is a complete code.
import pandas as pd
import numpy as np
import matplotlib.colors as mcol
import matplotlib.cm as cm
import matplotlib.pyplot as plt
np.random.seed(12345)
df = pd.DataFrame([np.random.normal(335,1500,300),
np.random.normal(410,900,300),
np.random.normal(410,1200,300),
np.random.normal(480,550,300)],
index=[1,2,3,4])
fig, ax = plt.subplots()
threshold=420.
bars = plt.bar(range(df.shape[0]), df.mean(axis = 1), color = 'lightslategrey')
axline = plt.axhline(y = threshold, color = 'grey', alpha = 0.5)
cm1 = mcol.LinearSegmentedColormap.from_list("Test",["b", "white", "purple"])
cpick = cm.ScalarMappable(cmap=cm1)
cpick.set_array([])
plt.colorbar(cpick, orientation='horizontal')
def percentages(threshold):
percentages = []
for bar in bars:
percentage = (bar.get_height()-threshold)/bar.get_height()
if percentage>1: percentage = 1
if percentage<0: percentage=0
percentages.append(percentage)
return percentages
def update(threshold):
axline.set_ydata(threshold)
perc = percentages(threshold)
for bar, p in zip(bars, perc):
bar.set_color(cpick.to_rgba(p))
# update once before showing
update(threshold)
def onMouseMove(event):
if event.inaxes == ax:
update(event.ydata)
fig.canvas.draw_idle()
fig.canvas.mpl_connect('motion_notify_event', onMouseMove)
plt.xticks(range(df.shape[0]), df.index, alpha = 0.8)
plt.show()

Legend in matplotlib

I'm wondering how can I do the following:
I have a DataFrame with points and classes. I'd like to draw all points and use one color for each class. How can I specify how classes refer to colors in the legend?
fig = plt.figure(figsize=(18,10), dpi=1600)
df = pd.DataFrame(dict(points1 = data_plot[:,0], points2 = data_plot[:,1], \
target = target[0:2000]))
colors = {1: 'green', 2:'red', 3:'blue', 4:'yellow', 5:'orange', 6:'pink', \
7:'brown', 8:'black', 9:'white'}
fig, ax = plt.subplots()
ax.scatter(df['points1'], df['points2'], c = df['target'].apply(lambda x: colors[x]))
The easiest way to get your legend to have separate entries for each color (and therefore it's target value) is to create a separate plot object for each target value.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
x = np.random.rand(100)
y = np.random.rand(100)
target = np.random.randint(1,9, size=100)
df = pd.DataFrame(dict(points1=x, points2=y, target=target))
colors = {1: 'green', 2:'red', 3:'blue', 4:'yellow', 5:'orange', 6:'pink', \
7:'brown', 8:'black', 9:'white'}
fig, ax = plt.subplots()
for k,v in colors.items():
series = df[df['target'] == k]
scat = ax.scatter(series['points1'], series['points2'], c=v, label=k)
plt.legend()

Categories