How to shade kdeplot under a certain value - python

I would like to shade the area under the kdeplot below the value -1.64.
I drew the kdeplot and a vertical line at -1.64.
How do I fill the triangular area between the kdeplot, the x-axis, and the vertical line?
Also, I would like the height of the orange vertical line to just reach the kdeplot.
fig, ax = plt.subplots(nrows=2, ncols=2,figsize=(12,6), tight_layout=True, sharex=True, sharey=True)
sns.kdeplot(pop_norm.fev1_z, ax=ax[0,0], legend=False)
ax[0,0].axvline(x=pop_norm.fev1_z.mean(), linestyle = '--', alpha =0.5)
ax[0,0].axvline(x=-1.64, linestyle = '-', color = palette[1], alpha =0.5)
ax[0,0].set_xlabel("FEV1 z-score")
ax[0,0].set_ylabel("Population density")
sns.kdeplot(pop_norm.fvc_z, ax=ax[0,1], legend=False)
ax[0,1].axvline(x=pop_norm.fvc_z.mean(), linestyle = '--', alpha =0.5)
ax[0,1].axvline(x=-1.64, linestyle = '-', color = palette[1], alpha =0.5)
ax[0,1].set_xlabel("FVC z-score")
sns.kdeplot(pop_norm.fev1fvc_z, ax=ax[1,0], legend=False)
ax[1,0].axvline(x=pop_norm.fev1fvc_z.mean(), linestyle = '--', alpha =0.5)
ax[1,0].axvline(x=-1.64, linestyle = '-', color = palette[1], alpha =0.5)
ax[1,0].set_xlabel("FEV1/FVC z-score")
ax[1,0].set_ylabel("Population density")
sns.kdeplot(pop_norm.fef2575_z, ax=ax[1,1], legend=False)
ax[1,1].axvline(x=pop_norm.fef2575_z.mean(), linestyle = '--', alpha =0.5)
ax[1,1].axvline(x=-1.64, linestyle = '-', color = palette[1], alpha =0.5)
ax[1,1].set_xlabel("FEF 25-75% z-score")

You can extract the curve of the kde by extracting the x and y data from the last lines element in the subplot.
These x and y can be used both to interpolate the curve height at given x values, and as parameters for fill_between().
Here is an example with one subplot:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
pop_norm_fev1_z = np.random.normal(-.004, .01, 500).cumsum()
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(pop_norm_fev1_z, ax=ax, legend=False)
x = ax.lines[-1].get_xdata()
y = ax.lines[-1].get_ydata()
mean = pop_norm_fev1_z.mean()
ax.vlines(mean, 0, np.interp(mean, x, y), linestyle='--', alpha=0.5)
x_special = -1.64
ax.vlines(x_special, 0, np.interp(x_special, x, y), linestyle='-', color='crimson', alpha=0.5)
ax.fill_between(x, 0, y, where=x < x_special, color='gold', alpha=0.3)
plt.show()

Related

Multiple seaborn pointplots not showing the right color on the legend

I have three point plot i'm trying to chart and show a legend. The colors do not match the colors called out in the plots. I tried using the solution from this post, but that did not work.
Here is the code I'm using:
fig, ax = plt.subplots()
a = sns.pointplot(x=l[1:], y = np.exp(model_m.params[1:]), label = 'factor',
ax = ax, color = 'green')
b = sns.pointplot(x=l[1:], y = np.exp(model_m.conf_int()[1:][:,1]),
ax = ax, label = 'conf_int+', color = 'red')
c = sns.pointplot(x=l[1:], y = np.exp(model_m.conf_int()[1:][:,0]),
ax = ax, label = 'conf_int-', color = 'blue')
plt.title('Model M Discrete')
ax.legend(labels = ['factor', 'conf_inf+', 'conf_inf-'],
title = 'legend')
Here is what it produces:
The easiest solution would be to use sns.lineplot instead of sns.pointplot:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
fig, ax = plt.subplots()
x = np.arange(10)
sns.lineplot(x=x, y=1 + np.random.rand(10).cumsum(),
ax=ax, label='factor', color='green', marker='o')
sns.lineplot(x=x, y=2 + np.random.rand(10).cumsum(),
ax=ax, label='conf_int+', color='red', marker='o')
sns.lineplot(x=x, y=3 + np.random.rand(10).cumsum(),
ax=ax, label='conf_int-', color='blue', marker='o')
ax.set_title('Model M Discrete')
ax.legend(title='legend')
plt.tight_layout()
plt.show()
Another option would be to iterate through the generated "pathCollections" and assign a label (for some reason label= doesn't work in sns.pointplot).
fig, ax = plt.subplots()
sns.pointplot(x=x, y=1 + np.random.rand(10).cumsum(),
ax=ax, color='green')
sns.pointplot(x=x, y=2 + np.random.rand(10).cumsum(),
ax=ax, color='red')
sns.pointplot(x=x, y=3 + np.random.rand(10).cumsum(),
ax=ax, label='conf_int-', color='blue')
for curve, label in zip(ax.collections, ['factor', 'conf_int+', 'conf_int-']):
curve.set_label(label)
ax.set_title('Model M Discrete')
ax.legend(title='legend')
Still another way is to mimic a long form dataframe with hue which automatically creates a legend:
fig, ax = plt.subplots()
x = np.arange(10)
y1 = 1 + np.random.rand(10).cumsum()
y2 = 2 + np.random.rand(10).cumsum()
y3 = 3 + np.random.rand(10).cumsum()
sns.pointplot(x=np.tile(x, 3),
y=np.concatenate([y1, y2, y3]),
hue=np.repeat(['factor', 'conf_int+', 'conf_int-'], len(x)),
ax=ax, palette=['green', 'red', 'blue'])
Note that in both cases only a dot is shown in the legend, not a line.

Plotting a time series with markers colored by weekday and a legend

I have a data frame with index as the date. The columns are different time series and I have added another column to mark which weekday the particular observation belongs to. Something like this:
What I want to do is:
a) plot a time series, say Series 1, add markers to the plot and color them by the weekday. I have got the 2 plots using plt.scatter and plt.plot:
plt.scatter(x = df.index, y = df['Series1'], c = df['day'])
plt.plot(df.index, df['Series1'], marker = 'o')
However, I am unable to add a legend to the first figure. To the second figure I am unable to add either different colored markers or the legend.
Can someone help out.
b) If I can achieve a), then I'd like to plot all three series on the same figure.
Thanks!
Yes this is all possible! So first let's get all three time series on the same plot
import matplotlib.pyplot as plt
plt.figure()
plt.subplot(1,1,1)
line1, = plt.plot(df.index, df['Series1'])
line2, = plt.plot(df.index, df['Series2'])
line3, = plt.plot(df.index, df['Series3'])
plt.scatter(df.index, df['Series1'], c = df['day'], marker = 'o')
plt.scatter(df.index, df['Series2'], c = df['day'], marker = 'v')
plt.scatter(df.index, df['Series3'], c = df['day'], marker = 'x')
plt.legend(handles=[line1, line2, line3])
plt.show()
If you want to make sure that the markers are in front of the lines in order to get a cleaner plot we can use the zorder property.
import matplotlib.pyplot as plt
plt.figure()
plt.subplot(1,1,1)
line1, = plt.plot(df.index, df['Series1'], zorder=1)
line2, = plt.plot(df.index, df['Series2'], zorder=1)
line3, = plt.plot(df.index, df['Series3'], zorder=1)
plt.scatter(df.index, df['Series1'], c = df['day'], marker = 'o', s = 100, zorder=2)
plt.scatter(df.index, df['Series2'], c = df['day'], marker = 'v', s = 100, zorder=2)
plt.scatter(df.index, df['Series3'], c = df['day'], marker = 'x', s = 100, zorder=2)
plt.legend(handles=[line1, line2, line3])
plt.show()
You can show the difference in days using a colorbar
import matplotlib.pyplot as plt
plt.figure()
plt.subplot(1,1,1)
line1, = plt.plot(df.index, df['Series1'], zorder=1)
line2, = plt.plot(df.index, df['Series2'], zorder=1)
line3, = plt.plot(df.index, df['Series3'], zorder=1)
plt.scatter(df.index, df['Series1'], c = df['day'], marker = 'o', s = 100, zorder=2)
plt.scatter(df.index, df['Series2'], c = df['day'], marker = 'v', s = 100, zorder=2)
plt.scatter(df.index, df['Series3'], c = df['day'], marker = 'x', s = 100, zorder=2)
plt.legend(handles=[line1, line2, line3])
plt.colorbar()
plt.show()
If you want the legend to contain the color associated with the different days then you can make a custom colormap and then have a custom legend as
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
# define the colormap
cmap = plt.cm.jet
cmaplist = [cmap(i) for i in range(1,cmap.N,cmap.N//max(df['day']))]
plt.figure()
plt.subplot(1,1,1)
line1, = plt.plot(df.index, df['Series1'], zorder=1)
line2, = plt.plot(df.index, df['Series2'], zorder=1)
line3, = plt.plot(df.index, df['Series3'], zorder=1)
plt.scatter(df.index, df['Series1'], c = df['day'], cmap='jet', marker = 'o', s = 100, zorder=2)
plt.scatter(df.index, df['Series2'], c = df['day'], cmap='jet', marker = 'v', s = 100, zorder=2)
plt.scatter(df.index, df['Series3'], c = df['day'], cmap='jet', marker = 'x', s = 100, zorder=2)
legend_elements = [Line2D([0], [0], color='b', lw=4, label='Line'),
Line2D([0], [0], marker='o', color='w', label='Scatter',
markerfacecolor='g', markersize=15),
Patch(facecolor='orange', edgecolor='r',
label='Color Patch')]
legend_elements = []
for ix, i in enumerate(df['day']):
temp = Line2D([0], [0], color = cmaplist[i-1][0:3], lw=4, label=str(i))
legend_elements.append(temp)
plt.legend(handles=legend_elements)
plt.show()

Make border of Label, bbox or axes.text flush with spines of Graph in python matplotlib

for a certain manuscript i need to position my label of the Graph exactly in the right or left top corner. The label needs a border with the same thickness as the spines of the graph. Currently i do it like this:
import matplotlib.pyplot as plt
import numpy as np
my_dpi=96
xposr_box=0.975
ypos_box=0.94
nrows=3
Mytext="label"
GLOBAL_LINEWIDTH=2
fig, axes = plt.subplots(nrows=nrows, sharex=True, sharey=True, figsize=
(380/my_dpi, 400/my_dpi), dpi=my_dpi)
fig.subplots_adjust(hspace=0.0001)
colors = ('k', 'r', 'b')
for ax, color in zip(axes, colors):
data = np.random.random(1) * np.random.random(10)
ax.plot(data, marker='o', linestyle='none', color=color)
for ax in ['top','bottom','left','right']:
for idata in range(0,nrows):
axes[idata].spines[ax].set_linewidth(GLOBAL_LINEWIDTH)
axes[0].text(xposr_box, ypos_box , Mytext, color='black',fontsize=8,
horizontalalignment='right',verticalalignment='top', transform=axes[0].transAxes,
bbox=dict(facecolor='white', edgecolor='black',linewidth=GLOBAL_LINEWIDTH))
plt.savefig("Label_test.png",format='png', dpi=600,transparent=True)
So i control the position of the box with the parameters:
xposr_box=0.975
ypos_box=0.94
If i change the width of my plot, the position of my box also changes, but it should always have the top and right ( or left) spine directly on top of the graphs spines:
import matplotlib.pyplot as plt
import numpy as np
my_dpi=96
xposr_box=0.975
ypos_box=0.94
nrows=3
Mytext="label"
GLOBAL_LINEWIDTH=2
fig, axes = plt.subplots(nrows=nrows, sharex=True, sharey=True, figsize=
(500/my_dpi, 400/my_dpi), dpi=my_dpi)
fig.subplots_adjust(hspace=0.0001)
colors = ('k', 'r', 'b')
for ax, color in zip(axes, colors):
data = np.random.random(1) * np.random.random(10)
ax.plot(data, marker='o', linestyle='none', color=color)
for ax in ['top','bottom','left','right']:
for idata in range(0,nrows):
axes[idata].spines[ax].set_linewidth(GLOBAL_LINEWIDTH)
axes[0].text(xposr_box, ypos_box , Mytext, color='black',fontsize=8,
horizontalalignment='right',verticalalignment='top',transform=axes[0].transAxes,
bbox=dict(facecolor='white', edgecolor='black',linewidth=GLOBAL_LINEWIDTH))
plt.savefig("Label_test.png",format='png', dpi=600,transparent=True)
This should also be the case if the image is narrower not wider as in this example.I would like to avoid doing this manually. Is there a way to always position it where it should? Independent on the width and height of the plot
and the amount of stacked Graphs?
The problem is that the position of a text element is relative to the text's extent, not to its surrounding box. While it would in principle be possible to calculate the border padding and position the text such that it sits at coordinates (1,1)-borderpadding, this is rather cumbersome since (1,1) is in axes coordinates and borderpadding in figure points.
There is however a nice alternative, using matplotlib.offsetbox.AnchoredText. This creates a textbox which can be placed easily relative the the axes, using the location parameters like a legend, e.g. loc="upper right". Using a zero padding around that text box directly places it on top of the axes spines.
from matplotlib.offsetbox import AnchoredText
txt = AnchoredText("text", loc="upper right", pad=0.4, borderpad=0, )
ax.add_artist(txt)
A complete example:
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
import numpy as np
my_dpi=96
nrows=3
Mytext="label"
plt.rcParams["axes.linewidth"] = 2
plt.rcParams["patch.linewidth"] = 2
fig, axes = plt.subplots(nrows=nrows, sharex=True, sharey=True, figsize=
(500./my_dpi, 400./my_dpi), dpi=my_dpi)
fig.subplots_adjust(hspace=0.0001)
colors = ('k', 'r', 'b')
for ax, color in zip(axes, colors):
data = np.random.random(1) * np.random.random(10)
ax.plot(data, marker='o', linestyle='none', color=color)
txt = AnchoredText(Mytext, loc="upper right",
pad=0.4, borderpad=0, prop={"fontsize":8})
axes[0].add_artist(txt)
plt.show()
In principle you can align text to the Axes spines using Annotations and position them in Axes coordinates (x and y between 0 and 1) using xycoords = 'axes fraction. However, because you use a bbox that bbox will overlap with the spines.
Instead, you can use ax.text together a ScaledTransformation, which, if done right, also positions the text in Axes coordinates and shifts it by a fixed amount. If you provide a pad size to the bbox keyword, you know exactly how much the bbox will overlap with the spines in figure points (1 inch is 72 points), so that the shift is easily calculated. Here a little demonstration how to do this:
from matplotlib import pyplot as plt
import numpy as np
import matplotlib.transforms as transforms
GLOBAL_LINEWIDTH=2
pad = 10
fig,ax = plt.subplots()
x = np.linspace(0,1,20)
ax.plot(x,x**2, 'ro')
offset_bl = transforms.ScaledTranslation(
pad/72, pad/72, fig.dpi_scale_trans,
)
offset_br = transforms.ScaledTranslation(
-pad/72, pad/72, fig.dpi_scale_trans,
)
offset_tl = transforms.ScaledTranslation(
pad/72, -pad/72, fig.dpi_scale_trans,
)
offset_tr = transforms.ScaledTranslation(
-pad/72, -pad/72, fig.dpi_scale_trans,
)
for pos in ['top','bottom','left','right']:
ax.spines[pos].set_linewidth(GLOBAL_LINEWIDTH)
ax.text(
0,0, 'bottom left',
fontsize = 16, fontweight='bold', va='bottom', ha='left',
bbox=dict(
facecolor = 'white', edgecolor='black', lw = GLOBAL_LINEWIDTH,
pad = pad
),
transform=ax.transAxes + offset_bl,
)
ax.text(
1,0, 'bottom right',
fontsize = 16, fontweight='bold', va='bottom', ha='right',
bbox=dict(
facecolor = 'white', edgecolor='black', lw = GLOBAL_LINEWIDTH,
pad = pad
),
transform=ax.transAxes + offset_br,
)
ax.text(
0,1, 'top left',
fontsize = 16, fontweight='bold', va='top', ha='left',
bbox=dict(
facecolor = 'white', edgecolor='black', lw = GLOBAL_LINEWIDTH,
pad = pad
),
transform=ax.transAxes + offset_tl,
)
ax.text(
1,1, 'top right',
fontsize = 16, fontweight='bold', va='top', ha='right',
bbox=dict(
facecolor = 'white', edgecolor='black', lw = GLOBAL_LINEWIDTH,
pad = pad
),
transform=ax.transAxes + offset_tr,
)
plt.show()
And here is the result:

How do I animate this graph so that the dot moves and the green line plots over a different range for each element in loop?

I am trying to make this animated so that the dot and the green line move due to the for loop. This code displays 3 different graphs one below the other. The middle graph has no animation section.
x =lag_range
count = 0
plt.ion()
fig, ax = plt.subplots()
for b in x:
plt.subplot(311)
plt.plot(x,pear_corr, color='b', linewidth=1.5, label ='Pearson')
plt.plot(x,spear_corr, color ='r', linewidth=1.5, label='Spearman')
plt.plot(x[count],pear_corr[count],'yo')
plt.legend()
axes = plt.gca()
plt.ylabel('Correlation coefficients')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
axes.set_ylim(-1,1)
plt.subplot(312)
plt.plot(x,pear_p_values, color='b', linewidth=1.5)
plt.plot(x,spear_p_values, color ='r', linewidth=1.5)
axes = plt.gca()
plt.ylabel('P values')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
plt.subplot(313)
ax1 = plt.subplot(313)
x_for_p = range(len(x_prices))
ax1.plot(x_for_p, x_prices, color ='grey', linewidth=1.5)
ax1.set_ylabel('Share price', color ='grey')
ax1.tick_params('y', colors='grey')
ax1.set_xlabel('Days')
axes = plt.gca()
axes.set_xlim([min(lag_list),(2*last)])
ax2 = ax1.twinx()
x_for_den = range(b,(b+len(x_prices)))
ax2.plot(x_for_den, y_planes, color='g', linewidth=1.5)
ax2.set_ylabel('Plane density', color='g')
ax2.tick_params('y', colors='g')
count += 1
plt.pause(2)
plt.draw()
cross_corr2_vis(prices, density_p3)
If you could share a working code or just definitions of variables pear_corr, spear_corr, etc., the following code might have not resulted in this simple animation:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
N_points = 1000
x = np.linspace(0,2.*np.pi,N_points)
y = np.sin(x)
fig, ax = plt.subplots()
ax.set_xlim([0,2.*np.pi])
ax.set_ylim([-1,1])
line, = ax.plot( [],[], lw=2, color='g')
sctr = ax.scatter([],[], s=100, color='r')
def animate(i):
line.set_ydata(y[:i+1]) # update
line.set_xdata(x[:i+1])
sctr.set_offsets((x[i],y[i]))
return line,sctr
ani = animation.FuncAnimation(fig, animate, N_points, interval=5, blit=True)
plt.show()

Average line for bar chart in matplotlib

How do we draw an average line (horizontal) for a histogram in using matplotlib?
Right now, I'm able to draw the histogram without any issues.
Here is the code I'm using:
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.2 # the width of the bars
plt.tick_params(axis='both', which='major', labelsize=30)
plt.tick_params(axis='both', which='minor', labelsize=30)
ax2 = ax.twinx()
## the bars
rects1 = ax.bar(ind, PAAE1, width,
color='0.2',
error_kw=dict(elinewidth=2,ecolor='red'),
label='PAAE1')
rects2 = ax.bar(ind+width, PAAE2, width,
color='0.3',
error_kw=dict(elinewidth=2,ecolor='black'),
label='PAAE2')
rects3 = ax2.bar(ind+width+width, AAE1, width,
color='0.4',
error_kw=dict(elinewidth=2,ecolor='red'),
label='AAE1')
rects4 = ax2.bar(ind+3*width, AAE2, width,
color='0.5',
error_kw=dict(elinewidth=2,ecolor='black'),
label='AAE3')
maxi = max(dataset[2])
maxi1 = max(dataset[4])
f_max = max(maxi, maxi1)
lns = [rects1,rects2,rects3,rects4]
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='upper center', ncol=4)
# axes and labels
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0, 100)
ax.set_ylabel('PAAE', fontsize=25)
ax2.set_ylim(0, f_max+500)
ax2.set_ylabel('AAE (mW)', fontsize=25)
xTickMarks = dataset[0]
ax.set_xticks(ind+width)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=90, fontsize=25)
I want to plot the average line for PAAE 1, 2 and AAE 1, 2.
What should I be using to plot the average line?
If you'd like a vertical line to denote the mean use axvline(x_value). This will place a vertical line that always spans the full (or specified fraction of) y-axis. There's also axhline for horizontal lines.
In other works, you might have something like this:
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
As a more complete, but unnecessarily complex example (most of this is nicely annotating the means with curved arrows):
import numpy as np
import matplotlib.pyplot as plt
data1 = np.random.normal(0, 1, 1000)
data2 = np.random.normal(-2, 1.5, 1000)
fig, ax = plt.subplots()
bins = np.linspace(-10, 5, 50)
ax.hist(data1, bins=bins, color='blue', label='Dataset 1',
alpha=0.5, histtype='stepfilled')
ax.hist(data2, bins=bins, color='green', label='Dataset 2',
alpha=0.5, histtype='stepfilled')
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
# Add arrows annotating the means:
for dat, xoff in zip([data1, data2], [15, -15]):
x0 = dat.mean()
align = 'left' if xoff > 0 else 'right'
ax.annotate('Mean: {:0.2f}'.format(x0), xy=(x0, 1), xytext=(xoff, 15),
xycoords=('data', 'axes fraction'), textcoords='offset points',
horizontalalignment=align, verticalalignment='center',
arrowprops=dict(arrowstyle='-|>', fc='black', shrinkA=0, shrinkB=0,
connectionstyle='angle,angleA=0,angleB=90,rad=10'),
)
ax.legend(loc='upper left')
ax.margins(0.05)
plt.show()

Categories