Different shading under Seaborn Distplot - python

I'm trying to create plot with shadings which are based on this MIC(1) line.
Different shading above than beneath.
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def createSkewDist(mean, sd, skew, size):
# calculate the degrees of freedom 1 required to obtain the specific skewness statistic, derived from simulations
loglog_slope=-2.211897875506251
loglog_intercept=1.002555437670879
df2=500
df1 = 10**(loglog_slope*np.log10(abs(skew)) + loglog_intercept)
# sample from F distribution
fsample = np.sort(stats.f(df1, df2).rvs(size=size))
# adjust the variance by scaling the distance from each point to the distribution mean by a constant, derived from simulations
k1_slope = 0.5670830069364579
k1_intercept = -0.09239985798819927
k2_slope = 0.5823114978219056
k2_intercept = -0.11748300123471256
scaling_slope = abs(skew)*k1_slope + k1_intercept
scaling_intercept = abs(skew)*k2_slope + k2_intercept
scale_factor = (sd - scaling_intercept)/scaling_slope
new_dist = (fsample - np.mean(fsample))*scale_factor + fsample
# flip the distribution if specified skew is negative
if skew < 0:
new_dist = np.mean(new_dist) - new_dist
# adjust the distribution mean to the specified value
final_dist = new_dist + (mean - np.mean(new_dist))
return final_dist
desired_mean = 30
desired_skew = 1.5
desired_sd = 20
final_dist = createSkewDist(mean=desired_mean, sd=desired_sd, skew=desired_skew, size=1000000)
# inspect the plots & moments, try random sample
fig, ax = plt.subplots(figsize=(12,7))
sns.distplot(final_dist,
hist=False,
ax=ax,
color='darkred',
kde_kws=dict(linewidth=4))
l1 = ax.lines[0]
# Get the xy data from the lines so that we can shade
x1 = l1.get_xydata()[:,0]
x1[0] = 0
y1 = l1.get_xydata()[:,1]
y1[0] = 0
ax.fill_between(x1,y1, color="lemonchiffon", alpha=0.3)
ax.set_ylim(0.0001,0.03)
ax.axhline(0.002, ls="--")
ax.set_xlim(1.5, 200)
ax.set_yticklabels([])
ax.set_xticklabels([])
trans = transforms.blended_transform_factory(
ax.get_yticklabels()[0].get_transform(), ax.transData)
ax.text(0,0.0025, "{}".format("MIC(1) = 1"), color="blue", transform=trans,
ha="right", va="top", fontsize = 12)
trans_2 = transforms.blended_transform_factory(
ax.get_xticklabels()[0].get_transform(), ax.transData)
ax.text(84,0, "{}".format("\n84"), color="darkred", transform=trans_2,
ha="center", va="top", fontsize = 12)
ax.text(1.5,0, "{}".format("\n0"), color="darkred", transform=trans_2,
ha="center", va="top", fontsize = 12)
ax.axvline(x = 84, ymin = 0, ymax = 0.03, ls = '--', color = 'darkred' )
ax.set_yticks([])
ax.set_xticks([])
ax.spines['top'].set_color(None)
ax.spines['right'].set_color(None)
ax.spines['left'].set_linewidth(2)
ax.spines['bottom'].set_linewidth(2)
ax.set_ylabel("Concentration [mg/L]", labelpad = 80, fontsize = 15)
ax.set_xlabel("Time [h]", labelpad = 80, fontsize = 15)
ax.set_title("AUC/MIC", fontsize = 20, pad = 30)
plt.annotate("AUC/MIC",
xy=(18, 0.02),
xytext=(18, 0.03),
arrowprops=dict(arrowstyle="->"), fontsize = 12);
;
That's what I have:
And that's what I'd like to have (it's done in paint, so forgive me :) ):
I was experimenting with fill_between and fill_betweenx. However, without any satisfying results. Definitely, run out of ideas. I'd really appreciate any help on this. Best wishes!

Your fill_between works as expected. The problem is that color="lemonchiffon" with alpha=0.3 is barely visible. Try to use a brighter color and/or a higher value for alpha.
So, this colors the part of the graph between zero and the kde curve.
Now, to create a different coloring above and below the horizontal line, where= and np.minimum can be used in fill_between:
pos_hline = 0.002
ax.fill_between(x1, pos_hline, y1, color="yellow", alpha=0.3, where=y1 > pos_hline)
ax.fill_between(x1, 0, np.minimum(y1, pos_hline), color="blue", alpha=0.3)
Without where=y1 > pos_hline, fill_between would also color the region above the curve where the curve falls below that horizontal line.
PS: Note that sns.histplot has been deprecated since Seaborn version 0.11. To only plot the kde curve, you can use sns.kdeplot:
sns.kdeplot(final_dist, ax=ax, color='darkred', linewidth=4)

Related

plt.subplots does not correctly draw sns.lineplot [duplicate]

This question already has answers here:
What is the difference between drawing plots using plot, axes or figure in matplotlib?
(2 answers)
How to add a title to each subplot
(10 answers)
Closed 11 months ago.
I have the following code:
df = sns.load_dataset('titanic')
# Data
data = df[df.age.notna()].age
# Fit a normal distribution to the data:
mu, std = scipy.stats.norm.fit(data)
# bin formulas
bin_f = {'sturges' : 1 + math.log(len(df), 2)}
# Plot the histogram.
sns.histplot( data = data, stat='density', bins=int(bin_f['sturges']), alpha=0.6, color='g', kde = True, legend = True)
# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 1000)
p = scipy.stats.norm.pdf(x, mu, std)
sns.lineplot(x = x, y = p, color = 'black', linewidth=2)
title = f"Fit results: mu = {round(mu, 2)}, std ={round(std, 2)} "
plt.title(title)
Which produces this plot:
When I try to produce it in a subplot it wont work as expected:
f, ax = plt.subplots(nrows = 1, ncols = 2, figsize=(15, 8))
# Data
data = df[df.age.notna()].age
# Fit a normal distribution to the data:
mu, std = scipy.stats.norm.fit(data)
# bin formulas
bin_f = {'sturges' : 1 + math.log(len(df), 2)}
# Plot the histogram.
sns.histplot(ax = ax[0], data = data, stat='density', bins=int(bin_f['sturges']), alpha=0.4, color='g', kde = True, legend = True)
# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 1000)
p = scipy.stats.norm.pdf(x, mu, std)
sns.lineplot(x = x, y = p, color = 'black', linewidth=2, ax=ax[0])
title = f"Fit results: mu = {round(mu, 2)}, std ={round(std, 2)} "
plt.title(title)
For some reason the title is only for a second plot and the previously plotted lineplot ( the black one ) is only a small tick in a second plot rather than a normal curve as in the first image. I am not sure why this is happening as the only difference is just using plt.subplots and referencing ax, where is my mistake?
My goal is to have the first graph as seen in the first picture as a the first subplot in the second plot.

Render y-axis properly when overlaying pandas KDE and histogram

Similar questions to this have been asked before but not using these exact two plotting functions together so here we are:
I have a column from a pandas DataFrame that I am plotting both a histogram and the KDE. However, when I plot them, the y-axis is using the raw data value range instead of discrete number of samples/bin (what I want). How can I fix this? The actual plot is perfect, but the y-axis is wrong.
Data:
t2 = [140547476703.0, 113395471484.0, 158360225172.0, 105497674121.0, 186457736557.0, 153705359063.0, 36826568371.0, 200653068740.0, 190761317478.0, 126529980843.0, 98776029557.0, 132773701862.0, 14780432449.0, 167507656251.0, 121353262386.0, 136377019007.0, 134190768743.0, 218619462126.0, 07912778721.0, 215628911255.0, 147024833865.0, 94136343562.0, 135685803096.0, 165901502129.0, 45476074790.0, 125195690010.0, 113910844263.0, 123134290987.0, 112028565305.0, 93448218430.0, 07341012378.0, 93146854494.0, 132958913610.0, 102326700019.0, 196826471714.0, 122045354980.0, 76591131961.0, 134694468251.0, 120212625727.0, 108456858852.0, 106363042112.0, 193367024628.0, 39578667378.0, 178075400604.0, 155513974664.0, 132834624567.0, 137336282646.0, 125379267464.0]
Code:
fig = plt.figure()
# plot hist + kde
t2[t2.columns[0]].plot.kde(color = "maroon", label = "_nolegend_")
t2[t2.columns[0]].plot.hist(density = True, edgecolor = "grey", color = "tomato", title = t2.columns[0])
# plot mean/stdev
m = t2[t2.columns[0]].mean()
stdev = t2[t2.columns[0]].std()
plt.axvline(m, color = "black", ymax = 0.05, label = "mean")
plt.axvline(m-2*stdev, color = "black", ymax = 0.05, linestyle = ":", label = "+/- 2*Stdev")
plt.axvline(m+2*stdev, color = "black", ymax = 0.05, linestyle = ":")
plt.legend()
What it looks like now:
If you want the real counts, the you'll need to scale the KDE up by the width of the bins multiplied by the number of observations. The trickiest part is accessing the data pandas uses to plot the KDE. (I've removed parts related to the legend to simplify the problem at hand).
import matplotlib.pyplot as plt
import numpy as np
# Calculate KDE, get data
axis = t2[t2.columns[0]].plot.kde(color = "maroon", label = "_nolegend_")
xdata = axis.get_children()[0]._x
ydata = axis.get_children()[0]._y
plt.clf()
# Real figure
fig, ax = plt.subplots(figsize=(7,5))
# Plot Histogram, no density.
x = ax.hist(t2[t2.columns[0]], edgecolor = "grey", color = "tomato")
# size of the bins * N obs
scale = np.diff(x[1])[0]*len(t2)
# Plot scaled KDE
ax.plot(xdata, ydata*scale, color='blue')
ax.set_ylabel('N observations')
plt.show()

How to draw the normal distribution of a barplot with log x axis?

I'd like to draw a lognormal distribution of a given bar plot.
Here's the code
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np; np.random.seed(1)
import scipy.stats as stats
import math
inter = 33
x = np.logspace(-2, 1, num=3*inter+1)
yaxis = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0]
fig, ax = plt.subplots()
ax.bar(x[:-1], yaxis, width=np.diff(x), align="center", ec='k', color='w')
ax.set_xscale('log')
plt.xlabel('Diameter (mm)', fontsize='12')
plt.ylabel('Percentage of Total Particles (%)', fontsize='12')
plt.ylim(0,8)
plt.xlim(0.01, 10)
fig.set_size_inches(12, 12)
plt.savefig("Test.png", dpi=300, bbox_inches='tight')
Resulting plot:
What I'm trying to do is to draw the Probability Density Function exactly like the one shown in red in the graph below:
An idea is to convert everything to logspace, with u = log10(x). Then draw the density histogram in there. And also calculate a kde in the same space. Everything gets drawn as y versus u. When we have u at a top twin axes, x can stay at the bottom. Both axes get aligned by setting the same xlims, but converted to logspace on the top axis. The top axis can be hidden to get the desired result.
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
inter = 33
u = np.linspace(-2, 1, num=3*inter+1)
x = 10**u
us = np.linspace(u[0], u[-1], 500)
yaxis = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0]
yaxis = np.array(yaxis)
# reconstruct data from the given frequencies
u_data = np.repeat((u[:-1] + u[1:]) / 2, (yaxis * 100).astype(np.int))
kde = stats.gaussian_kde((u[:-1]+u[1:])/2, weights=yaxis, bw_method=0.2)
total_area = (np.diff(u)*yaxis).sum() # total area of all bars; divide by this area to normalize
fig, ax = plt.subplots()
ax2 = ax.twiny()
ax2.bar(u[:-1], yaxis, width=np.diff(u), align="edge", ec='k', color='w', label='frequencies')
ax2.plot(us, total_area*kde(us), color='crimson', label='kde')
ax2.plot(us, total_area * stats.norm.pdf(us, u_data.mean(), u_data.std()), color='dodgerblue', label='lognormal')
ax2.legend()
ax.set_xscale('log')
ax.set_xlabel('Diameter (mm)', fontsize='12')
ax.set_ylabel('Percentage of Total Particles (%)', fontsize='12')
ax.set_ylim(0,8)
xlim = np.array([0.01,10])
ax.set_xlim(xlim)
ax2.set_xlim(np.log10(xlim))
ax2.set_xticks([]) # hide the ticks at the top
plt.tight_layout()
plt.show()
PS: Apparently this also can be achieved directly without explicitly using u (at the cost of being slightly more cryptic):
x = np.logspace(-2, 1, num=3*inter+1)
xs = np.logspace(-2, 1, 500)
total_area = (np.diff(np.log10(x))*yaxis).sum() # total area of all bars; divide by this area to normalize
kde = gaussian_kde((np.log10(x[:-1])+np.log10(x[1:]))/2, weights=yaxis, bw_method=0.2)
ax.bar(x[:-1], yaxis, width=np.diff(x), align="edge", ec='k', color='w')
ax.plot(xs, total_area*kde(np.log10(xs)), color='crimson')
ax.set_xscale('log')
Note that the bandwidth set for gaussian_kde is a somewhat arbitrarily value. Larger values give a more equalized curve, smaller values keep closer to the data. Some experimentation can help.

How to I fill the central 95% confidence interval of a matplotlib histogram?

I am able to make a matplotlib histogram no problem. However, I'm wondering if it's possible to use something like fillbetween to change the fill color of the central 95% CI of my data.
I can only get fillbetween to work when if I use a trick with a numpy histogram and bincenters. i.e.:
bins = np.linspace(-a.max(),a.max(),400)
hist = np.histogram(a,bins = bins)[0]
bincenters = 0.5*(bins[1:] + bins[:-1])
b = plt.plot(bincenters,hist, linestyle = 'None')
plt.fill_between(bincenters,hist, color = '#7f7f7f')
plt.fill_between(bincenters, hist, interpolate=False,
where=((bincenters>=lower_p) & (bincenters<=upper_p)), hatch = '...', facecolor = '#7f7f7f')```
Here's my existing code that I'd rather use to create the matplotlib histogram (which I think looks better) with some extras plotting on top:
#Create Histogram
axes[1] = boota.plot.hist(ax = axes[1],bins = 50, legend = None, histtype = 'bar', color = '#7f7f7f')
axes[1].set_xlabel('Spatial Decay Rate (α)', size = 16, fontweight = 'bold')
axes[1].set_ylabel('Frequency', labelpad = 11, size = 16, fontweight = 'bold')
#Ticklabels
axes[0].tick_params(labelsize = 14)
axes[1].tick_params(labelsize = 14)
#draw vertical line at remote powerlaw (rem_a)
rem_a = 0.649
axes[1].axvline(x=rem_a, color='k', linestyle='dashed', linewidth=1.5, label='remote decay \nrate $α_r$ = 0.649')
legend = axes[1].legend(ncol = 1, loc = 'upper left', fontsize='large')
legend.draw_frame(False)
at2 = AnchoredText("B",prop=dict(size=20), loc='upper right',frameon=False)
axes[1].add_artist(at2)
Check out fill_betweenx which I think is better fit here
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
arr = np.random.normal(size=500)
ci = norm(*norm.fit(arr)).interval(0.95) # fit a normal distribution and get 95% c.i.
height, bins, patches = plt.hist(arr, alpha=0.3)
plt.fill_betweenx([0, height.max()], ci[0], ci[1], color='g', alpha=0.1) # Mark between 0 and the highest bar in the histogram

Loop to create subplot /Python

i have a little problem to create a subplot loop.
The following code show my result for one plot.... So it starts with a dayloop than with a hour loop (8 timesteps).
If i run the code i get a nice QUiver plot with the colorbar.
for dd in range(1,15):
day=str(dd)
readfile=fns[files_indizes[dd]]
if dd < 10:
nc_u_comp = NetCDFFile(ROOT+u_comp1+'0'+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+'0'+day+comp)
else:
nc_u_comp = NetCDFFile(ROOT+u_comp1+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+day+comp)
time = nc_u_comp.variables['time'][:]
index=readfile.find(comp)
index=index+len(comp)
date=readfile[index-14:index-6]
plt.clf()
for tt in range(0,len(time)):
if tt < 10:
h =str(0)+str(tt)
else:
h=str(tt)
varU=nc_u_comp.variables['u10'][tt,:,:]
varV=nc_v_comp.variables['v10'][tt,:,:]
lat = nc_u_comp.variables['latitude'][:]
lon = nc_u_comp.variables['longitude'][:]
plt.rcParams["figure.figsize"] = [10,10]
#plane projection of the world
#map with box size (defintion on the top)
box = sgeom.box(minx=llcrnrlon, maxx=urcrnrlon, miny=llcrnrlat, maxy=urcrnrlat)
x0, y0, x1, y1 = box.bounds
#Map plot. The middel of the map is central_longitude
#proj = ccrs.PlateCarree(central_longitude=0)
proj=ccrs.PlateCarree()
#Change middelpoint of the map
box_proj = ccrs.PlateCarree(central_longitude=0)
ax2 = plt.axes(projection=proj)
ax2.set_extent([x0, x1, y0, y1], box_proj)
ax2.add_feature(cartopy.feature.BORDERS, linestyle='-', alpha=.5)
ax2.coastlines(resolution='50m')
#Definition of the scale_bar
gl = ax2.gridlines(ccrs.PlateCarree(), \
linestyle='--', alpha=1, linewidth=0.5, draw_labels=True)
gl.xlabels_top = False
gl.ylabels_right = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
magnitude = (varU ** 2 + varV ** 2) ** 0.5
strm =plt.streamplot(lon , lat , varU, varV, linewidth=2, density=2, color=magnitude)
cbar= plt.colorbar()
cbar.set_label('$m/s$')
name='Wind in 10 m '+ date + h+' UTC'
ax2.set_aspect('auto')
plt.title(name, y=1)
Now i want to create an 2x4 Subplot array with a colorbar allocate to the complete Subplot array.
I find some infromation in the internet, but it doesn't run with my code. Maybe someone can help me?
This shows how to plot an array of simple Cartopy maps in 4 rows 2 columns. Also shows how to plot a colorbar to accompany the maps array. Hope it helps.
import numpy as np
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib as mpl
# create figure with figsize big enough to accomodate all maps, labels, etc.
fig = plt.figure(figsize=(8, 10), tight_layout=False)
# define plot array's arrangement
columns = 2
rows = 4
# set projection to use
projex = ccrs.PlateCarree()
# set the colormap and norm for
# the colorbar to use
cmap1 = mpl.cm.magma
norm1 = mpl.colors.Normalize(vmin=0, vmax=100)
def plotmymap(axs):
# your plot specs of each map should replace this
img = np.random.randint(100, size=(15, 30)) # 2d array of random values (1-100)
# render image on current axis
plims = plt.imshow(img, extent=[-180,180,-90,90], alpha=0.5, cmap=cmap1, norm=norm1)
axs.set_global()
axs.coastlines()
# add title to the map
axs.set_title("Map_"+str(i))
return plims # for use by colorbar
for i in range(1, columns*rows +1):
# add a subplot into the array of plots
ax = fig.add_subplot(rows, columns, i, projection=projex)
plims = plotmymap(ax) # a simple maps is created on subplot
# add a subplot for vertical colorbar
bottom, top = 0.1, 0.9
left, right = 0.1, 0.8
fig.subplots_adjust(top=top, bottom=bottom, left=left, right=right, hspace=0.15, wspace=0.25)
cbar_ax = fig.add_axes([0.85, bottom, 0.05, top-bottom])
fig.colorbar(plims, cax=cbar_ax) # plot colorbar
plt.show() # this plot all the maps
The resulting plots:

Categories