Is it possible to take a histogram from seaborn and add a normal distribution?
Say I had something like this scatter plot and histogram from the documentation.
import matplotlib.pyplot as plt
import seaborn as sns

# Load the example "penguins" dataset through seaborn's dataset loader.
penguins = sns.load_dataset("penguins")

# Joint plot of bill length against bill depth.
sns.jointplot(data=penguins, x="bill_length_mm", y="bill_depth_mm")

# `plt` has to be imported before this call; the original snippet used it
# without importing matplotlib.pyplot.
plt.savefig('deletethis.png', bbox_inches='tight')
Can I superimpose a distribution on the sides, like in the image below?
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
# Draw the sample that the histogram will summarise.
x = np.random.normal(size=100000)

# One-dimensional histogram, drawn with density=True so it shares a scale
# with the probability density plotted below.
plt.hist(x, bins=80, density=True)

# Evaluate the standard normal pdf on a regular grid and overlay it.
xvals = np.arange(-4, 4, 0.01)
pdf_vals = norm.pdf(xvals)
plt.plot(xvals, pdf_vals, label='$N(0,1)$')
plt.legend()
The following draws a kernel density estimate (KDE) over each marginal histogram, which shows the shape of the distribution (and lets you judge whether it looks normal):
# Create the empty grid first, then fill the joint and marginal axes separately.
g = sns.JointGrid(
    data=penguins,
    x="bill_length_mm",
    y="bill_depth_mm",
)

# Central panel: scatter plot with large, half-transparent markers.
g.plot_joint(sns.scatterplot, s=100, alpha=.5)

# Side panels: histograms with a KDE curve drawn on top.
g.plot_marginals(sns.histplot, kde=True)
The following superimposes a normal distribution on the histograms in the axes.
import seaborn as sns
import numpy as np
import pandas as pd
from scipy.stats import norm
# Keep only the two plotted columns and drop rows with missing values so the
# normal fit below is computed on clean data.
df1 = penguins.loc[:, ["bill_length_mm", "bill_depth_mm"]].dropna()

# x and y are passed as keywords: recent seaborn releases no longer accept
# them positionally.
axs = sns.jointplot(data=df1, x="bill_length_mm", y="bill_depth_mm")
axs.ax_joint.scatter("bill_length_mm", "bill_depth_mm", data=df1, c='r', marker='x')

# Clear the default marginal plots and redraw them as density histograms, so
# that they share a scale with the fitted probability density.
axs.ax_marg_x.cla()
axs.ax_marg_y.cla()
sns.histplot(x=df1["bill_length_mm"], stat="density", ax=axs.ax_marg_x)
sns.histplot(y=df1["bill_depth_mm"], stat="density", ax=axs.ax_marg_y)

# Fit a normal distribution to each variable and overlay its pdf.
# This replaces the deprecated `sns.distplot(..., fit=norm)`.
for column, marg_ax, is_vertical in (
    ("bill_length_mm", axs.ax_marg_x, False),
    ("bill_depth_mm", axs.ax_marg_y, True),
):
    values = df1[column]
    mu, sigma = norm.fit(values)
    grid = np.linspace(values.min(), values.max(), 200)
    density = norm.pdf(grid, mu, sigma)
    if is_vertical:
        # The right-hand marginal is rotated: data on y, density on x.
        marg_ax.plot(density, grid, color="k")
    else:
        marg_ax.plot(grid, density, color="k")
Related
I have a heatmap done in seaborn and a contour plotted via matplotlib.pyplot.
Is it possible to overlay the two?
Seaborn uses matplotlib under the hood. You can combine seaborn plots as if they were directly created by matplotlib. To draw onto the same subplot, the same ax should be used. To align the centers of the heatmap cells with the contour lines, you need to add 0.5 to the x and the y coordinates.
Here is an example to get you started:
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
# Import from the public namespace; `scipy.ndimage.filters` is deprecated.
from scipy.ndimage import gaussian_filter

# Smooth random noise to get a field with visible structure.
data = gaussian_filter(np.random.randn(20, 40), sigma=2)

fig, ax = plt.subplots(figsize=(15, 5))
sns.heatmap(data=data, cbar_kws={'pad': 0.02}, ax=ax)

# Heatmap cell (i, j) is centred at (j + 0.5, i + 0.5), so the contour
# coordinates are shifted by 0.5 to line up with the cell centres.
ax.contour(np.arange(.5, data.shape[1]), np.arange(.5, data.shape[0]), data, colors='yellow')
plt.show()
I want to plot seaborn boxplot with box from min to max, rather than from 2nd to 3rd quartile, can I control it in matplotlib or seaborn? I know I can control the whiskers - how about boxes?
Here is an approach that mimics seaborn's boxplot via a horizontal bar plot built from an aggregated dataframe.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# set sns plot style
sns.set()
tips = sns.load_dataset('tips')

# Top axes: regular boxplot for reference. Bottom axes: min-to-max "boxes".
fig, (ax1, ax2) = plt.subplots(nrows=2)
sns.boxplot(x='total_bill', y='day', data=tips, ax=ax1)

# `day` is categorical; `observed` is passed explicitly so the result does not
# depend on the pandas default, which differs between pandas versions.
day_min_max = (
    tips[['day', 'total_bill']]
    .groupby('day', observed=False)
    .agg(['min', 'max', 'median'])
)
# remove the old column name level, leaving only 'min', 'max' and 'median'
day_min_max.columns = day_min_max.columns.droplevel(0)

ax2.use_sticky_edges = False
# Each box is drawn as two stacked horizontal bars: min -> median and
# median -> max. `left` is the starting x position of each bar.
sns.barplot(y=day_min_max.index, x=day_min_max['median'] - day_min_max['min'], left=day_min_max['min'], ec='k', ax=ax2)
sns.barplot(y=day_min_max.index, x=day_min_max['max'] - day_min_max['median'], left=day_min_max['median'], ec='k', ax=ax2)
plt.tight_layout()
plt.show()
Depicting the first and third quartiles is the defining characteristic of a boxplot, so I don't think that this option exists. However, if you want to use the minima and maxima, you are not going to plot any whiskers, and hence you can simply use a barplot instead:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Ten groups with three observations each.
data = np.random.rand(10, 3)

# Bar height is the range (max - min) of each group and the bar starts at the
# group minimum. `np.ptp` is used because the `ndarray.ptp` method was removed
# in NumPy 2.0.
sns.barplot(x=np.arange(10), y=np.ptp(data, axis=1), bottom=data.min(axis=1))
plt.show()
Is there a way to show pair-correlation values with seaborn.pairplot(), as in the example below (created with ggpairs() in R)? I can make the plots using the attached code, but cannot add the correlations. Thanks
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.pairplot(iris, kind='scatter', diag_kind='kde')

# remove upper triangle plots
# (k=1 excludes the main diagonal, so the KDE plots stay visible)
for i, j in zip(*np.triu_indices_from(g.axes, 1)):
    g.axes[i, j].set_visible(False)
plt.show()
If you use PairGrid instead of pairplot, then you can pass a custom function that would calculate the correlation coefficient and display it on the graph:
from scipy.stats import pearsonr
def reg_coef(x, y, label=None, color=None, **kwargs):
    """Write the Pearson correlation of *x* and *y* in the centre of the current axes.

    Intended as a mapping function for ``PairGrid.map_upper`` and similar.
    ``label``, ``color`` and ``**kwargs`` are accepted but not used, so that
    extra keyword arguments supplied by the caller do not raise.
    """
    ax = plt.gca()
    # Only the coefficient is displayed; the p-value is discarded.
    r, _ = pearsonr(x, y)
    ax.annotate('r = {:.2f}'.format(r), xy=(0.5, 0.5), xycoords='axes fraction', ha='center')
    # Hide ticks and frame so the panel shows only the text.
    ax.set_axis_off()
iris = sns.load_dataset("iris")
g = sns.PairGrid(iris)

# Diagonal: univariate distribution of each variable.
# `histplot` replaces the deprecated `distplot`.
g.map_diag(sns.histplot)
# Lower triangle: scatter plot with a fitted regression line.
g.map_lower(sns.regplot)
# Upper triangle: the correlation coefficient as text.
g.map_upper(reg_coef)
Hello I am new to Python and to Seaborn. I would just like to set x-limits and y-limits to a Seaborn jointplot. Furthermore I would like to plot this figure without the distribution information above and at the right side of the main plot. How can I do that? I am trying something like this:
import numpy as np
import matplotlib.pyplot as plt from
matplotlib.ticker import NullFormatter
import seaborn as sns
sns.set(style="ticks")
xData = np.random.rand(100,1)*5
yData = np.random.rand(100,1)*10
xlim = [-15 15]
ylim = [-20 20]
g = sns.jointplot(xData, yData, kind="hex", color="b", xlim, ylim)
`xlim` and `ylim` should be tuples passed as keyword arguments. Hence your code should be:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import seaborn as sns

sns.set(style="ticks")

# One-dimensional samples: jointplot expects a 1-D vector per variable.
xData = np.random.rand(100) * 5
yData = np.random.rand(100) * 10

# x and y are passed as keywords (recent seaborn releases no longer accept
# them positionally); the axis limits are given as (low, high) tuples.
g = sns.jointplot(x=xData, y=yData, kind="hex", color="b", xlim=(-15, 15), ylim=(-20, 20))
The joint axes of a seaborn jointplot of kind="hex" is a matplotlib hexbin plot. Hence you can just call plt.hexbin(xData, yData).
import numpy as np
import matplotlib.pyplot as plt
# Random sample: x drawn from [0, 5), y drawn from [0, 10).
xData = np.random.rand(100, 1) * 5
yData = np.random.rand(100, 1) * 10

# Draw only the hexbin panel, without the marginal distributions.
plt.hexbin(xData, yData, gridsize=10)

# Axis limits, applied after plotting.
xlim = [-5, 10]
ylim = [-5, 15]
plt.xlim(xlim)
plt.ylim(ylim)

plt.show()
I'm making a density plot with matplotlib, and I would also like to draw a rug plot under it. A good example of how to make a density plot is here: How to create a density plot in matplotlib?
But I couldn't find any good example for a rug plot. In R it can be done easily with rug(data).
You can plot markers at each datapoint.
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
# Bimodal sample: 30 points around 0 and 20 points around 5.
sample = np.hstack((np.random.randn(30), np.random.randn(20)+5))

# Use the public `scipy.stats.gaussian_kde`; the `scipy.stats.kde` namespace
# is deprecated.
density = stats.gaussian_kde(sample)

fig, ax = plt.subplots(figsize=(8,4))
x = np.arange(-6,12,0.1)
ax.plot(x, density(x))

# Rug: one vertical tick marker per data point, drawn just above y = 0.
ax.plot(sample, [0.01]*len(sample), '|', color='k')
You can find an example here!
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Small example sample and two KDEs that differ only in the bandwidth rule.
x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
kde1 = stats.gaussian_kde(x1)                          # default: Scott's rule
kde2 = stats.gaussian_kde(x1, bw_method='silverman')   # Silverman's rule

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x1, np.zeros(x1.shape), 'b+', ms=20)  # rug plot
x_eval = np.linspace(-10, 10, num=200)
ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
# The second curve must use kde2; plotting kde1 twice would draw the same
# line under both labels.
ax.plot(x_eval, kde2(x_eval), 'r-', label="Silverman's Rule")
Seems to be the core of it!
You can also use Seaborn.distplot, which wraps histogram, KDE and rugs altogether. Figures made by Seaborn are also prettier by default.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Bimodal sample: 30 points around 0 and 20 points around 5.
sample = np.hstack((np.random.randn(30), np.random.randn(20)+5))
fig, ax = plt.subplots(figsize=(8,4))

# `distplot` is deprecated; draw the KDE and the rug with their dedicated
# functions, passing `ax` so both land on the axes created above.
sns.kdeplot(x=sample, color="k", linewidth=3, ax=ax)
sns.rugplot(x=sample, color="g", ax=ax)
plt.show()
Here's the answer for people just looking for a rugplot to use on a matplotlib axis: you can use a seaborn function.
import seaborn as sns
# The target Axes goes in `ax`; `axis` is a different (deprecated) parameter
# that selects the x or y direction, so `axis=ax` was wrong.
sns.rugplot(x=xdata, height=0.025, ax=ax, color='k')
This looks much nicer than a pure-matplotlib kludge because the rug is aligned to (flush with) the x-axis.