scatterplot matrix with marginal probability distributions in seaborn - python

It is straightforward to create scatter plot matrices with seaborn's pairplot. Jointplot also allows combining a scatter plot with marginal probability distributions for a single plot.
Although the option diag_kind='kde' lets you plot the probability distributions on the diagonal (useful when x_vars and y_vars are the same), I want to combine both to have marginal probability distributions in a scatter plot matrix. Something like this:
How do I get marginal probability distributions in a matrix scatter plot in seaborn as shown in my screenshot above?

Many thanks to mwaskom for the guidance.
As you suggested, I built my own matplotlib figure and plotted the seaborn plots there guided by this piece of documentation.
def basic_conf(f, a, xin, yin, x, y):
    """Add one scatter-panel axes to figure ``f`` and apply the shared styling.

    Parameters
    ----------
    f : figure-like object providing ``add_subplot``.
    a : subplot specification, passed unchanged to ``f.add_subplot``.
    xin : column index of the panel; panels outside column 0 lose their
        y tick labels and y-axis label.
    yin : row index of the panel (accepted but not used here).
    x : name of the x variable (accepted but not used here).
    y : text used for the y-axis label.

    Returns
    -------
    The newly created axes.
    """
    ax = f.add_subplot(a)
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    if xin != 0:
        # NOTE(review): the original indentation was lost; the `if` body is
        # assumed to be exactly these two statements -- confirm.
        ax.set_yticklabels([])
        ax.set_ylabel(" ", fontsize=0).set_visible(False)
    # Presumably the label hidden above stays hidden, since set_ylabel only
    # changes its text and font size -- verify against Matplotlib.
    ax.set_ylabel(y, fontsize=10)
    # Scatter panels never show x tick labels or an x-axis label.
    ax.set_xticklabels([])
    ax.set_xlabel(" ", fontsize=0).set_visible(False)
    return ax
def xhist_conf(f, a, x):
    """Add an axes for an x-variable marginal histogram and style it.

    Parameters
    ----------
    f : figure-like object providing ``add_subplot``.
    a : subplot specification, passed unchanged to ``f.add_subplot``.
    x : text used for the x-axis label.

    Returns
    -------
    The newly created axes, with only the bottom spine left visible and
    no y tick labels, y tick marks or y-axis label.
    """
    ax = f.add_subplot(a)
    for side in ("right", "left", "top"):
        ax.spines[side].set_visible(False)
    ax.set_yticklabels([])
    ax.yaxis.set_ticks_position('none')
    ax.set_xlabel(x, fontsize=10)
    ax.set_ylabel(" ").set_visible(False)
    return ax
def yhist_conf(f, a, y):
    """Add an axes for a y-variable marginal histogram and style it.

    Parameters
    ----------
    f : figure-like object providing ``add_subplot``.
    a : subplot specification, passed unchanged to ``f.add_subplot``.
    y : name of the y variable (accepted but not used here).

    Returns
    -------
    The newly created axes, with only the left spine left visible and
    no tick labels, x tick marks or axis labels.
    """
    ax = f.add_subplot(a)
    ax.tick_params(axis='both', which='major', labelsize=10)
    for side in ("right", "top", "bottom"):
        ax.spines[side].set_visible(False)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.xaxis.set_ticks_position('none')
    ax.set_xlabel(" ", fontsize='xx-small').set_visible(False)
    ax.set_ylabel(" ", fontsize=0).set_visible(False)
    return ax
def includer(ax, x, y):
    """Write the Pearson correlation of columns ``x`` and ``y`` onto ``ax``.

    Reads the module-level ``concat_convert`` table and the module-level
    ``stats`` name (presumably ``scipy.stats`` -- confirm against the
    file's imports).

    Parameters
    ----------
    ax : axes to annotate.
    x, y : keys of the two ``concat_convert`` columns to correlate.
    """
    r, _ = stats.pearsonr(concat_convert[x], concat_convert[y])
    # transAxes places the text in axes-relative coordinates (upper left).
    ax.text(0.1, 0.9, f'ρ = {r:.2f}', transform=ax.transAxes)
# Column variables (x axes) and row variables (y axes) of the scatter matrix.
x_vars = ["$P_{LA}$", "$R^{Ao}_P$", "$C^{Ao}_P$", "$R^{Ao}_S$", "$B_{VAD}$", "$A_{VAD}$", "HR", "EF"]
y_vars = ["${Q}^{avg}_{M}$", "${Q}^{max}_{M}$", "${Q}^{avg}_{Ao}$", "${Q}^{max}_{Ao}$", "${Q}^{avg}_{VAD}$", "${Q}^{max}_{VAD}$", "$Q_{RAT}$"]

sns.set(context="paper", font_scale=1.75, style="ticks")
f = plt.figure(figsize=(18, 16), dpi=600)

# One extra row (bottom) and one extra column (right) hold the marginal
# histograms; with the lists above this is the original 8 x 9 grid.
hist_row = len(y_vars)
hist_col = len(x_vars)
gs = f.add_gridspec(hist_row + 1, hist_col + 1)

plt.rcParams['font.size'] = '10'
plt.rcParams['xtick.labelsize'] = '8'

# Keep a handle on every axes that is tweaked later. Calling
# f.add_subplot(gs[...]) a second time would create a new, empty axes on
# top of the existing panel in current Matplotlib instead of returning it.
scatter_axes = {}
xhist_axes = {}

with sns.axes_style("ticks"):
    # Scatter/regression panels: one per (y variable, x variable) pair.
    for xin, x in enumerate(x_vars):
        for yin, y in enumerate(y_vars):
            ax = basic_conf(f, gs[yin, xin], xin, yin, x, y)
            sns.regplot(ax=ax, data=concat_convert, x=x, y=y, scatter_kws={'s': 4})
            includer(ax, x, y)
            scatter_axes[yin, xin] = ax

    # Marginal distributions of the x variables along the bottom row.
    for xin, x in enumerate(x_vars):
        ax = xhist_conf(f, gs[hist_row, xin], x)
        sns.histplot(ax=ax, data=concat_convert, x=x, kde=True)
        xhist_axes[xin] = ax

    # Marginal distributions of the y variables down the right-hand column.
    for yin, y in enumerate(y_vars):
        ax = yhist_conf(f, gs[yin, hist_col], y)
        sns.histplot(ax=ax, data=concat_convert, y=y, kde=True)

    # Fix the x range of every scatter panel in column 2.
    for yin in range(len(y_vars)):
        scatter_axes[yin, 2].set_xlim((0.001, 0.0014))

    # Scientific notation on the x axis of the histograms in columns 0 and 5.
    for xin in (0, 5):
        xhist_axes[xin].ticklabel_format(style='sci', scilimits=(0, 0), axis='x')
And it gets me exactly what I want:
Many thanks.
EDIT: Final code snippet and obtained plot.

Related

Combine Binned barplot with lineplot

I'd like to represent two datasets on the same plot, one as a line and one as a binned barplot. I can do each individually:
# Bar data: a 1000-step cumulative random walk, melted to long form so the
# numbers end up in a "value" column.
tobar = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
# Assign each row position to one of 20 quantile bins of the index.
tobar["bins"] = pd.qcut(tobar.index, 20)
# One bar per bin.
bp = sns.barplot(data=tobar, x="bins", y="value")
# Line data: a second, independent random walk in the same long form.
toline = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
# The line is drawn against the raw row index.
lp = sns.lineplot(data=toline, x=toline.index, y="value")
But when I try to combine them, of course the x axis gets messed up:
# Attempt to overlay both plots: bars on `ax`, line on a twin axes that
# shares the same x axis.
fig, ax = plt.subplots()
ax2 = ax.twinx()
bp = sns.barplot(data=tobar, x="bins", y="value", ax=ax)
lp = sns.lineplot(data=toline, x=toline.index, y="value", ax=ax2)
# Clears only the x-axis label.
bp.set(xlabel=None)
I also can't seem to get rid of the bin labels.
How can I get both pieces of information on one plot?
This answer explains why it's better to plot the bars with matplotlib.axes.Axes.bar instead of sns.barplot or pandas.DataFrame.bar.
In short, the xtick locations correspond to the actual numeric value of the label, whereas the xticks for seaborn and pandas are 0 indexed, and don't correspond to the numeric value.
This answer shows how to add bar labels.
ax2 = ax.twinx() can be used for the line plot if needed
Works the same if the line plot is different data.
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
Imports and DataFrame
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Reproducible sample data: a 1000-step cumulative random walk in long form.
np.random.seed(2022)
df = pd.DataFrame(np.random.randn(1000).cumsum()).melt()
# Assign every row position to one of 20 quantile bins of the index.
df["bins"] = pd.qcut(df.index, 20)
# Integer midpoint of each interval; used later as the bar's x position.
df["mid"] = df["bins"].apply(lambda interval: interval.mid.round().astype(int))
# Mean of "value" per interval midpoint, flattened back to two columns.
pt = df.pivot_table(index="mid", values="value", aggfunc="mean").reset_index()
Plot 1
# create the figure
fig, ax = plt.subplots(figsize=(30, 7))
# add a horizontal line at y=0
ax.axhline(0, color='black')
# add the bar plot
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
# set the labels on the xticks - if desired
ax.set_xticks(ticks=pt.mid, labels=pt.mid)
# add the intervals as labels on the bars - if desired
ax.bar_label(ax.containers[0], labels=df.bins.unique(), weight='bold')
# add the line plot
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 2
# Plot 2: same bars and line as Plot 1, but the intervals label the x ticks.
fig, ax = plt.subplots(figsize=(30, 7))
# horizontal reference line at y=0
ax.axhline(0, color='black')
# narrow bars at the interval midpoints
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
# ticks at the midpoints, labelled with the intervals and rotated 45 degrees
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
# no explicit labels are passed, so bar_label uses its default labels
ax.bar_label(ax.containers[0], weight='bold')
# line plot on the same axes; the return value is discarded
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 3
The bar width is the width of the interval
# Plot 3: same as Plot 2, but with wide bars (width=50) and black edges.
fig, ax = plt.subplots(figsize=(30, 7))
# horizontal reference line at y=0
ax.axhline(0, color='black')
# ec='k' outlines each bar in black
ax.bar(data=pt, x='mid', height='value', width=50, alpha=0.5, ec='k')
# ticks at the midpoints, labelled with the intervals and rotated 45 degrees
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
# no explicit labels are passed, so bar_label uses its default labels
ax.bar_label(ax.containers[0], weight='bold')
# line plot on the same axes; the return value is discarded
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')

How can the number of grid lines be increased in seaborn?

I have such a piece of code for plotting:
# Minimal example: seaborn's dark grid theme with a straight line y = x.
sns.set_style("darkgrid")
fig, ax = plt.subplots(1, 1)
# Integers 0..9, used for both coordinates.
x = np.arange(10)
ax.plot(x, x)
And it gives me:
How can the number of grid lines be increased in seaborn, to make the grid denser?
Based on this question (add minor gridlines to matplotlib plot using seaborn), you can do it like this:
# The original referenced `mpl.ticker` without importing `mpl`.
from matplotlib.ticker import AutoMinorLocator

sns.set_style("darkgrid")
fig, ax = plt.subplots(1, 1)
x = np.arange(10)
ax.plot(x, x)
# Add minor ticks between the major ones on both axes so that there is
# something for the minor grid lines to attach to.
ax.xaxis.set_minor_locator(AutoMinorLocator())
ax.yaxis.set_minor_locator(AutoMinorLocator())
# The first argument is passed positionally: its keyword was renamed from
# `b` to `visible`, and the positional form works with either name.
ax.grid(True, which='major', color='w', linewidth=1.0)
ax.grid(True, which='minor', color='w', linewidth=0.5)
You obtain this figure :

How to display axis tick labels over plotted values using matplotlib and seaborn?

I am using matplotlib and seaborn to scatter plot some data contained in two arrays, x and y (a 2-dimensional plot). The issue arises when displaying the tick labels of both axes over the data: the plotted data overlaps the labels, so they cannot be seen.
I have tried different possibilities, such as resetting the labels after the plot is done and setting them later, or using annotation marks. However, none of those options worked for me...
The piece of code I am using to generate this scatter plot is:
# Scatter plot of the arrays x and y with both axes crossing at the origin.
sns.set_style("whitegrid")
ax = sns.scatterplot(x=x, y=y, s=125)
ax.set_xlim(-20, 20)
ax.set_ylim(-20, 20)
# Move the left spine to x = 0, colour it black and hide the right spine.
ax.spines['left'].set_position('zero')
ax.spines['left'].set_color('black')
ax.spines['right'].set_color('none')
ax.yaxis.tick_left()
# Same for the horizontal axis: bottom spine at y = 0, top spine hidden.
ax.spines['bottom'].set_position('zero')
ax.spines['bottom'].set_color('black')
ax.spines['top'].set_color('none')
ax.xaxis.tick_bottom()
# Relabel the current tick positions as percentages (tick value / 100).
values = ax.get_xticks()
ax.set_xticklabels(["{0:.0%}".format(x/100) for x in values])
values = ax.get_yticks()
ax.set_yticklabels(["{0:.0%}".format(y/100) for y in values])
ax.tick_params(axis='both', which='major', labelsize=15)
ax.grid(True)
The generated plot is as follows:
But the desired output should be something like this:
Thank you in advance for any advice or help!
You need two things: zorder and a bold weight for the ticklabels. The zorder of scatter points needs to be lower than that of the tick labels so that the latter appear on the top. 'fontweight': 'bold' is to have boldfaced tick labels.
The axis appears shifted off the 0 but that is because you did not provide any data. So I have to choose some random data
# import commands here
x = np.random.randint(-100, 100, 10000)
y = np.random.randint(-100, 100, 10000)
ax = sns.scatterplot(x=x, y=y, s=125, zorder=-1)
# Rest of the code
values = ax.get_xticks()
ax.set_xticklabels(["{0:.0%}".format(x/100) for x in values], fontdict={'fontweight': 'bold'})
values = ax.get_yticks()
ax.set_yticklabels(["{0:.0%}".format(y/100) for y in values], fontdict={'fontweight': 'bold'})
ax.tick_params(axis='both', which='major', labelsize=15, zorder=1)
ax.grid(True)

How to make a bar chart with only a height indicator not showing the full bar?

This chart almost looks good but is probably not the way to model this in matplotlib. How to have two horizontal bars that extend to the left and right of vertical line at an x-point to show the change of the two datasets eg SDR from 0.7 to 0.25.
Currently I patch things together with '$-$' markers, which produce misaligned legends that I am not able to place properly. If I change the figsize, the markers become misaligned with the vertical bar at their x-point, e.g. SDR.
How can I model this kind of chart properly?
# Ten random heights in [0, 1); even and odd entries form the two datasets.
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/2,1.5*2.5),)
ind = np.arange(10, dtype=np.float64)*1#cordx
# Workaround under discussion: wide '$-$' text markers stand in for the
# horizontal indicators, nudged by hand-tuned x/y offsets.
ax.plot(ind[0::2]+0.05, layer0[0::2]-0.04, ls='None', marker='$-$', markersize=40)
ax.plot(ind[1::2]-0.15, layer0[1::2]-0.04, ls='None', marker='$-$', markersize=40)
ax.set_ylim(0,1.05)
ax.set_yticks(np.arange(0, 1.1, step=0.1))
# One tick per category, placed between its two markers.
ax.set_xticks(ind[0::2]+0.5)
ax.set_xticklabels( ('SDR', 'SSR', 'SCR', 'RCR', 'GUR') )
# Passed positionally: the keyword was renamed from `b` to `visible`, and
# the positional form works with either name.
plt.grid(True)
plt.grid(color='black', which='major', axis='y', linestyle='--', lw=0.2)
plt.show()
Alternatively, you can use a horizontal bar chart barh which is more intuitive in this case. Here the key parameter is left which will shift your horizontal bar charts to left/right.
Following is a complete answer:
import numpy as np
import matplotlib.pyplot as plt
# Reproducible sample data: even and odd entries form the two datasets.
np.random.seed(2)
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/2,1.5*2.5),)
n = 10
width = 0.5
ind = np.arange(n, dtype=np.float64)*1#cordx
# Each indicator is a very thin horizontal bar: its y position is the data
# value and `left` sets where it starts on the x axis. The first dataset
# covers [ind, ind + width]; the second continues from ind + width.
ax.barh(layer0[0::2], [width] * (n // 2), height=0.01, left=ind[0::2])
ax.barh(layer0[1::2], [width] * (n // 2), height=0.01, left=ind[0::2] + width)
ax.set_ylim(0,1.05)
ax.set_yticks(np.arange(0, 1.1, step=0.1))
# Tick where the two segments of a category meet.
ax.set_xticks(ind[0::2] + width)
ax.set_xticklabels( ('SDR', 'SSR', 'SCR', 'RCR', 'GUR') )
# Passed positionally: the keyword was renamed from `b` to `visible`, and
# the positional form works with either name.
plt.grid(True)
plt.grid(color='black', which='major', axis='y', linestyle='--', lw=0.2)
plt.show()
Up until now I hadn't thought of bar charts with a bottom offset, which seems to work fine:
# Ten random heights in [0, 1); even and odd entries form the two datasets.
layer0 = np.random.random(10)
fig, ax = plt.subplots(1,1, figsize=(15/1.3,1.5*2.5),)# sharey=True)
ind = np.arange(10, dtype=np.float64)*1#cordx
# Thickness of each indicator bar and its horizontal extent.
height=0.03
width=0.8
# Each indicator is a thin vertical bar whose bottom is (value - height),
# so its top edge sits at the data value. The two datasets are centred
# width/2 to the left and right of the same x positions.
ax.bar(ind[0::2]-width/2, height, width=width, bottom=layer0[0::2]-height)
ax.bar(ind[0::2]+width/2, height, width=width, bottom=layer0[1::2]-height)
ax.set_ylim(-0.,1.05)
# Solid vertical grid lines at the major x ticks.
plt.grid(color='black', which='major', axis='x', linestyle='-', lw=0.8)

Matplotlib help: Formatting a scatter plot to be square

I have a scatter plot where both axes are limited to -100 and 100. However, when I graph the data, I always get an unappealing plot that is rectangular and has incorrect axis labels. I'd like the plot to be a square with -100 and 100 as the last axis labels. Does anyone have advice for fixing this formatting issue?
My code is as follows:
import scipy.stats
import numpy as np
# x_val and y_val are the data arrays; they are not defined in this snippet.
# pearsonr's result is indexed below as r[0] for the coefficient.
r = scipy.stats.pearsonr(x_val, y_val)
# NOTE(review): `matplotlib` is not imported in the lines shown -- confirm
# that `import matplotlib.pyplot` exists elsewhere.
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x_val, y_val, s=75, color='green', edgecolor='black', linewidth = 2, alpha=0.4)
# Draw grid lines and other artists behind the scatter points.
ax.set_axisbelow(True)
# Dark reference lines through the origin.
matplotlib.pyplot.axvline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.axhline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.grid(linewidth=1, color='#bfbfbf')
# np.arange excludes its stop value, so these ticks run from -100 to 80
# and no tick is placed at 100.
matplotlib.pyplot.xticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.yticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
# Print the correlation coefficient near the top-left corner (data coordinates).
matplotlib.pyplot.text(-95, 85,'Pearson\'s r: %.3f'%r[0], fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.show()

Categories