How to combine a pairplot and a triangular heatmap? - python

I am trying to make an upper triangle correlation matrix which ideally I would like to superpose to another picture of a lower triangle matrix. Therefore, I would like the mask color to be setup to none or transparent (otherwise if it's white I will not be able to superpose)...any idea about how to do this in seaborn?
EDIT
Here is what I would like to do: using a set of columns from dataframe, I would like to plot the pairplot (lower triangle) and the correlation map (upper triangle) of these columns
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
rs = np.random.RandomState(112358)
d1 = pd.DataFrame(data=rs.normal(size=(100, 10)), columns=[*'abcdefghij' ])
corr1 = d1.corr()
mask1 = np.tril(np.ones_like(corr1, dtype=bool))
fig, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(corr1, mask=mask1, cmap='PRGn', vmax=.3, vmin=-.3,
square=True, linewidths=.5, cbar_kws={"shrink": .85, "pad":-.01}, ax=ax)
def hide_current_axis(*args, **kwds):
plt.gca().set_visible(False)
e = sns.pairplot(d1)
e.map_upper(hide_current_axis)
plt.show()
This code of course works, but it plots the two figures separately.

The normal way to create a triangular heatmap is to mask away the not-needed part. Nothing will be drawn there, the original background color will stay visible. If you draw a second heatmap, it also will only draw where it isn't masked away.
Here is some code to demonstrate the idea.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white")
rs = np.random.RandomState(112358)
d1 = pd.DataFrame(data=rs.normal(size=(100, 10)), columns=[*'abcdefghij' ])
d2 = pd.DataFrame(data=rs.normal(size=(100, 10)), columns=[*'abcdefghij' ])
corr1 = d1.corr()
corr2 = d2.corr()
mask1 = np.tril(np.ones_like(corr1, dtype=bool))
mask2 = np.triu(np.ones_like(corr2, dtype=bool))
fig, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(corr1, mask=mask1, cmap='PRGn', vmax=.3, vmin=-.3,
square=True, linewidths=.5, cbar_kws={"shrink": .85, "pad":-.01}, ax=ax)
sns.heatmap(corr1, mask=mask2, cmap='RdYlBu', vmax=.3, vmin=-.3,
square=True, linewidths=.5, cbar_kws={"shrink": .85}, ax=ax)
# the following lines color and hatch the axes background, only the diagonals are visible
ax.patch.set_facecolor('grey')
ax.patch.set_edgecolor('yellow')
ax.patch.set_hatch('xx')
plt.show()
About the new question, to combine a pairplot with a triangular heatmap. As a pairplot is a figure-level function, it creates its own figure with subplots. It should be created first.
As a second step, a special ax for the heatmap can be created, using the positions of the pairplot's subplots. Setting its facecolor to 'none' makes it fully transparent (the default would be white, hiding everything behind).
Adding a colorbar can be more cumbersome, as the pairplot doesn't leave a good spot to position it.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def hide_current_axis(*args, **kwds):
plt.gca().set_visible(False)
rs = np.random.RandomState(112358)
d1 = pd.DataFrame(data=rs.normal(size=(20, 5)), columns=[*'abcde'])
e = sns.pairplot(d1)
e.map_upper(hide_current_axis)
(xmin, _), (_, ymax) = e.axes[0, 0].get_position().get_points()
(_, ymin), (xmax, _) = e.axes[-1, -1].get_position().get_points()
ax = e.fig.add_axes([xmin, ymin, xmax - xmin, ymax - ymin], facecolor='none')
corr1 = d1.corr()
mask1 = np.tril(np.ones_like(corr1, dtype=bool))
sns.heatmap(corr1, mask=mask1, cmap='seismic', vmax=.5, vmin=-.5,
linewidths=.5, cbar=False, annot=True, annot_kws={'size': 22}, ax=ax)
ax.set_xticks([])
ax.set_yticks([])
# ax.xaxis.tick_top()
# ax.yaxis.tick_right()
plt.show()
As mentioned in the comments, an approach more faithful to seaborn's philosophy would be to color the axes of the upper right subplots according to the correlation together with a numeric display. I couldn't find example code, here is my attempt:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
def corrfunc(x, y, **kwds):
cmap = kwds['cmap']
norm = kwds['norm']
ax = plt.gca()
ax.tick_params(bottom=False, top=False, left=False, right=False)
sns.despine(ax=ax, bottom=True, top=True, left=True, right=True)
r, _ = pearsonr(x, y)
facecolor = cmap(norm(r))
ax.set_facecolor(facecolor)
lightness = (max(facecolor[:3]) + min(facecolor[:3]) ) / 2
ax.annotate(f"r={r:.2f}", xy=(.5, .5), xycoords=ax.transAxes,
color='white' if lightness < 0.7 else 'black', size=26, ha='center', va='center')
rs = np.random.RandomState(112358)
d1 = pd.DataFrame(data=rs.normal(size=(20, 5)), columns=[*'abcde'])
g = sns.PairGrid(d1)
g.map_lower(plt.scatter, s=10)
g.map_diag(sns.histplot, kde=False)
g.map_upper(corrfunc, cmap=plt.get_cmap('seismic'), norm=plt.Normalize(vmin=-.5, vmax=.5))
g.fig.subplots_adjust(wspace=0.06, hspace=0.06) # equal spacing in both directions
plt.show()

Sometimes, it is useful to change the size of the correlation value based on the abs(r). Higher values ​​-> larger numbers.
def corrfunc(x, y, **kwds):
cmap = kwds['cmap']
norm = kwds['norm']
ax = plt.gca()
ax.tick_params(bottom=False, top=False, left=False, right=False)
sns.despine(ax=ax, bottom=True, top=True, left=True, right=True)
r, _ = pearsonr(x, y)
facecolor = cmap(norm(r))
ax.set_facecolor(facecolor)
lightness = (max(facecolor[:3]) + min(facecolor[:3]) ) / 2
tam = int(70*abs(r))
if tam < 10:
tam = 10
ax.annotate(f"{r:.2f}", xy=(.5, .5), xycoords=ax.transAxes,
color='white' if lightness < 0.7 else 'black', size=tam, ha='center', va='center')

Related

How to customize seaborn boxplot with specific color sequence when boxplots have hue

I want to make boxplots with hues but I want to color code it so that each specific X string is a certain color with the hue just being a lighter color. I am able to do a boxplot without a hue. When I incorporate the hue, I get the second boxplot which loses the colors. Can someone help me customize the colors for the figure that contains the hue?
Essentially, its what the answer for this question is but with boxplots.
This is my code:
first boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax,palette=color_dict)
sns.stripplot(ax=ax,y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs,palette=color_dict,
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
second boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax, hue=df_idrs["location"])
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs["location"],
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
The only thing that changed was the palette to hue. I have seen many examples on here but I am unable to get them to work. Using the second code, I have tried the following:
Nothing happens for this one.
for ind, bp in enumerate(ax.findobj(PolyCollection)):
rgb = to_rgb(colors[ind // 2])
if ind % 2 != 0:
rgb = 0.5 + 0.5 * np.array(rgb) # make whiter
bp.set_facecolor(rgb)
I get index out of range for the following one.
for i in range(0,4):
mybox = bp.artists[i]
mybox.set_facecolor(color_dict[order[i]])
Matplotlib stores the boxes in ax.patches, but there are also 2 dummy patches (used to construct the legend) that need to be filtered away. The dots of the stripplot are stored in ax.collections. There are also 2 dummy collections for the legend, but as those come at the end, they don't form a problem.
Some remarks:
sns.boxplot returns the subplot on which it was drawn; as it is called with ax=ax it will return that same ax
Setting jitter=1in the stripplot will smear the dots over a width of 1. 1 is the distance between the x positions, and the boxes are only 0.4 wide. To avoid clutter, the code below uses jitter=0.4.
Here is some example code starting from dummy test data:
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.patches import PathPatch
from matplotlib.colors import to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(20230215)
order = ['Ash1', 'E1A', 'FUS', 'p53']
colors = ['gold', 'teal', 'darkorange', 'royalblue']
hue_order = ['A', 'B']
df_idrs = pd.DataFrame({'construct': np.repeat(order, 200),
'Norm_Ef_IDR/Ef_GS': (np.random.normal(0.03, 1, 800).cumsum() + 10) / 15,
'location': np.tile(np.repeat(hue_order, 100), 4)})
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=df_idrs, x=df_idrs['construct'], y=df_idrs['Norm_Ef_IDR/Ef_GS'], hue='location',
order=order, hue_order=hue_order, ax=ax)
box_colors = [f + (1 - f) * np.array(to_rgb(c)) # whiten colors depending on hue
for c in colors for f in np.linspace(0, 0.5, len(hue_order))]
box_patches = [p for p in ax.patches if isinstance(p, PathPatch)]
for patch, color in zip(box_patches, box_colors):
patch.set_facecolor(color)
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs['location'],
jitter=0.4, marker='o', alpha=0.4, edgecolor='black', linewidth=1, dodge=True, ax=ax)
for collection, color in zip(ax.collections, box_colors):
collection.set_facecolor(color)
ax.axhline(y=1, linestyle='--', color='black', linewidth=2)
handles = [tuple(box_patches[i::len(hue_order)]) for i in range(len(hue_order))]
ax.legend(handles=handles, labels=hue_order, title='hue category',
handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
loc='upper left', bbox_to_anchor=(1.01, 1))
plt.tight_layout()
plt.show()

How to center "hue" coloring using seaborn stripplot

This is my plot:
I would like the coloring to be centered at 0 within the plot. While I managed to have the legend centered at 0, this does not apply to the dots in the plot (i.e. I would expect them to be gray at the zero value).
This is my code which generates the plots:
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import seaborn as sns
def plot_jitter(df):
plot = sns.stripplot(x='category', y='overall_margin', hue='overall_margin', data=df,
palette='coolwarm_r',
jitter=True, edgecolor='none', alpha=.60)
plot.get_legend().set_visible(False)
sns.despine()
plt.axhline(0, 0,1,color='grey').set_linestyle("--")
#Drawing the side color bar
normalize = mcolors.TwoSlopeNorm(vcenter=0, vmin=df['overall_margin'].min(), vmax=df['overall_margin'].max())
colormap = cm.coolwarm_r
[plt.plot(color=colormap(normalize(x))) for x in df['overall_margin']]
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array(df['overall_margin'])
plt.colorbar(scalarmappaple)
By using sns.scatterplot instead of sns.stripplot you can use the c, norm and cmap parameters like so.
# Load demo data, scale `total_bill` to be in the range [0, 1]
tips = sns.load_dataset("tips")
tips["total_bill"] = tips["total_bill"].div(100)
Building the plot:
fig, ax = plt.subplots()
# Get/set params for the colour mapping
vcenter = 0.15
vmin, vmax = tips["total_bill"].min(), tips["total_bill"].max()
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
colormap = cm.coolwarm_r
# plot with:
# - `c`: array of floats for colour mapping
# - `cmap`: the colourmap you want
#  - `norm`: to scale the data from `c`
sns.scatterplot(
x="day",
y="total_bill",
data=tips,
c=tips["total_bill"],
norm=normalize,
cmap=colormap,
ax=ax,
)
ax.axhline(vcenter, color="grey", ls="--")
# Tweak the points to mimic `sns.stripplot`
pts = ax.collections[0]
pts.set_offsets(pts.get_offsets() + np.c_[np.random.uniform(-.1, .1, len(tips)), np.zeros(len(tips))])
ax.margins(x=0.15)
scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
scalarmappaple.set_array(tips["total_bill"])
fig.colorbar(scalarmappaple)
Which produces:
The code to mimic stripplot is from seaborn's github issues

How to put a colorbar in seaborn scatterplot legend

I have the next scatterplot
But i want to change the dots on the legend by continuos color map like this:
This is my code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set_style("whitegrid")
gene_list = pd.read_csv('interseccion.csv', header=None)
glist = gene_list.squeeze().str.strip().tolist()
names = gp.get_library_name()
enr = gp.enrichr(gene_list= glist,
gene_sets=['KEGG_2019_Human'],
organism='Human', # don't forget to set organism to the one you desired! e.g. Yeast
description='KEGG',
# no_plot=True,
cutoff=0.5 # test dataset, use lower value from range(0,1)
)
resultados = enr.results.head(15)
resultados['-log10(FDR)'] = -np.log10(resultados['Adjusted P-value'])
resultados['Genes'] = resultados['Genes'].str.split(';')
resultados['Genes'] = resultados['Genes'].apply(lambda x: len(x))
g = sns.scatterplot(data=resultados, x="-log10(FDR)", y="Term", hue='-log10(FDR)', palette="seismic"
, size="Genes", sizes=(30, 300), legend=True)
g.legend(loc=6, bbox_to_anchor=(1, 0.5), ncol=1)
g.fig.colorbar()
plt.ylabel('')
plt.xlabel('-log10(FDR)')
When i try to put a color bar with the funcion plt.colorbar() is not possible
I customized the code in the official sample with the understanding that I wanted to add a legend and color bars to the Seaborn scatterplot. A colormap has been created to match the colors of the sample graph, but it can be drawn without problems by specifying the colormap name. The color bar is customized by getting its position and adjusting it manually in the legend. The height of the color bar is halved to match the legend.
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
fig, ax = plt.subplots()
g = sns.scatterplot(
data=tips, x="total_bill", y="tip", hue="size", size="size",
sizes=(20, 200), legend="full", ax=ax)
g.legend(loc='upper right', bbox_to_anchor=(1.2, 1.0), ncol=1)
norm = plt.Normalize(tips['size'].min(), tips['size'].max())
cmap = sns.cubehelix_palette(light=1, as_cmap=True)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cax = fig.add_axes([ax.get_position().x1+0.05, ax.get_position().y0, 0.06, ax.get_position().height / 2])
ax.figure.colorbar(sm, cax=cax)
plt.show()

matplotlib/seaborn violin plot with colormap

I want to create a violin plot, with either matplotlib or searborn, in which the plot is colored according to a colormap.
This is what I get:
This is what I would like to get (I used Photoshop here):
How can I obtain the desired plot?
I thought there would be a better was to do this, but, based on #ImportanceOfBeingErnest's comment, I guess this is actually the way to go:
from matplotlib.path import Path
from matplotlib.patches import PathPatch
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
x = [np.random.normal(loc=i, scale=1, size=(100,)) for i in range(5)]
fig, ax = plt.subplots()
violins = ax.violinplot(x)
ymin, ymax = ax.get_ylim()
xmin, xmax = ax.get_xlim()
# create a numpy image to use as a gradient
Nx,Ny=1,1000
imgArr = np.tile(np.linspace(0,1,Ny), (Nx,1)).T
cmap = 'hsv'
for violin in violins['bodies']:
path = Path(violin.get_paths()[0].vertices)
patch = PathPatch(path, facecolor='none', edgecolor='none')
ax.add_patch(patch)
img = ax.imshow(imgArr, origin="lower", extent=[xmin,xmax,ymin,ymax], aspect="auto",
cmap=cmap,
clip_path=patch)
# colorbar
ax_divider = make_axes_locatable(ax)
cax = ax_divider.append_axes("right", size="5%", pad="2%")
norm = matplotlib.colors.Normalize(vmin=ymin, vmax=ymax)
cb = matplotlib.colorbar.ColorbarBase(cax, cmap=matplotlib.cm.get_cmap(cmap),
norm=norm,
orientation='vertical')

Scatterplot with point colors representing a continuous variable in seaborn FacetGrid

I am trying to generate multi-panel figure using seaborn in python and I want the color of the points in my multi-panel figure to be specified by a continuous variable. Here's an example of what I am trying to do with the "iris" dataset:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.FacetGrid(iris, col = 'species', hue = 'petal_length', palette = 'seismic')
g = g.map(plt.scatter, 'sepal_length', 'sepal_width', s = 100, alpha = 0.5)
g.add_legend()
This makes the following figure:
Which is nice, but the legend is way too long. I'd like to sample out like 1/4 of these values (ideally) or barring that display a colorbar instead.
For instance, something like this might be acceptable, but I'd still want to split it over the three species.
plt.scatter(iris.sepal_length, iris.sepal_width, alpha = .8, c = iris.petal_length, cmap = 'seismic')
cbar = plt.colorbar()
Any idea about how I can get the best of both of these plots?
Edit:
This topic seems like a good start.
https://github.com/mwaskom/seaborn/issues/582
Somehow, for this user, simply appending plt.colorbar after everything else ran seemed to somehow work. Doesn't seem to help in this case though.
The FacetGrid hue is categorical, not continuous. It will require a little bit of work to get a continuous colormap for a scatterplot in the FacetGrid (unlike with imshow in the linked Github issue, matplotlib does not keep a reference to the "currently active scatterplot mapper" so that a magic call to plt.colorbar doesn't pick up the mapping applied to the point colors).
g = sns.FacetGrid(iris, col='species', palette = 'seismic')
def facet_scatter(x, y, c, **kwargs):
"""Draw scatterplot with point colors from a faceted DataFrame columns."""
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin, vmax = 0, 7
cmap = sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)
g = g.map(facet_scatter, 'sepal_length', 'sepal_width', "petal_length",
s=100, alpha=0.5, vmin=vmin, vmax=vmax, cmap=cmap)
# Make space for the colorbar
g.fig.subplots_adjust(right=.92)
# Define a new Axes where the colorbar will go
cax = g.fig.add_axes([.94, .25, .02, .6])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)
# Draw the colorbar
g.fig.colorbar(points, cax=cax)
Since you were asking about a legend for the scatter, one may adapt #mwaskom's solution to produce a legend with scatter points like so:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.FacetGrid(iris, col='species', palette = 'seismic')
def facet_scatter(x, y, c, **kwargs):
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin, vmax = 0, 7
cmap = plt.cm.viridis
norm=plt.Normalize(vmin=vmin, vmax=vmax)
g = g.map(facet_scatter, 'sepal_length', 'sepal_width', "petal_length",
s=100, alpha=0.5, norm=norm, cmap=cmap)
# Make space for the colorbar
g.fig.subplots_adjust(right=.9)
lp = lambda i: plt.plot([], color=cmap(norm(i)), marker="o", ls="", ms=10, alpha=0.5)[0]
labels = np.arange(0,7.5,0.5)
h = [lp(i) for i in labels]
g.fig.legend(handles=h, labels=labels, fontsize=9)
plt.show()

Categories