Overplot seaborn regplot and swarmplot - python

I would like to overplot a swarmplot and regplot in seaborn, so that I can have a y=x line through my swarmplot.
Here is my code:
import matplotlib.pyplot as plt
import seaborn as sns
sns.regplot(y=y, x=x, marker=' ', color='k')
sns.swarmplot(x=x_data, y=y_data)
I don't get any errors when I plot, but the regplot never shows on the plot. How can I fix this?
EDIT: My regplot and swarmplot don't overplot and instead, plot in the same frame but separated by some unspecified y amount. If I flip them so regplot is above the call to swarmplot, regplot doesn't show up at all.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
df = pd.DataFrame({"x":x_data,"y":y_data} )
sns.regplot(y="y", x="x", data= df, color='k', scatter_kws={"alpha" : 0.0})
sns.swarmplot(y="y", x="x", data= df)
SECOND EDIT: The double axis solution from below works beautifully!

In principle the approach of plotting a swarmplot and a regplot simulatneously works fine.
The problem here is that you set an empty marker (marker = " "). This destroys the regplot, such that it's not shown. Apparently this is only an issue when plotting several things to the same graph; plotting a single regplot with empty marker works fine.
The solution would be not to specify the marker argument, but instead set the markers invisible by using the scatter_kws argument: scatter_kws={"alpha" : 0.0}.
Here is a complete example:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
## generate some data
n=19; m=9
y_data = []
for i in range(m):
a = (np.random.poisson(lam=0.99-float(i)/m,size=n)+i*.9+np.random.rand(1)*2)
a+=(np.random.rand(n)-0.5)*2
y_data.append(a*m)
y_data = np.array(y_data).flatten()
x_data = np.floor(np.sort(np.random.rand(n*m))*m)
## put them into dataframe
df = pd.DataFrame({"x":x_data,"y":y_data} )
## plotting
sns.regplot(y="y", x="x", data= df, color='k', scatter_kws={"alpha" : 0.0})
sns.swarmplot(x="x", y="y", data= df)
plt.show()
Concerning the edited part of the question:
Since swarmplot is a categorical plot, the axis in the plot still goes from -0.5 to 8.5 and not as the labels suggest from 10 to 18.
A possible workaround is to use two axes and twiny.
fig, ax = plt.subplots()
ax2 = ax.twiny()
sns.swarmplot(x="x", y="y", data= df, ax=ax)
sns.regplot(y="y", x="x", data= df, color='k', scatter_kws={"alpha" : 0.0}, ax=ax2)
ax2.grid(False) #remove grid as it overlays the other plot

Related

How to replace a point to legend in seaborn [duplicate]

I've spent hours on trying to do what I thought was a simple task, which is to add labels onto an XY plot while using seaborn.
Here's my code
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 8,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
I would like to add to each dot on the plot the text in "species" column.
I've seen many examples using matplotlib but not using seaborn.
Any ideas? Thank you.
One way you can do this is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
df_iris=sns.load_dataset("iris")
ax = sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 10,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
def label_point(x, y, val, ax):
a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
for i, point in a.iterrows():
ax.text(point['x']+.02, point['y'], str(point['val']))
label_point(df_iris.sepal_length, df_iris.sepal_width, df_iris.species, plt.gca())
Here's a more up-to-date answer that doesn't suffer from the string issue described in the comments.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
p1 = sns.scatterplot(x='sepal_length', # Horizontal axis
y='sepal_width', # Vertical axis
data=df_iris, # Data source
size = 8,
legend=False)
for line in range(0,df_iris.shape[0]):
p1.text(df_iris.sepal_length[line]+0.01, df_iris.sepal_width[line],
df_iris.species[line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
Thanks to the 2 other answers, here is a function scatter_text that makes it possible to reuse these plots several times.
import seaborn as sns
import matplotlib.pyplot as plt
def scatter_text(x, y, text_column, data, title, xlabel, ylabel):
"""Scatter plot with country codes on the x y coordinates
Based on this answer: https://stackoverflow.com/a/54789170/2641825"""
# Create the scatter plot
p1 = sns.scatterplot(x, y, data=data, size = 8, legend=False)
# Add text besides each point
for line in range(0,data.shape[0]):
p1.text(data[x][line]+0.01, data[y][line],
data[text_column][line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
# Set title and axis labels
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
return p1
Use the function as follows:
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
scatter_text('sepal_length', 'sepal_width', 'species',
data = df_iris,
title = 'Iris sepals',
xlabel = 'Sepal Length (cm)',
ylabel = 'Sepal Width (cm)')
See also this answer on how to have a function that returns a plot:
https://stackoverflow.com/a/43926055/2641825
Below is a solution that does not iterate over rows in the data frame using the dreaded for loop.
There are many issues regarding iterating over a data frame.
The answer is don't iterate! See this link.
The solution below relies on a function (plotlabel) within the petalplot function, which is called by df.apply.
Now, I know readers will comment on the fact that I use scatter and not lmplot, but that is a bit besides the point.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
def petalplot(df):
def plotlabel(xvar, yvar, label):
ax.text(xvar+0.002, yvar, label)
fig = plt.figure(figsize=(30,10))
ax = sns.scatterplot(x = 'sepal_length', y = 'sepal_width', data=df)
# The magic starts here:
df.apply(lambda x: plotlabel(x['sepal_length'], x['sepal_width'], x['species']), axis=1)
plt.title('Example Plot')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
petalplot(df_iris)
Same idea with Scott Boston's answer, however with Seaborn v0.12+, you can leverage seaborn.FacetGrid.apply to add labels on plots and set up your figure in one go:
import seaborn as sns
import pandas as pd
%matplotlib inline
sns.set_theme()
df_iris = sns.load_dataset("iris")
(
sns.lmplot(
data=df_iris,
x="sepal_length",
y="sepal_width",
fit_reg=False,
height=8,
aspect=2
)
.apply(lambda grid: [
grid.ax.text(r["sepal_length"]+.02, r["sepal_width"], r["species"])
for r in df_iris.to_dict(orient="records")
])
.set(title="Example Plot")
.set_axis_labels("Sepal Length", "Sepal Width")
)
Or, if you don't need to use lmplot, also from v0.12, you can use the seaborn.objects interface. This way we don't need to manually iterate over the Iris dataframe nor refer to df_iris or column names sepal_... multiple times.
import seaborn.objects as so
(
so.Plot(df_iris, x="sepal_length", y="sepal_width", text="species")
.add(so.Dot())
.add(so.Text(halign="left"))
.label(title="Example plot", x="Sepal Length", y="Sepal Width")
.layout(size=(20, 10))
)
This produces the below figure:
Use the powerful declarative API to avoid loops (seaborn>=0.12).
Specifically, put x,y, and annotations into a pandas data frame and call plotting.
Here is an example from my own research work.
import seaborn.objects as so
import pandas as pd
df = pd.DataFrame(..,columns=['phase','P(X=1)','text'])
fig,ax = plt.subplots()
p = so.Plot(df,x='phase',y='P(X=1)',text='text').add(so.Dot(marker='+')).add(so.Text(halign='left'))
p.on(ax).show()

how to set IQR on seaborn box plot to 10% 90% [duplicate]

I want to plot seaborn boxplot with box from min to max, rather than from 2nd to 3rd quartile, can I control it in matplotlib or seaborn? I know I can control the whiskers - how about boxes?
Here is an approach that mimics seaborn's boxplot via a horiontal plot using an aggregated dataframe.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# set sns plot style
sns.set()
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(nrows=2)
sns.boxplot(x='total_bill', y='day', data=tips, ax=ax1)
day_min_max = tips[['day', 'total_bill']].groupby('day').agg(['min', 'max', 'median'])
day_min_max.columns = day_min_max.columns.droplevel(0) # remove the old column name, only leaving 'min' and 'max'
ax2.use_sticky_edges = False
sns.barplot(y=day_min_max.index, x=day_min_max['median'] - day_min_max['min'], left=day_min_max['min'], ec='k', ax=ax2)
sns.barplot(y=day_min_max.index, x=day_min_max['max'] - day_min_max['median'], left=day_min_max['median'], ec='k', ax=ax2)
plt.tight_layout()
plt.show()
Depicting the first and third quartiles is the defining characteristic of a boxplot, so I don't think that this option exists. However, if you want to use the minima and maxima, you are not going to plot any whiskers, and hence you can simply use a barplot instead:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
data = np.random.rand(10, 3)
sns.barplot(x=np.arange(10), y=data.ptp(axis=1), bottom=data.min(axis=1))
plt.show()

horizontal grid only (in python using pandas plot + pyplot)

I would like to get only horizontal grid using pandas plot.
The integrated parameter of pandas only has grid=True or grid=False, so I tried with matplotlib pyplot, changing the axes parameters, specifically with this code:
import pandas as pd
import matplotlib.pyplot as plt
fig = plt.figure()
ax2 = plt.subplot()
ax2.grid(axis='x')
df.plot(kind='bar',ax=ax2, fontsize=10, sort_columns=True)
plt.show(fig)
But I get no grid, neither horizontal nor vertical. Is Pandas overwriting the axes? Or am I doing something wrong?
Try setting the grid after plotting the DataFrame. Also, to get the horizontal grid, you need to use ax2.grid(axis='y'). Below is an answer using a sample DataFrame.
I have restructured how you define ax2 by making use of subplots.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
fig, ax2 = plt.subplots()
df.plot(kind='bar',ax=ax2, fontsize=10, sort_columns=True)
ax2.grid(axis='y')
plt.show()
Alternatively, you can also do the following: Use the axis object returned from the DataFrame plot directly to turn on the horizontal grid
fig = plt.figure()
ax2 = df.plot(kind='bar', fontsize=10, sort_columns=True)
ax2.grid(axis='y')
Third option as suggested by #ayorgo in the comments is to chain the two commands as
df.plot(kind='bar',ax=ax2, fontsize=10, sort_columns=True).grid(axis='y')

Seaborn Heatmap: Move colorbar on top of the plot

I have a basic heatmap created using the seaborn library, and want to move the colorbar from the default, vertical and on the right, to a horizontal one above the heatmap. How can I do this?
Here's some sample data and an example of the default:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Default heatma
ax = sns.heatmap(df)
plt.show()
Looking at the documentation we find an argument cbar_kws. This allows to specify argument passed on to matplotlib's fig.colorbar method.
cbar_kws : dict of key, value mappings, optional.
Keyword arguments for fig.colorbar.
So we can use any of the possible arguments to fig.colorbar, providing a dictionary to cbar_kws.
In this case you need location="top" to place the colorbar on top. Because colorbar by default positions the colorbar using a gridspec, which then does not allow for the location to be set, we need to turn that gridspec off (use_gridspec=False).
sns.heatmap(df, cbar_kws = dict(use_gridspec=False,location="top"))
Complete example:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
ax = sns.heatmap(df, cbar_kws = dict(use_gridspec=False,location="top"))
plt.show()
I would like to show example with subplots which allows to control size of plot to preserve square geometry of heatmap. This example is very short:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Define two rows for subplots
fig, (cax, ax) = plt.subplots(nrows=2, figsize=(5,5.025), gridspec_kw={"height_ratios":[0.025, 1]})
# Draw heatmap
sns.heatmap(df, ax=ax, cbar=False)
# colorbar
fig.colorbar(ax.get_children()[0], cax=cax, orientation="horizontal")
plt.show()
You have to use axes divider to put colorbar on top of a seaborn figure. Look for the comments.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from mpl_toolkits.axes_grid1.colorbar import colorbar
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Use axes divider to put cbar on top
# plot heatmap without colorbar
ax = sns.heatmap(df, cbar = False)
# split axes of heatmap to put colorbar
ax_divider = make_axes_locatable(ax)
# define size and padding of axes for colorbar
cax = ax_divider.append_axes('top', size = '5%', pad = '2%')
# make colorbar for heatmap.
# Heatmap returns an axes obj but you need to get a mappable obj (get_children)
colorbar(ax.get_children()[0], cax = cax, orientation = 'horizontal')
# locate colorbar ticks
cax.xaxis.set_ticks_position('top')
plt.show()
For more info read this official example of matplotlib: https://matplotlib.org/gallery/axes_grid1/demo_colorbar_with_axes_divider.html?highlight=demo%20colorbar%20axes%20divider
Heatmap argument like sns.heatmap(df, cbar_kws = {'orientation':'horizontal'}) is useless because it put colorbar on bottom position.

Adding labels in x y scatter plot with seaborn

I've spent hours on trying to do what I thought was a simple task, which is to add labels onto an XY plot while using seaborn.
Here's my code
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 8,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
I would like to add to each dot on the plot the text in "species" column.
I've seen many examples using matplotlib but not using seaborn.
Any ideas? Thank you.
One way you can do this is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
df_iris=sns.load_dataset("iris")
ax = sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 10,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
def label_point(x, y, val, ax):
a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
for i, point in a.iterrows():
ax.text(point['x']+.02, point['y'], str(point['val']))
label_point(df_iris.sepal_length, df_iris.sepal_width, df_iris.species, plt.gca())
Here's a more up-to-date answer that doesn't suffer from the string issue described in the comments.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
p1 = sns.scatterplot(x='sepal_length', # Horizontal axis
y='sepal_width', # Vertical axis
data=df_iris, # Data source
size = 8,
legend=False)
for line in range(0,df_iris.shape[0]):
p1.text(df_iris.sepal_length[line]+0.01, df_iris.sepal_width[line],
df_iris.species[line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
Thanks to the 2 other answers, here is a function scatter_text that makes it possible to reuse these plots several times.
import seaborn as sns
import matplotlib.pyplot as plt
def scatter_text(x, y, text_column, data, title, xlabel, ylabel):
"""Scatter plot with country codes on the x y coordinates
Based on this answer: https://stackoverflow.com/a/54789170/2641825"""
# Create the scatter plot
p1 = sns.scatterplot(x, y, data=data, size = 8, legend=False)
# Add text besides each point
for line in range(0,data.shape[0]):
p1.text(data[x][line]+0.01, data[y][line],
data[text_column][line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
# Set title and axis labels
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
return p1
Use the function as follows:
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
scatter_text('sepal_length', 'sepal_width', 'species',
data = df_iris,
title = 'Iris sepals',
xlabel = 'Sepal Length (cm)',
ylabel = 'Sepal Width (cm)')
See also this answer on how to have a function that returns a plot:
https://stackoverflow.com/a/43926055/2641825
Below is a solution that does not iterate over rows in the data frame using the dreaded for loop.
There are many issues regarding iterating over a data frame.
The answer is don't iterate! See this link.
The solution below relies on a function (plotlabel) within the petalplot function, which is called by df.apply.
Now, I know readers will comment on the fact that I use scatter and not lmplot, but that is a bit besides the point.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
def petalplot(df):
def plotlabel(xvar, yvar, label):
ax.text(xvar+0.002, yvar, label)
fig = plt.figure(figsize=(30,10))
ax = sns.scatterplot(x = 'sepal_length', y = 'sepal_width', data=df)
# The magic starts here:
df.apply(lambda x: plotlabel(x['sepal_length'], x['sepal_width'], x['species']), axis=1)
plt.title('Example Plot')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
petalplot(df_iris)
Same idea with Scott Boston's answer, however with Seaborn v0.12+, you can leverage seaborn.FacetGrid.apply to add labels on plots and set up your figure in one go:
import seaborn as sns
import pandas as pd
%matplotlib inline
sns.set_theme()
df_iris = sns.load_dataset("iris")
(
sns.lmplot(
data=df_iris,
x="sepal_length",
y="sepal_width",
fit_reg=False,
height=8,
aspect=2
)
.apply(lambda grid: [
grid.ax.text(r["sepal_length"]+.02, r["sepal_width"], r["species"])
for r in df_iris.to_dict(orient="records")
])
.set(title="Example Plot")
.set_axis_labels("Sepal Length", "Sepal Width")
)
Or, if you don't need to use lmplot, also from v0.12, you can use the seaborn.objects interface. This way we don't need to manually iterate over the Iris dataframe nor refer to df_iris or column names sepal_... multiple times.
import seaborn.objects as so
(
so.Plot(df_iris, x="sepal_length", y="sepal_width", text="species")
.add(so.Dot())
.add(so.Text(halign="left"))
.label(title="Example plot", x="Sepal Length", y="Sepal Width")
.layout(size=(20, 10))
)
This produces the below figure:
Use the powerful declarative API to avoid loops (seaborn>=0.12).
Specifically, put x,y, and annotations into a pandas data frame and call plotting.
Here is an example from my own research work.
import seaborn.objects as so
import pandas as pd
df = pd.DataFrame(..,columns=['phase','P(X=1)','text'])
fig,ax = plt.subplots()
p = so.Plot(df,x='phase',y='P(X=1)',text='text').add(so.Dot(marker='+')).add(so.Text(halign='left'))
p.on(ax).show()

Categories