Related
I've spent hours on trying to do what I thought was a simple task, which is to add labels onto an XY plot while using seaborn.
Here's my code
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 8,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
I would like to add to each dot on the plot the text in "species" column.
I've seen many examples using matplotlib but not using seaborn.
Any ideas? Thank you.
One way you can do this is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
df_iris=sns.load_dataset("iris")
ax = sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 10,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
def label_point(x, y, val, ax):
a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
for i, point in a.iterrows():
ax.text(point['x']+.02, point['y'], str(point['val']))
label_point(df_iris.sepal_length, df_iris.sepal_width, df_iris.species, plt.gca())
Here's a more up-to-date answer that doesn't suffer from the string issue described in the comments.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
p1 = sns.scatterplot(x='sepal_length', # Horizontal axis
y='sepal_width', # Vertical axis
data=df_iris, # Data source
size = 8,
legend=False)
for line in range(0,df_iris.shape[0]):
p1.text(df_iris.sepal_length[line]+0.01, df_iris.sepal_width[line],
df_iris.species[line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
Thanks to the 2 other answers, here is a function scatter_text that makes it possible to reuse these plots several times.
import seaborn as sns
import matplotlib.pyplot as plt
def scatter_text(x, y, text_column, data, title, xlabel, ylabel):
"""Scatter plot with country codes on the x y coordinates
Based on this answer: https://stackoverflow.com/a/54789170/2641825"""
# Create the scatter plot
p1 = sns.scatterplot(x, y, data=data, size = 8, legend=False)
# Add text besides each point
for line in range(0,data.shape[0]):
p1.text(data[x][line]+0.01, data[y][line],
data[text_column][line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
# Set title and axis labels
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
return p1
Use the function as follows:
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
scatter_text('sepal_length', 'sepal_width', 'species',
data = df_iris,
title = 'Iris sepals',
xlabel = 'Sepal Length (cm)',
ylabel = 'Sepal Width (cm)')
See also this answer on how to have a function that returns a plot:
https://stackoverflow.com/a/43926055/2641825
Below is a solution that does not iterate over rows in the data frame using the dreaded for loop.
There are many issues regarding iterating over a data frame.
The answer is don't iterate! See this link.
The solution below relies on a function (plotlabel) within the petalplot function, which is called by df.apply.
Now, I know readers will comment on the fact that I use scatter and not lmplot, but that is a bit besides the point.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
def petalplot(df):
def plotlabel(xvar, yvar, label):
ax.text(xvar+0.002, yvar, label)
fig = plt.figure(figsize=(30,10))
ax = sns.scatterplot(x = 'sepal_length', y = 'sepal_width', data=df)
# The magic starts here:
df.apply(lambda x: plotlabel(x['sepal_length'], x['sepal_width'], x['species']), axis=1)
plt.title('Example Plot')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
petalplot(df_iris)
Same idea with Scott Boston's answer, however with Seaborn v0.12+, you can leverage seaborn.FacetGrid.apply to add labels on plots and set up your figure in one go:
import seaborn as sns
import pandas as pd
%matplotlib inline
sns.set_theme()
df_iris = sns.load_dataset("iris")
(
sns.lmplot(
data=df_iris,
x="sepal_length",
y="sepal_width",
fit_reg=False,
height=8,
aspect=2
)
.apply(lambda grid: [
grid.ax.text(r["sepal_length"]+.02, r["sepal_width"], r["species"])
for r in df_iris.to_dict(orient="records")
])
.set(title="Example Plot")
.set_axis_labels("Sepal Length", "Sepal Width")
)
Or, if you don't need to use lmplot, also from v0.12, you can use the seaborn.objects interface. This way we don't need to manually iterate over the Iris dataframe nor refer to df_iris or column names sepal_... multiple times.
import seaborn.objects as so
(
so.Plot(df_iris, x="sepal_length", y="sepal_width", text="species")
.add(so.Dot())
.add(so.Text(halign="left"))
.label(title="Example plot", x="Sepal Length", y="Sepal Width")
.layout(size=(20, 10))
)
This produces the below figure:
Use the powerful declarative API to avoid loops (seaborn>=0.12).
Specifically, put x,y, and annotations into a pandas data frame and call plotting.
Here is an example from my own research work.
import seaborn.objects as so
import pandas as pd
df = pd.DataFrame(..,columns=['phase','P(X=1)','text'])
fig,ax = plt.subplots()
p = so.Plot(df,x='phase',y='P(X=1)',text='text').add(so.Dot(marker='+')).add(so.Text(halign='left'))
p.on(ax).show()
I'm working with Matplotlib and have a large number of 1D heatmaps, each with their own label. However, the labels are misaligned with the plots and I cannot figure out to get this to work automatically.
Here's an MWE
import numpy as np
import matplotlib.pyplot as plt
data = np.random.rand(10, 1000)
dogs = ["woof", "bark", "bowwow"]
fig, axs = plt.subplots(10)
for i in range(10):
axs[i].scatter(np.linspace(0, 1, 1000), np.linspace(0,1,1000)*0, 2000,
c=data[i, :], marker="|", cmap='inferno')
axs[i].set_frame_on(False)
axs[i].set_yticklabels([])
axs[i].set_xticklabels([])
axs[i].set_xticks([])
axs[i].set_yticks([])
axs[i].set_ylabel(dogs[i%3], rotation='horizontal')
plt.show()
I experimented with
axs[i].yaxis.set_label_coords(x, y)
for various values of x and y, and nothing seems to work. I would prefer to have it align automatically, with the bottom of the text corresponding to the bottom of the individual plot.
Attached is an image showcasing the alignment issue.
Example
You could create your heatmaps via seaborn, and use yticklabels=[label_name] to set the labels. Rotating the labels to 0 degrees should have them nicely aligned. Note that the data is expected to have a shape of 1xN.
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
labels = ['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe']
nrows = len(labels)
fig, axs = plt.subplots(nrows=nrows, figsize=(10, 5))
for ax_i, data_i, label_i in zip(axs, np.random.randn(nrows, 1, 100).cumsum(axis=2), labels):
sns.heatmap(data=data_i, xticklabels=[], yticklabels=[label_i], cmap='inferno', cbar=False, ax=ax_i)
ax_i.tick_params(axis='y', rotation=0, labelsize=22, length=0) # length means length of the tick mark
plt.tight_layout()
plt.show()
After a bit of playing around, I found that
axs[0].set_ylabel("Pseudotime", fontsize=12, rotation='horizontal', ha='right', va='center')
is sufficient for aligning the y-labels.
I want to create a heatmap with seaborn, similar to this (with the following code):
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Default heatmap
ax = sns.heatmap(df)
plt.show()
I'd also like to add a new variable (lets say new_var = pd.DataFrame(np.random.random((5,1)), columns=["new variable"])), such as that the values (and possibly the spine and ticks as well) of the y-axis are colored according to the new variable and a second color bar plotted in the same plot to represent the colors of the y-axis values. How can I do that?
This uses the new values to color the y-ticks and the y-tick labels and adds the associated colorbar.
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import numpy as np
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Default heatmap
ax = sns.heatmap(df)
new_var = pd.DataFrame(np.random.random((5,1)), columns=["new variable"])
# Create the colorbar for y-ticks and labels
norm = plt.Normalize(new_var.min(), new_var.max())
cmap = matplotlib.cm.get_cmap('turbo')
yticks_locations = ax.get_yticks()
yticks_labels = df.index.values
#hide original ticks
ax.tick_params(axis='y', left=False)
ax.set_yticklabels([])
for var, ytick_loc, ytick_label in zip(new_var.values, yticks_locations, yticks_labels):
color = cmap(norm(float(var)))
ax.annotate(ytick_label, xy=(1, ytick_loc), xycoords='data', xytext=(-0.4, ytick_loc),
arrowprops=dict(arrowstyle="-", color=color, lw=1), zorder=0, rotation=90, color=color)
# Add colorbar for y-tick colors
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
cb = ax.figure.colorbar(sm)
# Match the seaborn style
cb.outline.set_visible(False)
I found your problem interesting, and inspired by the unanswered comment above:
How do you change the second colorbar position? For example, one on top the other on bottom sides. - Py-ser
I decided to spend a while doing some tests. After a little digging i find that cbar_kws={"orientation": "horizontal"} is the argument for sns.heatmap that makes the colorbars horizontal.
Borrowing the code from the solution and making some changes, you can format your plot the way you want as in:
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import numpy as np
# Create data
df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
# Default heatmap
ax = sns.heatmap(df, cbar_kws={"orientation": "horizontal"}, square = False, annot = True)
new_var = pd.DataFrame(np.random.random((5,1)), columns=["new variable"])
# Create the colorbar for y-ticks and labels
norm = plt.Normalize(new_var.min(), new_var.max())
cmap = matplotlib.cm.get_cmap('turbo')
yticks_locations = ax.get_yticks()
yticks_labels = df.index.values
#hide original ticks
ax.tick_params(axis='y', left=False)
ax.set_yticklabels([])
for var, ytick_loc, ytick_label in zip(new_var.values, yticks_locations, yticks_labels):
color = cmap(norm(float(var)))
ax.annotate(ytick_label, xy=(1, ytick_loc), xycoords='data', xytext=(-0.4, ytick_loc),
arrowprops=dict(arrowstyle="-", color=color, lw=1), zorder=0, rotation=90, color=color)
# Add colorbar for y-tick colors
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
cb = ax.figure.colorbar(sm)
# Match the seaborn style
cb.outline.set_visible(False)
Also, you will notice that I listed the values related to each cell in the heatmap, but just out of curiosity to make it clearer to check that everything was working as expected.
I'm still not very happy with the shape/size of the horizontal colorbar, but I'll keep testing and update any progress by editing this answer!
==========================================
EDIT
just to keep track of the updates, first i tried to change just some parameters of seaborn's heatmap function but wouldn't consider this a major improvement on the task... by adding
ax = sns.heatmap(df, cbar_kws = dict(use_gridspec=True, location="top", shrink =0.6), square = True, annot = True)
I end up with:
I did get to separate the colormap using the matplotlib subplot routine and honestly i believe this is the right way given the parameter control that is possible to get here, by:
# Define two rows for subplots
fig, (cax, ax) = plt.subplots(nrows=2, figsize=(5,5.025), gridspec_kw={"height_ratios":[0.025, 1]})
# Default heatmap
ax = sns.heatmap(df, cbar=False, annot = True)
# colorbar
fig.colorbar(ax.get_children()[0], cax=cax, orientation="horizontal")
plt.show()
I obtained:
Which is still not the prettiest graph I've ever made, but now the position and size of the heatmap can be edited normally within the plt.subplots subroutines that give absolute control over these parameters.
I've spent hours on trying to do what I thought was a simple task, which is to add labels onto an XY plot while using seaborn.
Here's my code
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 8,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
I would like to add to each dot on the plot the text in "species" column.
I've seen many examples using matplotlib but not using seaborn.
Any ideas? Thank you.
One way you can do this is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
df_iris=sns.load_dataset("iris")
ax = sns.lmplot('sepal_length', # Horizontal axis
'sepal_width', # Vertical axis
data=df_iris, # Data source
fit_reg=False, # Don't fix a regression line
size = 10,
aspect =2 ) # size and dimension
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
def label_point(x, y, val, ax):
a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
for i, point in a.iterrows():
ax.text(point['x']+.02, point['y'], str(point['val']))
label_point(df_iris.sepal_length, df_iris.sepal_width, df_iris.species, plt.gca())
Here's a more up-to-date answer that doesn't suffer from the string issue described in the comments.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
p1 = sns.scatterplot(x='sepal_length', # Horizontal axis
y='sepal_width', # Vertical axis
data=df_iris, # Data source
size = 8,
legend=False)
for line in range(0,df_iris.shape[0]):
p1.text(df_iris.sepal_length[line]+0.01, df_iris.sepal_width[line],
df_iris.species[line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
plt.title('Example Plot')
# Set x-axis label
plt.xlabel('Sepal Length')
# Set y-axis label
plt.ylabel('Sepal Width')
Thanks to the 2 other answers, here is a function scatter_text that makes it possible to reuse these plots several times.
import seaborn as sns
import matplotlib.pyplot as plt
def scatter_text(x, y, text_column, data, title, xlabel, ylabel):
"""Scatter plot with country codes on the x y coordinates
Based on this answer: https://stackoverflow.com/a/54789170/2641825"""
# Create the scatter plot
p1 = sns.scatterplot(x, y, data=data, size = 8, legend=False)
# Add text besides each point
for line in range(0,data.shape[0]):
p1.text(data[x][line]+0.01, data[y][line],
data[text_column][line], horizontalalignment='left',
size='medium', color='black', weight='semibold')
# Set title and axis labels
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
return p1
Use the function as follows:
df_iris=sns.load_dataset("iris")
plt.figure(figsize=(20,10))
scatter_text('sepal_length', 'sepal_width', 'species',
data = df_iris,
title = 'Iris sepals',
xlabel = 'Sepal Length (cm)',
ylabel = 'Sepal Width (cm)')
See also this answer on how to have a function that returns a plot:
https://stackoverflow.com/a/43926055/2641825
Below is a solution that does not iterate over rows in the data frame using the dreaded for loop.
There are many issues regarding iterating over a data frame.
The answer is don't iterate! See this link.
The solution below relies on a function (plotlabel) within the petalplot function, which is called by df.apply.
Now, I know readers will comment on the fact that I use scatter and not lmplot, but that is a bit besides the point.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
df_iris=sns.load_dataset("iris")
def petalplot(df):
def plotlabel(xvar, yvar, label):
ax.text(xvar+0.002, yvar, label)
fig = plt.figure(figsize=(30,10))
ax = sns.scatterplot(x = 'sepal_length', y = 'sepal_width', data=df)
# The magic starts here:
df.apply(lambda x: plotlabel(x['sepal_length'], x['sepal_width'], x['species']), axis=1)
plt.title('Example Plot')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
petalplot(df_iris)
Same idea with Scott Boston's answer, however with Seaborn v0.12+, you can leverage seaborn.FacetGrid.apply to add labels on plots and set up your figure in one go:
import seaborn as sns
import pandas as pd
%matplotlib inline
sns.set_theme()
df_iris = sns.load_dataset("iris")
(
sns.lmplot(
data=df_iris,
x="sepal_length",
y="sepal_width",
fit_reg=False,
height=8,
aspect=2
)
.apply(lambda grid: [
grid.ax.text(r["sepal_length"]+.02, r["sepal_width"], r["species"])
for r in df_iris.to_dict(orient="records")
])
.set(title="Example Plot")
.set_axis_labels("Sepal Length", "Sepal Width")
)
Or, if you don't need to use lmplot, also from v0.12, you can use the seaborn.objects interface. This way we don't need to manually iterate over the Iris dataframe nor refer to df_iris or column names sepal_... multiple times.
import seaborn.objects as so
(
so.Plot(df_iris, x="sepal_length", y="sepal_width", text="species")
.add(so.Dot())
.add(so.Text(halign="left"))
.label(title="Example plot", x="Sepal Length", y="Sepal Width")
.layout(size=(20, 10))
)
This produces the below figure:
Use the powerful declarative API to avoid loops (seaborn>=0.12).
Specifically, put x,y, and annotations into a pandas data frame and call plotting.
Here is an example from my own research work.
import seaborn.objects as so
import pandas as pd
df = pd.DataFrame(..,columns=['phase','P(X=1)','text'])
fig,ax = plt.subplots()
p = so.Plot(df,x='phase',y='P(X=1)',text='text').add(so.Dot(marker='+')).add(so.Text(halign='left'))
p.on(ax).show()
Here is an example that reproduces my problem:
import matplotlib.pyplot as plt
import numpy as np
data1,data2,data3,data4 = np.random.random(100),np.random.random(100),np.random.random(100),np.random.random(100)
fig,ax = plt.subplots()
ax.plot(data1)
ax.plot(data2)
ax.plot(data3)
ax2 = ax.twinx()
ax2.plot(data4)
plt.grid('on')
ax.legend(['1','2','3'], loc='center')
ax2.legend(['4'], loc=1)
How can I get the legend in the center to plot on top of the lines?
To get exactly what you have asked for, try the following. Note I have modified your code to define the labels when you generate the plot and also the colors so you don't get a repeated blue line.
import matplotlib.pyplot as plt
import numpy as np
data1,data2,data3,data4 = (np.random.random(100),
np.random.random(100),
np.random.random(100),
np.random.random(100))
fig,ax = plt.subplots()
ax.plot(data1, label="1", color="k")
ax.plot(data2, label="2", color="r")
ax.plot(data3, label="3", color="g")
ax2 = ax.twinx()
ax2.plot(data4, label="4", color="b")
# First get the handles and labels from the axes
handles1, labels1 = ax.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
# Add the first legend to the second axis so it displaysys 'on top'
first_legend = plt.legend(handles1, labels1, loc='center')
ax2.add_artist(first_legend)
# Add the second legend as usual
ax2.legend(handles2, labels2)
plt.show()
Now I will add that it would be clearer if you just use a single legend adding all the lines to that. This is described in this SO post and in the code above can easily be achieved with
ax2.legend(handles1+handles2, labels1+labels2)
But obviously you may have your own reasons for wanting two legends.