Plot another point on top of swarmplot - python

I want to plot a "highlighted" point on top of swarmplot like this
The swarmplot don't have the y-axis, so I have no idea how to plot that point.
import seaborn as sns
sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
ax = sns.swarmplot(x=tips["total_bill"])

This approach is predicated on knowing the index of the data point you wish to highlight, but it should work - although if you have multiple swarmplots on a single Axes instance it will become slightly more complex.
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
sns.set(style="whitegrid")
tips = sns.load_dataset("tips")
ax = sns.swarmplot(x=tips["total_bill"])
artists = ax.get_children()
offsets = []
for a in artists:
if type(a) is matplotlib.collections.PathCollection:
offsets = a.get_offsets()
break
plt.scatter(offsets[50,0], offsets[50,1], marker='o', color='orange', zorder=10)

You can highlight a point/s using the hue attribute if you add a grouping variable for the y axis (so that they appear as a single group), and then use another variable to highlight the point that you're interested in.
Then you can remove the y labels and styling and legend.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")
# Get data and mark point you want to highlight
tips = sns.load_dataset("tips")
tips['highlighted_point'] = 0
tips.loc[tips[tips.total_bill > 50].index, 'highlighted_point'] = 1
# Add holding 'group' variable so they appear as one
tips['y_variable'] = 'testing'
# Use 'hue' to differentiate the highlighted point
ax = sns.swarmplot(x=tips["total_bill"], y=tips['y_variable'], hue=tips['highlighted_point'])
# Remove legend
ax.get_legend().remove()
# Hide y axis formatting
ax.set_ylabel('')
ax.get_yaxis().set_ticks([])
plt.show()

Related

Change y-axis scale - FacetGrid

I cannot work out how to change the scale of the y-axis. My code is:
grid = sns.catplot(x='Nationality', y='count',
row='Age', col='Gender',
hue='Type',
data=dfNorthumbria2, kind='bar', ci='No')
I wanted to just go up in full numbers rather than in .5
Update
I just now found this tutorial the probably easiest solution will be the following:
grid.set(yticks=list(range(5)))
From the help of grid.set
Help on method set in module seaborn.axisgrid:
set(**kwargs) method of seaborn.axisgrid.FacetGrid instance
Set attributes on each subplot Axes.
Since seaborn is build on top of matplotlib you can use yticks from plt
import matplotlib.pyplot as plt
plt.yticks(range(5))
However this changed only the yticks of the upper row in my mockup example.
For this reason you probably want to change the y ticks based on the axis with ax.set_yticks(). To get the axis from your grid object you can implemented a list comprehension as follows:
[ax[0].set_yticks(range(0,150,5) )for ax in grid.axes]
A full replicable example would look like this (adapted from here)
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="ticks")
exercise = sns.load_dataset("exercise")
grid = sns.catplot(x="time", y="pulse", hue="kind",
row="diet", data=exercise)
# plt.yticks(range(0,150,5)) # Changed only one y-axis
# Changed y-ticks to steps of 20
[ax[0].set_yticks(range(0,150,20) )for ax in grid.axes]

Add Second Colorbar to a Seaborn Heatmap / Clustermap

I was trying to help someone add a colorbar for the vertical blue bar in the image below. We tried many variations of plt.colorbar(row_colors) (like above and below sns.clustermap()) and looked around online for 2 hours, but no luck. We just want to add a colorbar for the blues, please help!
import pickle
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
feat_mat, freq, label = pickle.load(open('file.pkl', 'rb'))
feat_mat_df = pd.DataFrame(feat_mat[4])
freq_df = pd.DataFrame(freq)
freq_df_transposed = freq_df.transpose()
my_palette = dict(zip(set(freq_df_transposed[int('4')]), sns.color_palette("PuBu", len(set(freq_df_transposed[int('4')]))))))
row_colors = freq_df_transposed[int('4')].map(my_palette)
sns.clustermap(feat_mat_df, metric="euclidean", standard_scale=1, method="complete", cmap="coolwarm", row_colors = row_colors)
plt.show()
This is where he based his code from: #405 Dendrogram with heatmap and coloured leaves
I think something like this should work for your purposes- I didn't have a clustermap example available but the logic is the same to do what you want to do. Basically-you're going to take that list of colors you made and imshow it, then hide the imshow plot, and plot the colorbar in its place.
In my example, I use make_axes_locatable to place axes next to the plot with your data to put the colorbar inside - https://matplotlib.org/2.0.2/mpl_toolkits/axes_grid/users/overview.html. I find placing a new axes for other objects (legends color maps or otherwise) easier than trying to draw them on the same axes.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
uniform_data = np.random.rand(10, 12)
fig, ax = plt.subplots(1,1, figsize = (5,5))
divider = make_axes_locatable(ax)
axDivY = divider.append_axes( 'right', size=0.2, pad= 0.1)
axDivY2 = divider.append_axes( 'right', size=0.2, pad= 0.2)
# we will use this for the colorscale bar
axDivY3 = divider.append_axes( 'right', size=0.2, pad= 0.2)
ax1 = sns.heatmap(uniform_data, ax=ax, cbar_ax=axDivY)
# the palette you were using to make the label column on the clustermap
# some simulated labels for your data with values
color_label_list =[random.randint(0,20) for i in range(20)]
pal = sns.color_palette("PuBu", len(set(color_label_list)))
n = len(pal)
size = 1
# plot the colors with imshow to make a colormap later
ax2 = axDivY2.imshow(np.array([color_label_list]),
cmap=mpl.colors.ListedColormap(list(pal)),
interpolation="nearest", aspect="auto")
# turn off the axes so they aren't visible- note that you need ax.axis('off) if you have older matplotlib
axDivY2.set_axis_off()
axDivY2.set_visible(False)
# plot the colorbar on the other axes (which is on top of the one that we turned off)
plt.colorbar(ax2, cax = axDivY3) ;

bar plot does not respect order of the legend text in matplotlib

Just noticed that the legend text doesnt have the same order as the plot bars. I would expect to see the "Banana" in first place of the legend. Is it possible to correct such behavior? Thanks
My code is:
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({"Apple" : [2,3,4,1], "Banana" : [4,2,1,2]})
ax = df.plot.barh()
ax.legend()
plt.show()
And my graph:
The legend labels are actually ordered correctly. Matplotlib's vertical axes by default start at the bottom and reach upwards. Hence the blue bars come first, just as in the legend.
You can invert the legend handles and labels:
h, l = ax.get_legend_handles_labels()
ax.legend(h[::-1], l[::-1])
You may also decide to invert the y axis.
ax = df.plot.barh()
ax.invert_yaxis()
Order of legend handlers is selected by columns ordering, you have to sort columns' names of dataframe in reversed order (use reindex_axis for column axis).
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({"Apple" : [2,3,4,1], "Banana" : [4,2,1,2]})
df = df.reindex_axis(reversed(sorted(df.columns)), axis = 1)
ax = df.plot.barh()
ax.legend()
plt.show()

Xaxis-Tick labels have disappeared plotting with pandas onto subplot, but are still stored

Using a complicated script that nests among other pandas.DataFrame.plot() and GridSpec in a subplot setting, I have the following problem:
When I create a 2-cols 1-row gridspec, the tick lables are all correct. When I create a 1-col 2-rows gridspec however, as soon as I plot onto the first (upper row) axes using pandas.DataFrame.plot(), the x-ticklabels for the top row disappear (the ticks remain).
It is not the case that the top ticks change once I draw something on the lower ax, sharex appears to not be the issue.
However, my x-labels are still stored:
axes[0].get_xaxis().get_ticklabels()
Out[59]:
<a list of 9 Text major ticklabel objects>
It's just that they're not displayed. I suspected a NullFormatter, but that's not the case either:
axes[0].get_xaxis().get_major_formatter()
Out[57]:
<matplotlib.ticker.ScalarFormatter at 0x7f7414330710>
I get both ticks and labels on the top of the first axes when I do
axes[0].get_xaxis().tick_top()
However, when I then go back to tick_bottom(), I only have ticks on bottom, not the labels.
What can cause my stored labels to not to be displayed despite a "normal" formatter?
Here's a simple example:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
df = pd.DataFrame(np.random.rand(100,2), columns=['A', 'B'])
figure = plt.figure()
GridSpec = gridspec.GridSpec(nrows=2, ncols=1)
[plt.subplot(gsSpec) for gsSpec in GridSpec]
axes = figure.axes
df.plot(secondary_y=['B'], ax=axes[0], sharex=False)
It's the secondary_y=['B'] that causes the xticks to disappear. I'm not sure why it does that.
Fortunately, you can use plt.setp(ax.get_xticklabels(), visible=True) (docs) to turn them back on manually:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
df = pd.DataFrame(np.random.rand(100,2), columns=['A', 'B'])
figure = plt.figure()
GridSpec = gridspec.GridSpec(nrows=2, ncols=1)
axes = [plt.subplot(gsSpec) for gsSpec in GridSpec]
ax = axes[0]
df.plot(secondary_y=['B'], ax=ax, sharex=True)
plt.setp(ax.get_xticklabels(), visible=True)

Rotate label text in seaborn factorplot

I have a simple factorplot
import seaborn as sns
g = sns.factorplot("name", "miss_ratio", "policy", dodge=.2,
linestyles=["none", "none", "none", "none"], data=df[df["level"] == 2])
The problem is that the x labels all run together, making them unreadable. How do you rotate the text so that the labels are readable?
I had a problem with the answer by #mwaskorn, namely that
g.set_xticklabels(rotation=30)
fails, because this also requires the labels. A bit easier than the answer by #Aman is to just add
plt.xticks(rotation=45)
You can rotate tick labels with the tick_params method on matplotlib Axes objects. To provide a specific example:
ax.tick_params(axis='x', rotation=90)
This is still a matplotlib object. Try this:
# <your code here>
locs, labels = plt.xticks()
plt.setp(labels, rotation=45)
Any seaborn plots suported by facetgrid won't work with (e.g. catplot)
g.set_xticklabels(rotation=30)
however barplot, countplot, etc. will work as they are not supported by facetgrid. Below will work for them.
g.set_xticklabels(g.get_xticklabels(), rotation=30)
Also, in case you have 2 graphs overlayed on top of each other, try set_xticklabels on graph which supports it.
If anyone wonders how to this for clustermap CorrGrids (part of a given seaborn example):
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(context="paper", font="monospace")
# Load the datset of correlations between cortical brain networks
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
corrmat = df.corr()
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(12, 9))
# Draw the heatmap using seaborn
g=sns.clustermap(corrmat, vmax=.8, square=True)
rotation = 90
for i, ax in enumerate(g.fig.axes): ## getting all axes of the fig object
ax.set_xticklabels(ax.get_xticklabels(), rotation = rotation)
g.fig.show()
You can also use plt.setp as follows:
import matplotlib.pyplot as plt
import seaborn as sns
plot=sns.barplot(data=df, x=" ", y=" ")
plt.setp(plot.get_xticklabels(), rotation=90)
to rotate the labels 90 degrees.
For a seaborn.heatmap, you can rotate these using (based on #Aman's answer)
pandas_frame = pd.DataFrame(data, index=names, columns=names)
heatmap = seaborn.heatmap(pandas_frame)
loc, labels = plt.xticks()
heatmap.set_xticklabels(labels, rotation=45)
heatmap.set_yticklabels(labels[::-1], rotation=45) # reversed order for y
One can do this with matplotlib.pyplot.xticks
import matplotlib.pyplot as plt
plt.xticks(rotation = 'vertical')
# Or use degrees explicitly
degrees = 70 # Adjust according to one's preferences/needs
plt.xticks(rotation=degrees)
Here one can see an example of how it works.
Use ax.tick_params(labelrotation=45). You can apply this to the axes figure from the plot without having to provide labels. This is an alternative to using the FacetGrid if that's not the path you want to take.
If the labels have long names it may be hard to get it right. A solution that worked well for me using catplot was:
import matplotlib.pyplot as plt
fig = plt.gcf()
fig.autofmt_xdate()

Categories