How to display multiple annotations in Seaborn Heatmap cells? - python

I want seaborn heatmap to display multiple values in each cell of the heatmap. Here is a manual example of what I want to see, just to be clear:
data = np.array([[0.000000,0.000000],[-0.231049,0.000000],[-0.231049,0.000000]])
labels = np.array([['A\nExtra Stuff','B'],['C','D'],['E','F']])
fig, ax = plt.subplots()
ax = sns.heatmap(data, annot = labels, fmt = '')
Here is an example that gets seaborn.heatmap to display the flightsRoundUp values in the cells.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
def RoundUp(x):
    """Round *x* up to the nearest multiple of 10 and return it as an int.

    Values that are already a multiple of 10 are returned unchanged.
    """
    return int(np.ceil(x / 10) * 10)
# Load the example flights dataset (long-form) and pivot it to a wide-form
# month x year table of passenger counts.
flights_long = sns.load_dataset("flights")
# Keyword arguments: newer pandas releases no longer accept these positionally.
flights = flights_long.pivot(index="month", columns="year", values="passengers")
# Element-wise rounded copy, used only as the text shown inside each cell.
flightsRoundUp = flights.applymap(RoundUp)
# Draw a heatmap coloured by the raw values and annotated with the rounded ones;
# fmt="" makes seaborn print the annotation values without numeric formatting.
f, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(flights, annot=flightsRoundUp, fmt="", linewidths=.5, ax=ax)
What is the best way to display both flightsRoundUp and flights in all cells? Something like the first manual example above, but for all the cells in a vectorized-like way...

Rotail's answer didn't work for me, I got an error when applying that lambda function.
However, I found a solution that exploits the fact that seaborn draws successive plots on top of each other. All you have to do is use one call to heatmap to establish the figure, and then a subsequent call for each set of annotations. Use the annot_kws argument to make sure the texts aren't written over each other.
# X holds the numeric values that colour the cells; Y and Z each hold one set
# of text labels to be written into those same cells.
X = pd.DataFrame({'a':[1, 2, 3], 'b':[4, 5, 6]})
Y = pd.DataFrame({'A':['A', 'B', 'C'], 'B':['E', 'F', 'G']})
Z = pd.DataFrame({'A':['(Extra Stuff)', '(Extra Stuff)', '(Extra Stuff)'], 'B':['(Extra Stuff)', '(Extra Stuff)', '(Extra Stuff)']})
# First call: draw the coloured cells (and the colorbar) with no annotations.
sns.heatmap(X, annot=False)
# Each further call draws over the previous one and contributes one layer of
# text. cbar=False avoids adding another colorbar, and the differing vertical
# alignments ('bottom' vs 'top') keep the two texts from overlapping.
sns.heatmap(X, annot=Y, annot_kws={'va':'bottom'}, fmt="", cbar=False)
sns.heatmap(X, annot=Z, annot_kws={'va':'top'}, fmt="", cbar=False)

Following works for me too:
X = pd.DataFrame({'a':[1, 2, np.nan], 'b':[10, 20, 30]})
Y = pd.DataFrame({'A':[11, 222, np.nan], 'B':[110, np.nan, 330]})
# convert to string
X_value_ann = (X).astype('|S5').reset_index()
Y_value_ann = (Y).astype('|S5').reset_index()
# define () and new line to glue on later
br = np.char.array(pd.DataFrame('\n(', index=X_value_ann.index, columns=X_value_ann.columns))
cl = np.char.array(pd.DataFrame(')', index=X_value_ann.index, columns=X_value_ann.columns))
# convert to chararray
X_value_ann = np.char.array(X_value_ann)
Y_value_ann = np.char.array(Y_value_ann)
# glue and reshape
my_annotation = pd.DataFrame(X_value_ann+br+Y_value_ann+cl)
my_annotation = my_annotation.applymap(lambda x: x.decode('utf-8'))
my_annotation = my_annotation.drop(columns=[0])
my_annotation

You should be able to set fmt="" and format your labels with appropriate "\n" to have multiple lines of annotations.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
np.random.seed(0)
sns.set_theme()
uniform_data = np.random.rand(4, 4)
fig,ax = plt.subplots(figsize=(50,20))
# One label per cell, mirroring the row/column layout of uniform_data. The
# "\n" inside each label is what splits the annotation across two lines.
uniform_data_labels = [['Example\nExample' for _ in row] for row in uniform_data]
sns.heatmap(uniform_data, vmin=0, vmax=1, annot=uniform_data_labels ,ax=ax,fmt="",annot_kws={"fontsize":30})
plt.show()
1

Related

Pandas hist subplots - adding colour bar for the colours of each histogram

I have the columns of a dataframe plotted as separate histogram subplots. For each subplot, I want the bars coloured according to the value in a separate list. I have managed this by making a cmap of it and manually cycling those colours, however, is there a way to add a colorbar to the side to show what values these colours belong to? This is what I have right now:
import pandas as pd
import matplotlib as mpl
from matplotlib.colors import rgb2hex
#reading in the data
df = pd.read_csv( "shortlist_temp.dat", sep='\t',header=(0), usecols=(range(1,13)))
#separate list of values
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0, 4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]
#Colormapping the values. I did not like the result from the original values so I reduced by 4000.
orig_star_teff_norm = [i - 4000 for i in orig_star_teff]
orig_star_teff_norm = [float(i)/max(orig_star_teff_norm) for i in orig_star_teff_norm]
cmap = mpl.cm.plasma
color_list = cmap(orig_star_teff_norm)
color_list2 = [ rgb2hex(color_list[i,:]) for i in range(color_list.shape[0]) ]
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color = color_list2)
ax = df.plot.hist(subplots=True, bins = 12, legend=False, layout=(3, 4), figsize = (15,10), sharey = True)
ax[0,0].set_title('ABOO')
ax[0,1].set_title('EpsVIR')
ax[0,2].set_title('HIP 96014')
ax[0,3].set_title('2M16113361')
ax[1,0].set_title('KIC 3955590')
ax[1,1].set_title('KIC 5113061')
ax[1,2].set_title('KIC 5859492')
ax[1,3].set_title('KIC 6547007')
ax[2,0].set_title('KIC 11444313')
ax[2,1].set_title('KIC 11657684')
ax[2,2].set_title('HD102328-K3III')
ax[2,3].set_title('HD142091-K0III')
Resulting plot
Instead of doing all the normalization steps manually, it probably is easier to create a norm. In this case a norm that maps the values from 4000 till max to the range 0,1 needed for the colormap. Note that converting to hex isn't necessary.
With the norm and the colormap, a ScalarMappable can be created with all the necessary information for a colorbar:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.cm import ScalarMappable

# reading in the data
# df = pd.read_csv("shortlist_temp.dat", sep='\t', header=(0), usecols=(range(1, 13)))
# generating some dummy data
df = pd.DataFrame(np.random.randn(100, 12))
# separate list of values
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0, 4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]
# Map the values from [4000, max] linearly onto [0, 1], the range the colormap
# expects; the same norm is reused below so the colorbar matches the bars.
norm = plt.Normalize(4000, max(orig_star_teff))
cmap = mpl.cm.plasma
color_list = cmap(norm(orig_star_teff))
# One colour per column: each histogram subplot takes the next colour in the cycle.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=color_list)
axs = df.plot.hist(subplots=True, bins=12, legend=False, layout=(3, 4), figsize=(15, 10), sharey=True)
titles = ['ABOO', 'EpsVIR', 'HIP 96014', '2M16113361',
          'KIC 3955590', 'KIC 5113061', 'KIC 5859492', 'KIC 6547007',
          'KIC 11444313', 'KIC 11657684', 'HD102328-K3III', 'HD142091-K0III']
for ax, title in zip(axs.flat, titles):
    ax.set_title(title)
# Attach the colorbar to the right-most column of subplots.
plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=axs[:, -1])
plt.show()

Wrap xlabels in Seaborn Plot

I have been trying to modify my plot so that the xlabels are wrapped.
Have looked at few suggestions from similar questions.
But am unable to use them on this.
The ax.set_xticklabels code does not wrap the labels.
The plt.xticks code throws an error -
AttributeError: 'Text' object has no attribute 'expandtabs'
plt.figure(figsize = (7,5))
ax = sns.countplot(data = df3, x = df3.PaymentMethod, hue = df3.Churn)
#ax.set_xticklabels(ax.get_xticklabels(), ha="right", horizontalalignment = 'center', wrap = True)
plt.xticks([textwrap.fill(label, 10) for label in ax.get_xticklabels()],
rotation = 10, fontsize=8, horizontalalignment="center")
Image of plot with overlapping xlabels
textwrap works as expected with the code suggested in the comments:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import seaborn as sns # v 0.11.0
import textwrap
# Create sample dataset
rng = np.random.default_rng(seed=1)
cat_names = ['Short name', 'Slightly longer name', 'Rather much longer name',
'Longest name of them all by far']
counts = rng.integers(10, 100, len(cat_names))
var_cat = np.repeat(cat_names, counts)
var_bool = rng.choice(['True', 'False'], size=len(var_cat))
df = pd.DataFrame(dict(vcat=var_cat, vbool=var_bool))
# Plot seaborn countplot with wrapped tick labels
ax = sns.countplot(data=df, x='vcat', hue='vbool')
labels = [textwrap.fill(label.get_text(), 12) for label in ax.get_xticklabels()]
ax.set_xticklabels(labels);

How to combine two heatmaps in Seaborn in Python so both are shown in the same heatmap?

This is link to the data I'm using:
https://github.com/fivethirtyeight/data/tree/master/drug-use-by-age
I'm using Jupyter Lab, and here's the code:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sb
url = 'https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv'
df = pd.read_csv(url, index_col = 0)
df.dtypes
df.replace('-', np.nan, inplace=True)
df = df.iloc[:,:].astype(float)
df = df.loc[:, df.columns != 'n']
#df.columns = df.columns.str.rstrip('-use')
df
fig, axes = plt.subplots(1,2, figsize=(20, 8))
fig.subplots_adjust(wspace=0.1)
fig.colorbar(ax.collections[0], ax=ax,location="right", use_gridspec=False, pad=0.2)
#plt.figure(figsize=(16, 16))
df_percentage = df.iloc[:,range(0,26,2)]
plot_precentage = sb.heatmap(df_percentage, cmap='Reds', ax=axes[0], cbar_kws={'format': '%.0f%%', 'label': '% used in past 12 months'})
df_frequency = df.iloc[:,range(1,27,2)]
plot_frequency = sb.heatmap(df_frequency, cmap='Blues', ax=axes[1], cbar_kws= dict(label = 'median frequency a user used'))
I can just show two of them in a subplot in separate diagrams.
I want to make it look like this (this is made in paint):
Also show the data side by side. Is there a simple way to achieve that?
A pretty simple solution with mask option:
# Build an array with the same shape as df whose entries are the column index
# modulo 2: 0 in even-numbered columns, 1 in odd-numbered columns.
mask = np.vstack([np.arange(df.shape[1])]* df.shape[0]) % 2
fig, axes = plt.subplots()
# Both heatmaps are drawn on the same Axes. Cells where the mask is true are
# left blank (per seaborn's `mask` parameter), so this call colours only the
# even-numbered columns in red.
# NOTE(review): assumes df's columns alternate percentage / frequency, as in
# the question's iloc[:, range(0, 26, 2)] and range(1, 27, 2) split - confirm.
plot_precentage = sns.heatmap(df,mask=mask, cmap='Reds', ax=axes,
cbar_kws={'format': '%.0f%%',
'label': '% used in past 12 months'}
)
# 1-mask inverts the mask, so this call colours only the odd-numbered columns
# in blue and adds its own colorbar.
plot_frequency = sns.heatmap(df, mask=1-mask, cmap='Blues', ax=axes,
cbar_kws= dict(label = 'median frequency a user used')
)
Output:

Plot facetted barchart using summary statistics within Python

Is there a concise way to plot summary statistics in Python as a boxplot? The code below gives a barchart of each mean, I want to swap each barchart to a boxplot.
I realise that I don't need to summarise, however with the real data, just plotting one of the boxes took a long time (even with showfliers=False); I don't need to see the outliers and I will also want to add a "population-wide" bar (i.e. across all clusters) for each "pc" (any suggestions for that would be greatly appreciated .. I am again attempting to move from R to python and just getting these few lines of code took long enough)
import matplotlib.pyplot as plt
import seaborn as sns
out = pd.DataFrame({'cluster':['a']*100+['b']*100,
'pc': ['w', 'x', 'y', 'z']*50,
'value': np.random.normal(size=200)})
grouped = out.groupby(['cluster', 'pc'])
out = grouped.describe()
out = out.reset_index()
out.columns = [e[0] if e[0] != 'value' else e[1] for e in out.columns.tolist()]
#sns.catplot(x='cluster', y='mean', col='pc', kind='bar', data=out)
g = sns.FacetGrid(out, col="pc", col_wrap = 2)
g = g.map(plt.bar, "cluster", "mean")
You can draw a boxplot from the summary statistics using Axes.bxp(). This needs to be encapsulated in a custom plotting function passed to map():
def my_bxp(**kwargs):
    """Draw box plots on the current axes from precomputed summary statistics.

    Written to be passed to ``FacetGrid.map_dataframe``, which provides the
    keyword arguments read here.

    Keyword Args:
        data: DataFrame with one row per box and the columns ``'cluster'``,
            ``'min'``, ``'25%'``, ``'50%'``, ``'75%'`` and ``'max'``.
        color: Colour applied to the box, whisker and cap lines.
    """
    ax = plt.gca()
    data = kwargs.pop('data')
    color = kwargs.pop('color')
    # Translate every summary row into the dict layout Axes.bxp() expects.
    # The whiskers span min..max here, not the conventional 1.5 * IQR.
    bxpstats = [
        {
            'med': row.loc['50%'],
            'q1': row.loc['25%'],
            'q3': row.loc['75%'],
            'whislo': row.loc['min'],
            'whishi': row.loc['max'],
            'label': row.loc['cluster'],
        }
        for _, row in data.iterrows()
    ]
    # Draw once, after all rows are collected, so each box is plotted a single time.
    ax.bxp(bxpstats, showfliers=False, boxprops=dict(color=color),
           whiskerprops=dict(color=color),
           capprops=dict(color=color))
g = sns.FacetGrid(out, col="pc", col_wrap = 2)
g = g.map_dataframe(my_bxp)
Note that, for simplicity's sake, I have the whiskers extend from min to max, which is not the usual representation. You may have to calculate proper whiskers extents when you calculate your summary statistics if that's what you want.
It's easier to draw multiple boxplots with the original data intact.
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'cluster':['a']*100+['b']*100,
                   'pc': ['w', 'x', 'y', 'z']*50,
                   'value': np.random.normal(size=200)})
pcs = ['w', 'x', 'y', 'z']
# Each 'pc' subset has 50 rows, the first 25 from cluster 'a' and the last 25
# from cluster 'b', so this list lines up with every re-indexed value column.
c = ['a']*25+['b']*25
# Reshape to wide form: one 'cluster' column plus one value column per 'pc'.
df1 = pd.concat(
    [pd.Series(c)]
    + [df[df['pc'] == pc]['value'].reset_index(drop=True) for pc in pcs],
    axis=1, ignore_index=True)
df1.columns = ['cluster'] + pcs
fig, axes = plt.subplots(1, 4, figsize=(8, 4))
fig.subplots_adjust(wspace=0.3, hspace=0.4)
# One boxplot per 'pc' column, each on its own subplot.
for ax, pc in zip(axes, pcs):
    sns.boxplot(x="cluster", y=pc, data=df1, orient='v', ax=ax)

Merging legends of different lineplots using seaborn

I would like to plot two dataframes with a 'long' representation, and differing axes, in one plot using sns.lineplot(). Yet, I am failing to plot it with a single legend containing the elements of both lineplots.
The issue is similar to this: Secondary axis with twinx(): how to add to legend?, though I'd like to use seaborn.
A minimal working example up to the point I got stuck is given below.
import pandas as pd
import seaborn as sns
import numpy as np
import itertools
# mock dataset
lst = range(1,11)
steps1 = list(itertools.chain.from_iterable(itertools.repeat(x, 4) for x in lst))
labels1 = ['A','B']*20
values1 = list(np.random.uniform(0,1,40))
df1 = pd.DataFrame({'steps':steps1, 'lab':labels1, 'vals':values1})
lst = range(6,11)
steps2 = list(itertools.chain.from_iterable(itertools.repeat(x, 4) for x in lst))
labels2 = ['C','D']*10
values2 = list(np.random.uniform(10,20,20))
df2 = pd.DataFrame({'steps':steps2, 'lab2':labels2, 'others':values2})
# plotting
fig, ax = plt.subplots()
fig = sns.lineplot(x='steps',y='vals', data=df1, hue='lab',palette='bright', legend='brief')
ax2 = ax.twinx()
fig2 = sns.lineplot(x='steps',y='others', hue='lab2', data=df2 ,palette='dark', legend='brief')
# How do I merge the legends into one?
# the solution below gives me one merged and one separate legend
h1,l1 = fig.get_legend_handles_labels()
h2,l2 = fig2.get_legend_handles_labels()
ax.legend(loc=3, handles=h1+h2, labels = l1+l2)
I just resolved it by removing the obsolete legend by ax2.get_legend().remove().

Categories