How to label a violin plot on Seaborn? - python

I'm trying to change the labels in a violin plot on Seaborn. I wanna change the NU_NOTA_CN, NU_NOTA_CH, NU_NOTA_LC, NU_NOTA_MT and NU_NOTA_REDAÇÃO, and TP_ESCOLA, and the 2 and 3.
import pandas as pd
import numpy as np
import seaborn as sns
fig_dims = (10, 8)
fig, ax = plt.subplots(figsize=fig_dims)
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot)
plt.show()
plt.clf()
plt.close()
violin plot here

You can use the set_xticklabels
f, ax = plt.subplots()
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot, ax=ax)
ax.set_xticklabels([...]) # list of strings
In addition, you can use get_xticklabels, for example.
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
xticklabels = [t.replace('NU_', '').replace('_', ' ').title()
ax.set_xticklabels(xticklabels)

tips = sns.load_dataset("tips")
foo = sns.boxplot(x="day", y="total_bill", data=tips)
plt.xticks([0, 1, 2, 3], ['x1', 'x2', 'x3', 'x4'])
plt.show()
The number of unique values in 'day' column (i.e. cardinality of the feature) should be the length of the lists passed to plt.xticks() function.
matplotlib.pyplot.xticks

Related

Bar labels in matplotlib/Seaborn

In version 3.4, matplotlib added automatic Bar labels:
https://matplotlib.org/stable/users/whats_new.html#new-automatic-labeling-for-bar-charts
I'm trying to use this on a bar plot generated by Seaborn.
fig, axs = plt.subplots(
nrows=2,
)
for i, col in enumerate(['col_1', 'col_2']):
ax = axs[i]
sns.barplot(
x="class",
y=col,
hue="hue_col",
data=data_df,
edgecolor=".3",
linewidth=0.5,
ax=ax
)
ax.bar_label(ax.containers[i]) # Doesn't work
What do I need to do to make this work? example plot
You can loop through the containers and call ax.bar_label(...) for each of them. Note that seaborn creates one set of bars for each hue value.
The following example uses the titanic dataset and sets ci=None to avoid the error bars overlapping with the text (if error bars are needed, one could set a lighter color, e.g. errcolor='gold').
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
for ax, col in zip(axs, ['age', 'fare']):
sns.barplot(
x='sex',
y=col,
hue="class",
data=titanic,
edgecolor=".3",
linewidth=0.5,
ci=None,
ax=ax
)
ax.set_title('mean ' + col)
ax.margins(y=0.1) # make room for the labels
for bars in ax.containers:
ax.bar_label(bars, fmt='%.1f')
plt.tight_layout()
plt.show()

How to remove or hide y-axis ticklabels from a matplotlib / seaborn plot

I made a plot that looks like this
I want to turn off the ticklabels along the y axis. And to do that I am using
plt.tick_params(labelleft=False, left=False)
And now the plot looks like this. Even though the labels are turned off the scale 1e67 still remains.
Turning off the scale 1e67 would make the plot look better. How do I do that?
seaborn is used to draw the plot, but it's just a high-level API for matplotlib.
The functions called to remove the y-axis labels and ticks are matplotlib methods.
After creating the plot, use .set().
.set(yticklabels=[]) should remove tick labels.
This doesn't work if you use .set_title(), but you can use .set(title='')
.set(ylabel=None) should remove the axis label.
.tick_params(left=False) will remove the ticks.
Similarly, for the x-axis: How to remove or hide x-axis labels from a seaborn / matplotlib plot?
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
Example 1
import seaborn as sns
import matplotlib.pyplot as plt
# load data
exercise = sns.load_dataset('exercise')
pen = sns.load_dataset('penguins')
# create figures
fig, ax = plt.subplots(2, 1, figsize=(8, 8))
# plot data
g1 = sns.boxplot(x='time', y='pulse', hue='kind', data=exercise, ax=ax[0])
g2 = sns.boxplot(x='species', y='body_mass_g', hue='sex', data=pen, ax=ax[1])
plt.show()
Remove Labels
fig, ax = plt.subplots(2, 1, figsize=(8, 8))
g1 = sns.boxplot(x='time', y='pulse', hue='kind', data=exercise, ax=ax[0])
g1.set(yticklabels=[]) # remove the tick labels
g1.set(title='Exercise: Pulse by Time for Exercise Type') # add a title
g1.set(ylabel=None) # remove the axis label
g2 = sns.boxplot(x='species', y='body_mass_g', hue='sex', data=pen, ax=ax[1])
g2.set(yticklabels=[])
g2.set(title='Penguins: Body Mass by Species for Gender')
g2.set(ylabel=None) # remove the y-axis label
g2.tick_params(left=False) # remove the ticks
plt.tight_layout()
plt.show()
Example 2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# sinusoidal sample data
sample_length = range(1, 1+1) # number of columns of frequencies
rads = np.arange(0, 2*np.pi, 0.01)
data = np.array([(np.cos(t*rads)*10**67) + 3*10**67 for t in sample_length])
df = pd.DataFrame(data.T, index=pd.Series(rads.tolist(), name='radians'), columns=[f'freq: {i}x' for i in sample_length])
df.reset_index(inplace=True)
# plot
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot('radians', 'freq: 1x', data=df)
# or skip the previous two lines and plot df directly
# ax = df.plot(x='radians', y='freq: 1x', figsize=(8, 8), legend=False)
Remove Labels
# plot
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot('radians', 'freq: 1x', data=df)
# or skip the previous two lines and plot df directly
# ax = df.plot(x='radians', y='freq: 1x', figsize=(8, 8), legend=False)
ax.set(yticklabels=[]) # remove the tick labels
ax.tick_params(left=False) # remove the ticks

How to plot a paired histogram using seaborn

I would like to make a paired histogram like the one shown here using the seaborn distplot.
This kind of plot can also be referred to as the back-to-back histogram shown here, or a bihistogram inverted/mirrored along the x-axis as discussed here.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20,10,1000)
blue = np.random.poisson(60,1000)
fig, ax = plt.subplots(figsize=(8,6))
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='blue')
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='green')
ax.set_xticks(np.arange(-20,121,20))
ax.set_yticks(np.arange(0.0,0.07,0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()
Here is the output:
When I use the method discussed here (plt.barh), I get the bar plot shown just below, which is not what I am looking for.
Or maybe I haven't understood the workaround well enough...
A simple/short implementation of python-seaborn-distplot similar to these kinds of plots would be perfect. I edited the figure of my first plot above to show the kind of plot I hope to achieve (though y-axis not upside down):
Any leads would be greatly appreciated.
You could use two subplots and invert the y-axis of the lower one and plot with the same bins.
df = pd.DataFrame({'a': np.random.normal(0,5,1000), 'b': np.random.normal(20,5,1000)})
fig =plt.figure(figsize=(5,5))
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
bins = np.arange(-20,40)
ax.hist(df['a'], bins=bins)
ax2.hist(df['b'],color='orange', bins=bins)
ax2.invert_yaxis()
edit:
improvements suggested by #mwaskom
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(5,5))
bins = np.arange(-20,40)
for ax, column, color, invert in zip(axes.ravel(), df.columns, ['teal', 'orange'], [False,True]):
ax.hist(df[column], bins=bins, color=color)
if invert:
ax.invert_yaxis()
plt.subplots_adjust(hspace=0)
Here is a possible approach using seaborn's displots.
Seaborn doesn't return the created graphical elements, but the ax can be interrogated. To make sure the ax only contains the elements you want upside down, those elements can be drawn first. Then, all the patches (the rectangular bars) and the lines (the curve for the kde) can be given their height in negative. Optionally the x-axis can be set at y == 0 using ax.spines['bottom'].set_position('zero').
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20, 10, 1000)
blue = np.random.poisson(60, 1000)
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='green')
for p in ax.patches: # turn the histogram upside down
p.set_height(-p.get_height())
for l in ax.lines: # turn the kde curve upside down
l.set_ydata(-l.get_ydata())
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='blue')
ax.set_xticks(np.arange(-20, 121, 20))
ax.set_yticks(np.arange(0.0, 0.07, 0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
pos_ticks = np.array([t for t in ax.get_yticks() if t > 0])
ticks = np.concatenate([-pos_ticks[::-1], [0], pos_ticks])
ax.set_yticks(ticks)
ax.set_yticklabels([f'{abs(t):.2f}' for t in ticks])
ax.spines['bottom'].set_position('zero')
plt.show()

Plot two Histograms with Matplotlib and Python

I have two lists of numbers:
list_1 = [1,2,3,4,5,1,2,3,4,5,6,3,4,5,1,3,4,5,4,5,6,8,9,12,3,3,3,4,3,4,5,6,5,6,7,8,9,5,3,2,4,5,2,3,4,11,13,4,5,3,5,6,7,11,13,3,4,5,4,5]
list_2 = [4,5,6,7,8,9,4,5,6,7,8,9,5,6,7,8,9,6,7,8,9,12,15,16,11,12,7,8,9,7,8,9,5,6,7,8,9,7,8,9,8,9,11,10,12,16,7,8,9,10,10,8,9,8,9,10,10,10,15,16,19]
I want to plot two histograms with Python and Matplotlib so that i get result like this:
I need the line histogram, and I do not know how to plot it. I know how to make bar histogram, but I want to have line histogram so that I can see intersection of this two histograms.
plt.hist(list_1, bins = 10)
plt.hist(list_2, bins = 10)
plt.show()
The Seaborn Library can help you:
import matplotlib.pyplot as plt
import seaborn as sns
# Your Data
list_1 = [1,2,3,4,5,1,2,3,4,5,6,3,4,5,1,3,4,5,4,5,6,8,9,12,3,3,3,4,3,4,5,6,5,6,7,8,9,5,3,2,4,5,2,3,4,11,13,4,5,3,5,6,7,11,13,3,4,5,4,5]
list_2 = [4,5,6,7,8,9,4,5,6,7,8,9,5,6,7,8,9,6,7,8,9,12,15,16,11,12,7,8,9,7,8,9,5,6,7,8,9,7,8,9,8,9,11,10,12,16,7,8,9,10,10,8,9,8,9,10,10,10,15,16,19]
# Creating a displot
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
sns.distplot(list_1, kde=True, ax = ax, hist=False, bins = 10)
sns.distplot(list_2, kde=True, ax = ax, hist=False, bins = 10)
plt.show()

Histogram with Boxplot above in Python

Hi I wanted to draw a histogram with a boxplot appearing the top of the histogram showing the Q1,Q2 and Q3 as well as the outliers. Example phone is below. (I am using Python and Pandas)
I have checked several examples using matplotlib.pyplot but hardly came out with a good example. And I also wanted to have the histogram curve appearing like in the image below.
I also tried seaborn and it provided me the shape line along with the histogram but didnt find a way to incorporate with boxpot above it.
can anyone help me with this to have this on matplotlib.pyplot or using pyplot
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="ticks")
x = np.random.randn(100)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True,
gridspec_kw={"height_ratios": (.15, .85)})
sns.boxplot(x, ax=ax_box)
sns.distplot(x, ax=ax_hist)
ax_box.set(yticks=[])
sns.despine(ax=ax_hist)
sns.despine(ax=ax_box, left=True)
From seaborn v0.11.2, sns.distplot is deprecated. Use sns.histplot for axes-level plots instead.
np.random.seed(2022)
x = np.random.randn(100)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)})
sns.boxplot(x=x, ax=ax_box)
sns.histplot(x=x, bins=12, kde=True, stat='density', ax=ax_hist)
ax_box.set(yticks=[])
sns.despine(ax=ax_hist)
sns.despine(ax=ax_box, left=True)
Solution using only matplotlib, just because:
# start the plot: 2 rows, because we want the boxplot on the first row
# and the hist on the second
fig, ax = plt.subplots(
2, figsize=(7, 5), sharex=True,
gridspec_kw={"height_ratios": (.3, .7)} # the boxplot gets 30% of the vertical space
)
# the boxplot
ax[0].boxplot(data, vert=False)
# removing borders
ax[0].spines['top'].set_visible(False)
ax[0].spines['right'].set_visible(False)
ax[0].spines['left'].set_visible(False)
# the histogram
ax[1].hist(data)
# and we are good to go
plt.show()
Expanding on the answer from #mwaskom, I made a little adaptable function.
import seaborn as sns
def histogram_boxplot(data, xlabel = None, title = None, font_scale=2, figsize=(9,8), bins = None):
""" Boxplot and histogram combined
data: 1-d data array
xlabel: xlabel
title: title
font_scale: the scale of the font (default 2)
figsize: size of fig (default (9,8))
bins: number of bins (default None / auto)
example use: histogram_boxplot(np.random.rand(100), bins = 20, title="Fancy plot")
"""
sns.set(font_scale=font_scale)
f2, (ax_box2, ax_hist2) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)}, figsize=figsize)
sns.boxplot(data, ax=ax_box2)
sns.distplot(data, ax=ax_hist2, bins=bins) if bins else sns.distplot(data, ax=ax_hist2)
if xlabel: ax_hist2.set(xlabel=xlabel)
if title: ax_box2.set(title=title)
plt.show()
histogram_boxplot(np.random.randn(100), bins = 20, title="Fancy plot", xlabel="Some values")
Image
def histogram_boxplot(feature, figsize=(15,10), bins=None):
f,(ax_box,ax_hist)=plt.subplots(nrows=2,sharex=True, gridspec_kw={'height_ratios':(.25,.75)},figsize=figsize)
sns.distplot(feature,kde=False,ax=ax_hist,bins=bins)
sns.boxplot(feature,ax=ax_box, color='Red')
ax_hist.axvline(np.mean(feature),color='g',linestyle='-')
ax_hist.axvline(np.median(feature),color='y',linestyle='--')

Categories