Pandas Bar plot, how to annotate grouped horizontal bar charts - python

I ask this question because I haven't found a working example on how to annotate grouped horizontal Pandas bar charts yet. I'm aware of the following two:
Annotate bars with values on Pandas bar plots
Pandas, Bar Chart Annotations
But they are all about vertical bar charts. I.e., either don't have a solution for horizontal bar chart, or it is not fully working.
After several weeks working on this issue, I finally am able to ask the question with a sample code, which is almost what I want, just not 100% working. Need your help to reach for that 100%.
Here we go, the full code is uploaded here. The result looks like this:
You can see that it is almost working, just the label is not placed at where I want and I can't move them to a better place myself. Besides, because the top of the chart bar is used for displaying error bar, so what I really want is to move the annotate text toward the y-axis, line up nicely on either left or right side of y-axis, depending the X-value. E.g., this is what my colleagues can do with MS Excel:
Is this possible for Python to do that with Pandas chart?
I'm including the code from my above url for the annotation, one is my all-that-I-can-do, and the other is for the reference (from In [23]):
# my all-that-I-can-do
def autolabel(rects):
#if height constant: hbars, vbars otherwise
if (np.diff([plt.getp(item, 'width') for item in rects])==0).all():
x_pos = [rect.get_x() + rect.get_width()/2. for rect in rects]
y_pos = [rect.get_y() + 1.05*rect.get_height() for rect in rects]
scores = [plt.getp(item, 'height') for item in rects]
else:
x_pos = [rect.get_width()+.3 for rect in rects]
y_pos = [rect.get_y()+.3*rect.get_height() for rect in rects]
scores = [plt.getp(item, 'width') for item in rects]
# attach some text labels
for rect, x, y, s in zip(rects, x_pos, y_pos, scores):
ax.text(x,
y,
#'%s'%s,
str(round(s, 2)*100)+'%',
ha='center', va='bottom')
# for the reference
ax.bar(1. + np.arange(len(xv)), xv, align='center')
# Annotate with text
ax.set_xticks(1. + np.arange(len(xv)))
for i, val in enumerate(xv):
ax.text(i+1, val/2, str(round(val, 2)*100)+'%', va='center',
ha='center', color='black')
Please help. Thanks.

So, I changed a bit the way you construct your data for simplicity:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_style("white") #for aesthetic purpose only
# fake data
df = pd.DataFrame({'A': np.random.choice(['foo', 'bar'], 100),
'B': np.random.choice(['one', 'two', 'three'], 100),
'C': np.random.choice(['I1', 'I2', 'I3', 'I4'], 100),
'D': np.random.randint(-10,11,100),
'E': np.random.randn(100)})
p = pd.pivot_table(df, index=['A','B'], columns='C', values='D')
e = pd.pivot_table(df, index=['A','B'], columns='C', values='E')
ax = p.plot(kind='barh', xerr=e, width=0.85)
for r in ax.patches:
if r.get_x() < 0: # it it's a negative bar
ax.text(0.25, # set label on the opposite side
r.get_y() + r.get_height()/5., # y
"{:" ">7.1f}%".format(r.get_x()*100), # text
bbox={"facecolor":"red",
"alpha":0.5,
"pad":1},
fontsize=10, family="monospace", zorder=10)
else:
ax.text(-1.5, # set label on the opposite side
r.get_y() + r.get_height()/5., # y
"{:" ">6.1f}%".format(r.get_width()*100),
bbox={"facecolor":"green",
"alpha":0.5,
"pad":1},
fontsize=10, family="monospace", zorder=10)
plt.tight_layout()
which gives:
I plot the label depending on the mean value and put it on the other side of the 0-line so you're pretty sure that it will never overlap to something else, except an error bar sometimes. I set a box behind the text so it reflects the value of the mean.
There are some values you'll need to adjust depending on your figure size so the labels fit right, like:
width=0.85
+r.get_height()/5. # y
"pad":1
fontsize=10
"{:" ">6.1f}%".format(r.get_width()*100) : set total amount of char in the label (here, 6 minimum, fill with white space on the right if less than 6 char). It needs family="monospace"
Tell me if something isn't clear.
HTH

Related

Create a stacked bar plot of percentages and annotate with count

I have this data (df) and I get their percentages (data=rel) and plotted a stacked bar graph.
Now I want to add values (non percentage values) to the centers of each bar but from my first dataframe.
My code for now:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from csv import reader
import seaborn as sns
df = pd.DataFrame({'IL':['Balıkesir', 'Bursa', 'Çanakkale', 'Edirne', 'İstanbul', 'Kırklareli', 'Kocaeli', 'Sakarya','Tekirdağ','Yalova'],'ENGELLIUYGUN':[7,13,3,1,142,1,14,1,2,2],'ENGELLIUYGUNDEGIL':[1,5,0,0,55,0,3,0,1,0]})
iller=df.iloc[:,[0]]
df_total = df["ENGELLIUYGUN"] + df["ENGELLIUYGUNDEGIL"]
df_rel = df[df.columns[1:]].div(df_total, 0)*100
rel=[]
rel=pd.DataFrame(df_rel)
rel['İller'] = iller
d=df.iloc[:,[1]] #I want to add these values to the center of blue bars.
f=df.iloc[:,[2]] #I want to add these values to the center of green bars.
sns.set_theme (style='whitegrid')
ax=rel.plot(x='İller',kind='bar', stacked=True, color=["#3a88e2","#5c9e1e"], label=("Uygun","Uygun Değil"))
plt.legend(["Evet","Hayır"],fontsize=8, bbox_to_anchor=(1, 0.5))
plt.xlabel('...........',fontsize=12)
plt.ylabel('..........',fontsize=12)
plt.title('.............',loc='center',fontsize=14)
plt.ylim(0,100)
ax.yaxis.grid(color='gray', linestyle='dashed')
plt.show()
I have this for now:
I want the exact same style of this photo:
I am using Anaconda-Jupyter Notebook.
Answering: I want to add values (non percentage values) to the centers of each bar but from my first dataframe.
The correct way to annotate bars, is with .bar_label, as explained in this answer.
The values from df can be sent to the label= parameter instead of the percentages.
This answer shows how to succinctly calculate the percentages, but plots the counts and annotates with percentage and value, whereas this OP wants to plot the percentage on the y-axis and annotate with counts.
This answer shows how to place the legend at the bottom of the plot.
This answer shows how to format the axis tick labels as percent.
See pandas.DataFrame.plot for an explanation of the available parameters.
I am using Anaconda-Jupyter Notebook. Everything from the comment, # plot percent; ..., should be in the same notebook cell.
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2
import pandas as pd
import matplotlib.ticker as tkr
# sample data
df = pd.DataFrame({'IL': ['Balıkesir', 'Bursa', 'Çanakkale', 'Edirne', 'İstanbul', 'Kırklareli', 'Kocaeli', 'Sakarya','Tekirdağ','Yalova'],
'ENGELLIUYGUN': [7, 13, 3, 1, 142, 1, 14, 1, 2, 2],
'ENGELLIUYGUNDEGIL': [1, 5, 0, 0, 55, 0, 3, 0, 1, 0]})
# set IL as the index
df = df.set_index('IL')
# calculate the percent
per = df.div(df.sum(axis=1), axis=0).mul(100)
# plot percent; adjust rot= for the rotation of the xtick labels
ax = per.plot(kind='bar', stacked=True, figsize=(10, 8), rot=0,
color=['#3a88e2', '#5c9e1e'], yticks=range(0, 101, 10),
title='my title', ylabel='', xlabel='')
# move the legend
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=2, frameon=False)
# format the y-axis tick labels
ax.yaxis.set_major_formatter(tkr.PercentFormatter())
# iterate through the containers
for c in ax.containers:
# get the current segment label (a string); corresponds to column / legend
col = c.get_label()
# use label to get the appropriate count values from df
# customize the label to account for cases when there might not be a bar section
labels = [v if v > 0 else '' for v in df[col]]
# the following will also work
# labels = df[col].replace(0, '')
# add the annotation
ax.bar_label(c, labels=labels, label_type='center', fontweight='bold')
Alternate Annotation Implementation
Since the column names in df and per are the same, they can be extracted directly from per.
# iterate through the containers and per column names
for c, col in zip(ax.containers, per):
# add the annotations with custom labels from df
ax.bar_label(c, labels=df[col].replace(0, ''), label_type='center', fontweight='bold')
I don't think any subtle method exist. So you have to print those yourself, adding explicitly text. Which is not that hard to do. For example, if you add this just after your plot
for i in range(len(d)):
ax.text(i, df_rel.iloc[i,0]/2, d.iloc[i,0], ha='center', fontweight='bold', color='#ffff00', fontsize='small')
ax.text(i, 50+df_rel.iloc[i,0]/2, f.iloc[i,0], ha='center', fontweight='bold', color='#400040', fontsize='small')
you get this result
You can of course change color, size, position, etc. (I am well known for by total lack of bon goût for those matter). But also decide some arbitrary rule, such as not printing '0' (that the advantage of doing things explicitly: your code, your rule; you don't have to fight an existing API to convince it to do it your way).

Plot Bar Graph with different Parametes in X Axis

I have a DataFrame like below. It has Actual and Predicted columns. I want to compare Actual Vs Predicted in Bar plot in one on one. I have confidence value for Predicted column and default for Actual confidence is 1. So, I want to keep Each row in single bar group Actual and Predicted value will be X axis and corresponding Confidence score as y value.
I am unable to get the expected plot because X axis values are not aligned or grouped to same value in each row.
Actual Predicted Confidence
0 A A 0.90
1 B C 0.30
2 C C 0.60
3 D D 0.75
Expected Bar plot.
Any hint would be appreciable. Please let me know if further details required.
What I have tried so far.
df_actual = pd.DataFrame()
df_actual['Key']= df['Actual'].copy()
df_actual['Confidence'] = 1
df_actual['Identifier'] = 'Actual'
df_predicted=pd.DataFrame()
df_predicted = df[['Predicted', 'Confidence']]
df_predicted = df_predicted.rename(columns={'Predicted': 'Key'})
df_predicted['Identifier'] = 'Predicted'
df_combined = pd.concat([df_actual,df_predicted], ignore_index=True)
df_combined
fig = px.bar(df_combined, x="Key", y="Confidence", color='Identifier',
barmode='group', height=400)
fig.show()
I have found that adjusting the data first makes it easier to get the plot I want. I have used Seaborn, hope that is ok. Please see if this code works for you. I have considered that the df mentioned above is already available. I created df2 so that it aligns to what you had shown in the expected figure. Also, I used index as the X-axis column so that the order is maintained... Some adjustments to ensure xtick names align and the legend is outside as you wanted it.
Code
vals= []
conf = []
for x, y, z in zip(df.Actual, df.Predicted, df.Confidence):
vals += [x, y]
conf += [1, z]
df2 = pd.DataFrame({'Values': vals, 'Confidence':conf}).reset_index()
ax=sns.barplot(data = df2, x='index', y='Confidence', hue='Values',dodge=False)
ax.set_xticklabels(['Actual', 'Predicted']*4)
plt.legend(bbox_to_anchor=(1.0,1))
plt.show()
Plot
Update - grouping Actual and Predicted bars
Hi #Mohammed - As we have already used up hue, I don't think there is a way to do this easily with Seaborn. You would need to use matplotlib and adjust the bar position, xtick positions, etc. Below is the code that will do this. You can change SET1 to another color map to change colors. I have also added a black outline as the same colored bars were blending into one another. Further, I had to rotate the xlables, as they were on top of one another. You can change it as per your requirements. Hope this helps...
vals = df[['Actual','Predicted']].melt(value_name='texts')['texts']
conf = [1]*4 + list(df.Confidence)
ident = ['Actual', 'Predicted']*4
df2 = pd.DataFrame({'Values': vals, 'Confidence':conf, 'Identifier':ident}).reset_index()
uvals, uind = np.unique(df2["Values"], return_inverse=1)
cmap = plt.cm.get_cmap("Set1")
fig, ax=plt.subplots()
l = len(df2)
pos = np.arange(0,l) % (l//2) + (np.arange(0,l)//(l//2)-1)*0.4
ax.bar(pos, df2["Confidence"], width=0.4, align="edge", ec="k",color=cmap(uind) )
handles=[plt.Rectangle((0,0),1,1, color=cmap(i), ec="k") for i in range(len(uvals))]
ax.legend(handles=handles, labels=list(uvals), prop ={'size':10}, loc=9, ncol=8)
pos=pos+0.2
pos.sort()
ax.set_xticks(pos)
ax.set_xticklabels(df2["Identifier"][:l], rotation=45,ha='right', rotation_mode="anchor")
ax.set_ylim(0, 1.2)
plt.show()
Output plot
I updated #Redox answer to get the exact output.
df_ = pd.DataFrame({'Labels': df.reset_index()[['Actual', 'Predicted', 'index']].values.ravel(),
'Confidence': np.array(list(zip(np.repeat(1, len(df)), df['Confidence'].values, np.repeat(0, len(df))))).ravel()})
df_.loc[df_['Labels'].astype(str).str.isdigit(), 'Labels'] = ''
plt.figure(figsize=(15, 6))
ax=sns.barplot(data = df_, x=df_.index, y='Confidence', hue='Labels',dodge=False, ci=None)
ax.set_xticklabels(['Actual', 'Predicted', '']*len(df))
plt.setp(ax.get_xticklabels(), rotation=90)
ax.tick_params(labelsize=14)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
Output:
Removed loop to improve performance
Added blank bar values to look alike group chart.

How to highlight columns in an Altair heatmap?

I would like to guide the reader's attention to just some columns (or rows and columns) in a heatmap, while still retaining the full context.
I can use alt.condition to alter color and opacity. Both work to some extent. But changes in opacity visualize in a similar way as changes in value. And using a different color changes the perception of values. What I would like to do instead is to put yellow or red borders around the consecutive columns I want to highlight.
This is what I have now. Any other ideas?
import altair as alt
alt.data_transformers.disable_max_rows()
def create_att_chart(df, keys_to_highlight=[], width=150, height=150, title=None, labels_x=True, labels_y=True):
properties = {}
if title:
properties['title'] = title
if width: properties['width'] = width
if height: properties['height'] = height
chart = alt.Chart(df).mark_rect().encode(
x=alt.X('k:N', sort=None, axis=alt.Axis(labels=labels_x, title=None, ticks=False), title=None),
y=alt.Y('q:N', sort=None, axis=alt.Axis(labels=labels_y, title=None, ticks=False), title=None),
opacity=alt.Opacity('a:Q', legend=None),
column=alt.Column('h:N', title=None, header=alt.Header(labels=False), spacing=0.),
row= alt.Row( 'l:N', title=None, header=alt.Header(labels=False), spacing=5.))
if keys_to_highlight:
chart = chart.encode(
color=alt.condition(
alt.Predicate(alt.FieldOneOfPredicate(field='k', oneOf=keys_to_highlight)),
alt.value('orange'),
alt.value('blue')))
else:
chart = chart.encode(color=alt.value('blue'))
return chart.properties(**properties)
[..]
((create_att_chart(df_pt, ['sage', '##maker'], title='Pre-Trained') | create_att_chart(df_ft, ['sage', '##maker'], title='Fine-Tuned', labels_y=False)).properties(padding=0))
You could try use the condition for the stroke encoding instead of color, but I think that would give you strokes around each box, which is probably not what you want. Instead you could use mark_rule or mark_rect with this example from the docs:
import altair as alt
import numpy as np
import pandas as pd
# Compute x^2 + y^2 across a 2D grid
x, y = np.meshgrid(range(-5, 5), range(-5, 5))
z = x ** 2 + y ** 2
# Convert this grid to columnar data expected by Altair
source = pd.DataFrame({'x': x.ravel(), 'y': y.ravel(), 'z': z.ravel()})
heatmap = alt.Chart(source).mark_rect().encode(
x='x:O',
y='y:O',
color=alt.Color('z:Q', scale=alt.Scale(scheme='blues')))
Now add the rules:
rule1 = alt.Chart(df).mark_rule(stroke='orange', strokeWidth=2).encode(x=alt.value(20))
rule2 = alt.Chart(df).mark_rule(stroke='orange', strokeWidth=2).encode(x=alt.value(60))
heatmap + rule1 + rule2
A top rule might be more appealing/elegant and you could add text above it with mark_text if needed:
rule1 = alt.Chart(df).mark_rule(stroke='orange', strokeWidth=3).encode(
y=alt.value(-5),
x=alt.value(20),
x2=alt.value(60))
heatmap + rule1
mark_rect works but add the lines in the middle of squares since the scale is ordinal and a quantitative mark_rect messes up the axis:
df = pd.DataFrame({'x': [0], 'x2': [3]})
box = alt.Chart(df).mark_rect(color='', stroke='orange', strokeWidth=2).encode(
x='x:O',
x2=alt.X2('x2:O', title='x'))
heatmap + box
If you try to add the lines in between, new ordinal axis marks will be created. You could abuse this and make the lines white to highlight by separation but the ticks on the axis are still there, so you would have to remove them with lablExpr or similar.
df = pd.DataFrame({'x': [0.5], 'x2': [3.5]})
box = alt.Chart(df).mark_rect(color='', stroke='white').encode(
x='x:O',
x2=alt.X2('x2:O', title='x'))
(heatmap + box).configure_view(stroke=None)

Adding quantitative values to differentiate data through colours in a scatterplot's legend in Python?

Currently, I'm working on an introductory paper on data manipulation and such; however... the CSV I'm working on has some things I wish to do a scatter graph on!
I want a scatter graph to show me the volume sold on certain items as well as their average price, differentiating all data according to their region (Through colours I assume).
So what I want is to know if I can add the region column as a quantitative value
or if there's a way to make this possible...
It's my first time using Python and I'm confused way too often
I'm not sure if this is what you mean, but here is some working code, assuming you have data in the format of [(country, volume, price), ...]. If not, you can change the inputs to the scatter method as needed.
import random
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
n_countries = 50
# get the data into "countries", for example
countries = ...
# in this example: countries is [('BS', 21, 25), ('WZ', 98, 25), ...]
df = pd.DataFrame(countries)
# arbitrary method to get a color
def get_color(i, max_i):
cmap = matplotlib.cm.get_cmap('Spectral')
return cmap(i/max_i)
# get the figure and axis - make a larger figure to fit more points
# add labels for metric names
def get_fig_ax():
fig = plt.figure(figsize=(14,14))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('volume')
ax.set_ylabel('price')
return fig, ax
# switch around the assignments depending on your data
def get_x_y_labels():
x = df[1]
y = df[2]
labels = df[0]
return x, y, labels
offset = 1 # offset just so annotations aren't on top of points
x, y, labels = get_x_y_labels()
fig, ax = get_fig_ax()
# add a point and annotation for each of the labels/regions
for i, region in enumerate(labels):
ax.annotate(region, (x[i] + offset, y[i] + offset))
# note that you must use "label" for "legend" to work
ax.scatter(x[i], y[i], color=get_color(i, len(x)), label=region)
# Add the legend just outside of the plot.
# The .1, 0 at the end will put it outside
ax.legend(loc='upper right', bbox_to_anchor=(1, 1, .1, 0))
plt.show()

How to label the bars of a stacked bar plot from a pandas DataFrame? [duplicate]

I am trying to replicate the following image in matplotlib and it seems barh is my only option. Though it appears that you can't stack barh graphs so I don't know what to do
If you know of a better python library to draw this kind of thing, please let me know.
This is all I could come up with as a start:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
y_pos = np.arange(len(people))
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
ax.barh(y_pos, bottomdata,color='r',align='center')
ax.barh(y_pos, topdata,color='g',align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
I would then have to add labels individually using ax.text which would be tedious. Ideally I would like to just specify the width of the part to be inserted then it updates the center of that section with a string of my choosing. The labels on the outside (e.g. 3800) I can add myself later, it is mainly the labeling over the bar section itself and creating this stacked method in a nice way I'm having problems with. Can you even specify a 'distance' i.e. span of color in any way?
Edit 2: for more heterogeneous data. (I've left the above method since I find it more usual to work with the same number of records per series)
Answering the two parts of the question:
a) barh returns a container of handles to all the patches that it drew. You can use the coordinates of the patches to aid the text positions.
b) Following these two answers to the question that I noted before (see Horizontal stacked bar chart in Matplotlib), you can stack bar graphs horizontally by setting the 'left' input.
and additionally c) handling data that is less uniform in shape.
Below is one way you could handle data that is less uniform in shape is simply to process each segment independently.
import numpy as np
import matplotlib.pyplot as plt
# some labels for each row
people = ('A','B','C','D','E','F','G','H')
r = len(people)
# how many data points overall (average of 3 per person)
n = r * 3
# which person does each segment belong to?
rows = np.random.randint(0, r, (n,))
# how wide is the segment?
widths = np.random.randint(3,12, n,)
# what label to put on the segment (xrange in py2.7, range for py3)
labels = range(n)
colors ='rgbwmc'
patch_handles = []
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
left = np.zeros(r,)
row_counts = np.zeros(r,)
for (r, w, l) in zip(rows, widths, labels):
print r, w, l
patch_handles.append(ax.barh(r, w, align='center', left=left[r],
color=colors[int(row_counts[r]) % len(colors)]))
left[r] += w
row_counts[r] += 1
# we know there is only one patch but could enumerate if expanded
patch = patch_handles[-1][0]
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x, y, "%d%%" % (l), ha='center',va='center')
y_pos = np.arange(8)
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
Which produces a graph like this , with a different number of segments present in each series.
Note that this is not particularly efficient since each segment used an individual call to ax.barh. There may be more efficient methods (e.g. by padding a matrix with zero-width segments or nan values) but this likely to be problem-specific and is a distinct question.
Edit: updated to answer both parts of the question.
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
segments = 4
# generate some multi-dimensional data & arbitrary labels
data = 3 + 10* np.random.rand(segments, len(people))
percentages = (np.random.randint(5,20, (len(people), segments)))
y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
colors ='rgbwmc'
patch_handles = []
left = np.zeros(len(people)) # left alignment of data starts at zero
for i, d in enumerate(data):
patch_handles.append(ax.barh(y_pos, d,
color=colors[i%len(colors)], align='center',
left=left))
# accumulate the left-hand offsets
left += d
# go through all of the bar segments and annotate
for j in range(len(patch_handles)):
for i, patch in enumerate(patch_handles[j].get_children()):
bl = patch.get_xy()
x = 0.5*patch.get_width() + bl[0]
y = 0.5*patch.get_height() + bl[1]
ax.text(x,y, "%d%%" % (percentages[i,j]), ha='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
You can achieve a result along these lines (note: the percentages I used have nothing to do with the bar widths, as the relationship in the example seems unclear):
See Horizontal stacked bar chart in Matplotlib for some ideas on stacking horizontal bar plots.
Imports and Test DataFrame
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1, seaborn 0.11.2
For vertical stacked bars see Stacked Bar Chart with Centered Labels
import pandas as pd
import numpy as np
# create sample data as shown in the OP
np.random.seed(365)
people = ('A','B','C','D','E','F','G','H')
bottomdata = 3 + 10 * np.random.rand(len(people))
topdata = 3 + 10 * np.random.rand(len(people))
# create the dataframe
df = pd.DataFrame({'Female': bottomdata, 'Male': topdata}, index=people)
# display(df)
Female Male
A 12.41 7.42
B 9.42 4.10
C 9.85 7.38
D 8.89 10.53
E 8.44 5.92
F 6.68 11.86
G 10.67 12.97
H 6.05 7.87
Updated with matplotlib v3.4.2
Use matplotlib.pyplot.bar_label
See How to add value labels on a bar chart for additional details and examples with .bar_label.
labels = [f'{v.get_width():.2f}%' if v.get_width() > 0 else '' for v in c ] for python < 3.8, without the assignment expression (:=).
Plotted using pandas.DataFrame.plot with kind='barh'
ax = df.plot(kind='barh', stacked=True, figsize=(8, 6))
for c in ax.containers:
# customize the label to account for cases when there might not be a bar section
labels = [f'{w:.2f}%' if (w := v.get_width()) > 0 else '' for v in c ]
# set the bar label
ax.bar_label(c, labels=labels, label_type='center')
# uncomment and use the next line if there are no nan or 0 length sections; just use fmt to add a % (the previous two lines of code are not needed, in this case)
# ax.bar_label(c, fmt='%.2f%%', label_type='center')
# move the legend
ax.legend(bbox_to_anchor=(1.025, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Using seaborn
sns.barplot does not have an option for stacked bar plots, however, sns.histplot and sns.displot can be used to create horizontal stacked bars.
seaborn typically requires the dataframe to be in a long, instead of wide, format, so use pandas.DataFrame.melt to reshape the dataframe.
Reshape dataframe
# convert the dataframe to a long form
df = df.reset_index()
df = df.rename(columns={'index': 'People'})
dfm = df.melt(id_vars='People', var_name='Gender', value_name='Percent')
# display(dfm)
People Gender Percent
0 A Female 12.414557
1 B Female 9.416027
2 C Female 9.846105
3 D Female 8.885621
4 E Female 8.438872
5 F Female 6.680709
6 G Female 10.666258
7 H Female 6.050124
8 A Male 7.420860
9 B Male 4.104433
10 C Male 7.383738
11 D Male 10.526158
12 E Male 5.916262
13 F Male 11.857227
14 G Male 12.966913
15 H Male 7.865684
sns.histplot: axes-level plot
fig, axe = plt.subplots(figsize=(8, 6))
sns.histplot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', ax=axe)
# iterate through each set of containers
for c in axe.containers:
# add bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
sns.displot: figure-level plot
g = sns.displot(data=dfm, y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack', height=6)
# iterate through each facet / supbplot
for axe in g.axes.flat:
# iteate through each set of containers
for c in axe.containers:
# add the bar annotations
axe.bar_label(c, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
Original Answer - before matplotlib v3.4.2
The easiest way to plot a horizontal or vertical stacked bar, is to load the data into a pandas.DataFrame
This will plot, and annotate correctly, even when all categories ('People'), don't have all segments (e.g. some value is 0 or NaN)
Once the data is in the dataframe:
It's easier to manipulate and analyze
It can be plotted with the matplotlib engine, using:
pandas.DataFrame.plot.barh
label_text = f'{width}' for annotations
pandas.DataFrame.plot.bar
label_text = f'{height}' for annotations
SO: Vertical Stacked Bar Chart with Centered Labels
These methods return a matplotlib.axes.Axes or a numpy.ndarray of them.
Using the .patches method unpacks a list of matplotlib.patches.Rectangle objects, one for each of the sections of the stacked bar.
Each .Rectangle has methods for extracting the various values that define the rectangle.
Each .Rectangle is in order from left the right, and bottom to top, so all the .Rectangle objects, for each level, appear in order, when iterating through .patches.
The labels are made using an f-string, label_text = f'{width:.2f}%', so any additional text can be added as needed.
Plot and Annotate
Plotting the bar, is 1 line, the remainder is annotating the rectangles
# plot the dataframe with 1 line
ax = df.plot.barh(stacked=True, figsize=(8, 6))
# .patches is everything inside of the chart
for rect in ax.patches:
# Find where everything is located
height = rect.get_height()
width = rect.get_width()
x = rect.get_x()
y = rect.get_y()
# The height of the bar is the data value and can be used as the label
label_text = f'{width:.2f}%' # f'{width:.2f}' to format decimal values
# ax.text(x, y, text)
label_x = x + width / 2
label_y = y + height / 2
# only plot labels greater than given width
if width > 0:
ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=8)
# move the legend
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
# add labels
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Example with Missing Segment
# set one of the dataframe values to 0
df.iloc[4, 1] = 0
Note the annotations are all in the correct location from df.
For this case, the above answers work perfectly. The issue I had, and didn't find a plug-and-play solution online, was that I often have to plot stacked bars in multi-subplot figures, with many values, which tend to have very non-homogenous amplitudes.
(Note: I work usually with pandas dataframes, and matplotlib. I couldn't make the bar_label() method of matplotlib to work all the times.)
So, I just give a kind of ad-hoc, but easily generalizable solution. In this example, I was working with single-row dataframes (for power-exchange monitoring purposes per hour), so, my dataframe (df) had just one row.
(I provide an example figure to show how this can be useful in very densely-packed plots)
[enter image description here][1]
[1]: https://i.stack.imgur.com/9akd8.png
'''
This implementation produces a stacked, horizontal bar plot.
df --> pandas dataframe. Columns are used as the iterator, and only the firs value of each column is used.
waterfall--> bool: if True, apart from the stack-direction, also a perpendicular offset is added.
cyclic_offset_x --> list (of any length) or None: loop through these values to use as x-offset pixels.
cyclic_offset_y --> list (of any length) or None: loop through these values to use as y-offset pixels.
ax --> matplotlib Axes, or None: if None, creates a new axis and figure.
'''
def magic_stacked_bar(df, waterfall=False, cyclic_offset_x=None, cyclic_offset_y=None, ax=None):
if isinstance(cyclic_offset_x, type(None)):
cyclic_offset_x = [0, 0]
if isinstance(cyclic_offset_y, type(None)):
cyclic_offset_y = [0, 0]
ax0 = ax
if isinstance(ax, type(None)):
fig, ax = plt.subplots()
fig.set_size_inches(19, 10)
cycler = 0;
prev = 0 # summation variable to make it stacked
for c in df.columns:
if waterfall:
y = c ; label = "" # bidirectional stack
else:
y = 0; label = c # unidirectional stack
ax.barh(y=y, width=df[c].values[0], height=1, left=prev, label = label)
prev += df[c].values[0] # add to sum-stack
offset_x = cyclic_offset_x[divmod(cycler, len(cyclic_offset_x))[1]]
offset_y = cyclic_offset_y[divmod(cycler, len(cyclic_offset_y))[1]]
ax.annotate(text="{}".format(int(df[c].values[0])), xy=(prev - df[c].values / 2, y),
xytext=(offset_x, offset_y), textcoords='offset pixels',
ha='center', va='top', fontsize=8,
arrowprops=dict(facecolor='black', shrink=0.01, width=0.3, headwidth=0.3),
bbox=dict(boxstyle='round', facecolor='grey', alpha=0.5))
cycler += 1
if not waterfall:
ax.legend() # if waterfall, the index annotates the columns. If
# waterfall ==False, the legend annotates the columns
if isinstance(ax0, type(None)):
ax.set_title("Voi la")
ax.set_xlabel("UltraWatts")
plt.show()
else:
return ax
''' (Sometimes, it is more tedious and requires some custom functions to make the labels look alright.
'''
A, B = 80,80
n_units = df.shape[1]
cyclic_offset_x = -A*np.cos(2*np.pi / (2*n_units) *np.arange(n_units))
cyclic_offset_y = B*np.sin(2*np.pi / (2*n_units) * np.arange(n_units)) + B/2

Categories