Add value labels to stacked bar chart [duplicate] - python

I'm trying to "robustly" center the data labels in a stacked bar chart. A simple code example and the result are given below. As you can see, the data labels aren't really centered in all rectangles. What am I missing?
import numpy as np
import matplotlib.pyplot as plt
A = [45, 17, 47]
B = [91, 70, 72]
fig = plt.figure(facecolor="white")
ax = fig.add_subplot(1, 1, 1)
bar_width = 0.5
bar_l = np.arange(1, 4)
tick_pos = [i + (bar_width / 2) for i in bar_l]
ax1 = ax.bar(bar_l, A, width=bar_width, label="A", color="green")
ax2 = ax.bar(bar_l, B, bottom=A, width=bar_width, label="B", color="blue")
ax.set_ylabel("Count", fontsize=18)
ax.set_xlabel("Class", fontsize=18)
ax.legend(loc="best")
plt.xticks(tick_pos, ["C1", "C2", "C3"], fontsize=16)
plt.yticks(fontsize=16)
for r1, r2 in zip(ax1, ax2):
h1 = r1.get_height()
h2 = r2.get_height()
plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%d" % h1, ha="center", va="bottom", color="white", fontsize=16, fontweight="bold")
plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%d" % h2, ha="center", va="bottom", color="white", fontsize=16, fontweight="bold")
plt.show()

The following method is more succinct, and easily scales.
Putting the data into a pandas.DataFrame is the easiest way to plot a stacked bar plot.
Using pandas.DataFrame.plot.bar(stacked=True), or pandas.DataFrame.plot(kind='bar', stacked=True), is the easiest way to plot a stacked bar plot.
This method returns a matplotlib.axes.Axes or a numpy.ndarray of them.
Since seaborn is just a high-level API for matplotlib, these solutions also work with seaborn plots, as shown in How to annotate a seaborn barplot with the aggregated value.
For horizontal stacked bars, see Horizontal stacked bar plot and add labels to each section
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1, seaborn 0.11.2
Imports & Test DataFrame
import pandas as pd
import matplotlib.pyplot as plt
A = [45, 17, 47]
B = [91, 70, 72]
C = [68, 43, 13]
# pandas dataframe
df = pd.DataFrame(data={'A': A, 'B': B, 'C': C})
df.index = ['C1', 'C2', 'C3']
A B C
C1 45 91 68
C2 17 70 43
C3 47 72 13
Updated for matplotlib v3.4.2
Use matplotlib.pyplot.bar_label, which will automatically center the values in the bar.
See How to add value labels on a bar chart for additional details and examples with .bar_label.
Tested with pandas v1.2.4, which is using matplotlib as the plot engine.
If some sections of the bar plot will be zero, see my answer, which shows how to customize the labels for .bar_label().
ax.bar_label(c, fmt='%0.0f', label_type='center') will change the number format to show no decimal places, if needed.
ax = df.plot(kind='bar', stacked=True, figsize=(8, 6), rot=0, xlabel='Class', ylabel='Count')
for c in ax.containers:
# Optional: if the segment is small or 0, customize the labels
labels = [v.get_height() if v.get_height() > 0 else '' for v in c]
# remove the labels parameter if it's not needed for customized labels
ax.bar_label(c, labels=labels, label_type='center')
Seaborn Options
seaborn is a high-level api for matplotlib
The seaborn.barplot api doesn't have an option for stacking, but it "can" be implemented with sns.histplot, or sns.displot.
Seaborn DataFrame Format
# create the data frame
df = pd.DataFrame(data={'A': A, 'B': B, 'C': C, 'cat': ['C1', 'C2', 'C3']})
A B C cat
0 45 91 68 C1
1 17 70 43 C2
2 47 72 13 C3
# convert the dataframe to a long form
df = df.melt(id_vars='cat')
cat variable value
0 C1 A 45
1 C2 A 17
2 C3 A 47
3 C1 B 91
4 C2 B 70
5 C3 B 72
6 C1 C 68
7 C2 C 43
8 C3 C 13
axes-level plot
# plot
ax = sns.histplot(data=df, x='cat', hue='variable', weights='value', discrete=True, multiple='stack')
# iterate through each container
for c in ax.containers:
# Optional: if the segment is small or 0, customize the labels
labels = [v.get_height() if v.get_height() > 0 else '' for v in c]
# remove the labels parameter if it's not needed for customized labels
ax.bar_label(c, labels=labels, label_type='center')
figure-level plot
# plot
g = sns.displot(data=df, x='cat', hue='variable', weights='value', discrete=True, multiple='stack')
# iterate through each axes
for ax in g.axes.flat:
# iterate through each container
for c in ax.containers:
# Optional: if the segment is small or 0, customize the labels
labels = [v.get_height() if v.get_height() > 0 else '' for v in c]
# remove the labels parameter if it's not needed for customized labels
ax.bar_label(c, labels=labels, label_type='center')
Original Answer
Using the .patches method unpacks a list of matplotlib.patches.Rectangle objects, one for each of the sections of the stacked bar.
Each .Rectangle has methods for extracting the various values that define the rectangle.
Each .Rectangle is in order from left to right, and bottom to top, so all the .Rectangle objects, for each level, appear in order, when iterating through .patches.
The labels are made using an f-string, label_text = f'{height}', so any additional text can be added as needed, such as label_text = f'{height}%'
label_text = f'{height:0.0f}' will display numbers with no decimal places.
Plot
plt.style.use('ggplot')
ax = df.plot(stacked=True, kind='bar', figsize=(12, 8), rot='horizontal')
# .patches is everything inside of the chart
for rect in ax.patches:
# Find where everything is located
height = rect.get_height()
width = rect.get_width()
x = rect.get_x()
y = rect.get_y()
# The height of the bar is the data value and can be used as the label
label_text = f'{height}' # f'{height:.2f}' to format decimal values
# ax.text(x, y, text)
label_x = x + width / 2
label_y = y + height / 2
# plot only when height is greater than specified value
if height > 0:
ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=8)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel("Count", fontsize=18)
ax.set_xlabel("Class", fontsize=18)
plt.show()
To plot a horizontal bar:
kind='barh'
label_text = f'{width}'
if width > 0:
Attribution: jsoma/chart.py

Why you wrote va="bottom"? You have to use va="center".

Related

How to plot a bar chart with multiple x-axis data?

I'd like to plot a bar chart in Python, similar to Excel. However, I am struggling to have two different x-axes. For example, for each size (like 8M), I want to plot the results of all 5 strategies. For each strategy, there are 3 metrics (Fit, boot, and exp).
You can download the original excel file here here.
This is my code so far:
df = pd.read_excel("data.xlsx",sheet_name="Sheet1")
r1= df['Fit']
r2= df['Boot']
r3= df['Exp']
x= df['strategy']
n_groups = 5
# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
names = ["8M","16M","32M","64M","128M"]
bar_width = 0.1
opacity = 0.8
Fit8= [r1[0],r1[1],r1[2],r1[3],r1[4]]
Boot8= [r2[0],r2[1],r2[2],r2[3],r2[4]]
Exp8= [r3[0],r3[1],r3[2],r3[3],r3[4]]
Fit16= [r1[5],r1[6],r1[7],r1[8],r1[9]]
Boot16= [r2[5],r2[6],r2[7],r2[8],r2[9]]
Exp16= [r3[5],r3[6],r3[7],r3[8],r3[9]]
rects1 = plt.bar(
index, Fit8, bar_width,
alpha=opacity,
color='g',
label='Fit'
)
rects2 = plt.bar(
index + 0.1, Boot8, bar_width,
alpha=opacity,
color='b',
label='Boot'
)
rects3 = plt.bar(
index + 0.2, Exp8, bar_width,
alpha=opacity,
color='y',
label='EXP'
)
rects4 = plt.bar(
index + 0.5, Fit16, bar_width,
alpha=opacity,
color='g'
)
rects5 = plt.bar(
index + 0.6, Boot16, bar_width,
alpha=opacity,
color='b'
)
rects6 = plt.bar(
index + 0.7, Exp16, bar_width,
alpha=opacity,
color='y'
)
plt.xticks(index + 0.2, (names))
plt.legend()
plt.tight_layout()
plt.show()
Something like this?
Here the code:
import pandas as pd
import pylab as plt
# read dataframe, take advantage of Multiindex
df = pd.read_excel(
"data.xlsx",
sheet_name="Sheet1", engine='openpyxl',
index_col=[0, 1],
)
# plot the content of the dataframe
ax = df.plot.bar()
# Show minor ticks
ax.minorticks_on()
# Get location of the center of each bar
bar_locations = list(map(lambda x: x.get_x() + x.get_width() / 2., ax.patches))
# Set minor and major tick positions
# Minor are used for S1, ..., S5
# Major for sizes 8M, ..., 128M
# tick locations are sorted according to the 3 metrics, so first all the 25 bars for the fit, then the 25
# for the boot and at the end the 25 for the exp. We set the major tick at the position of the bar at the center
# of the size group, that is the third boot bar of each size.
ax.set_xticks(bar_locations[27:50:5], minor=False) # use the 7th bar of each size group
ax.set_xticks(bar_locations[len(df):2 * len(df)], minor=True) # use the bar in the middle of each group of 3 bars
# Labels for groups of 3 bars and for each group of size
ax.set_xticklabels(df.index.get_level_values(0)[::5], minor=False, rotation=0)
ax.set_xticklabels(df.index.get_level_values(1), minor=True, rotation=0)
# Set tick parameters
ax.tick_params(axis='x', which='major', pad=15, bottom='off')
ax.tick_params(axis='x', which='both', top='off')
# You can use a different color for each group
# You can comment out these lines if you don't like it
size_colors = 'rgbym'
# major ticks
for l, c in zip(ax.get_xticklabels(minor=False), size_colors):
l.set_color(c)
l.set_fontweight('bold')
# minor ticks
for i, l in enumerate(ax.get_xticklabels(minor=True)):
l.set_color(size_colors[i // len(size_colors)])
# remove x axis label
ax.set_xlabel('')
plt.tight_layout()
plt.show()
The main idea here is to use the Multiindex of Pandas, with some minor tweaks.
EDIT
If you want spaces between groups, you can add a dummy category (a.k.a strategy) in the dataframe to create an artificial space, obtaining:
Here the code:
import numpy as np
import pandas as pd
import pylab as plt
# read dataframe, take advantage of Multiindex
df = pd.read_excel(
"data.xlsx",
sheet_name="Sheet1", engine='openpyxl',
index_col=[0, 1],
)
# plot the content of the dataframe
sizes = list(df.index.get_level_values(0).drop_duplicates())
strategies = list(df.index.get_level_values(1).drop_duplicates())
n_sizes = len(sizes)
n_strategies = len(strategies)
n_metrics = len(df.columns)
empty_rows = pd.DataFrame(
data=[[np.nan] * n_metrics] * n_sizes, index=pd.MultiIndex.from_tuples([(s, 'SN') for s in sizes], names=df.index.names),
columns=df.columns,
)
old_columns = list(df.columns)
df = df.merge(empty_rows, how='outer', left_index=True, right_index=True, sort=False).drop(
columns=[f'{c}_y' for c in df.columns]
).sort_index(
ascending=True, level=0, key=lambda x: sorted(x, key=lambda y: int(y[:-1]))
)
df.columns = old_columns
# Update number of strategies
n_strategies += 1
# Plot with Pandas
ax = df.plot.bar()
# Show minor ticks
ax.minorticks_on()
# Get location of the center of each bar
bar_locations = list(map(lambda x: x.get_x() + x.get_width() / 2., ax.patches))
# Set minor and major tick positions
# Major for sizes 8M, ..., 128M
# Minor are used for S1, ..., S5, SN
# Tick locations are sorted according to the 3 metrics, so first 30 (5 sizes * 6 strategies) bars for the fit,
# then 30 (5 sizes * 6 strategies) for the boot and at the end 30 (5 sizes * 6 strategies) for the exp.
# We set the major tick at the position of the bar at the center of the size group (+7),
# that is the third boot bar of each size.
n_bars_per_metric = n_sizes * n_strategies
strategy_ticks = bar_locations[len(df):2 * len(df)]
strategy_ticks = np.concatenate([strategy_ticks[b * n_strategies:b * n_strategies + n_strategies - 1] for b in range(n_sizes)]) # get only positions of the first 5 bars
size_ticks = strategy_ticks[2::n_sizes] + 0.01
ax.set_xticks(size_ticks, minor=False) # use the 7th bar of each size group
ax.set_xticks(strategy_ticks, minor=True) # use the bar in the middle of each group of 3 bars
# Labels for groups of 3 bars and for each group of size
ax.set_xticklabels(sizes, minor=False, rotation=0)
ax.set_xticklabels(strategies * n_sizes, minor=True, rotation=0)
# Set tick parameters
ax.tick_params(axis='x', which='major', pad=15, bottom=False)
ax.tick_params(axis='x', which='both', top=False)
# You can use a different color for each group
# You can comment out these lines if you don't like it
size_colors = 'rgbym'
# major ticks
for l, c in zip(ax.get_xticklabels(minor=False), size_colors):
l.set_color(c)
l.set_fontweight('bold')
# minor ticks
for i, l in enumerate(ax.get_xticklabels(minor=True)):
l.set_color(size_colors[i // len(size_colors)])
# remove x axis label
ax.set_xlabel('')
plt.tight_layout()
plt.show()
As you can see, you have to play with the DataFrame, adding some extra code. Maybe there is a simpler solution, but it was the first that I can think of.

Python maxplotlib - boxsplot subplot + scatter plot

I am trying to perform a scatter plot within a boxplot as subplot. When I do for just one boxsplot, it works. I can define a specific point with specific color inside of the boxsplot. The green ball (Image 1) is representing an specific number in comparision with boxplot values.
for columnName in data_num.columns:
plt.figure(figsize=(2, 2), dpi=100)
bp = data_num.boxplot(column=columnName, grid=False)
y = S[columnName]
x = columnName
if y > data_num[columnName].describe().iloc[5]:
plt.plot(1, y, 'r.', alpha=0.7,color='green',markersize=12)
count_G = count_G + 1
elif y < data_num[columnName].describe().iloc[5]:
plt.plot(1, y, 'r.', alpha=0.7,color='red',markersize=12)
count_L = count_L + 1
else:
plt.plot(1, y, 'r.', alpha=0.7,color='yellow',markersize=12)
count_E = count_E + 1
Image 1 - Scatter + 1 boxplot
I can create a subplot with boxplots.
fig, axes = plt.subplots(6,10,figsize=(16,16)) # create figure and axes
fig.subplots_adjust(hspace=0.6, wspace=1)
for j,columnName in enumerate(list(data_num.columns.values)[:-1]):
bp = data_num.boxplot(columnName,ax=axes.flatten()[j])
Image 2 - Subplots + Boxplots
But when I try to plot a specific number inside of each boxplot, actually it subscribes the entire plot.
plt.subplot(6,10,j+1)
if y > data_num[columnName].describe().iloc[5]:
plt.plot(1, y, 'r.', alpha=0.7,color='green',markersize=12)
count_G = count_G + 1
elif y < data_num[columnName].describe().iloc[5]:
plt.plot(1, y, 'r.', alpha=0.7,color='red',markersize=12)
count_L = count_L + 1
else:
plt.plot(1, y, 'r.', alpha=0.7,color='black',markersize=12)
count_E = count_E + 1
Image 3 - Subplots + scatter
It is not completely clear what is going wrong. Probably the call to plt.subplot(6,10,j+1) is erasing some stuff. However, such a call is not necessary with the standard modern use of matplotlib, where the subplots are created via fig, axes = plt.subplots(). Be careful to use ax.plot() instead of plt.plot(). plt.plot() plots on the "current" ax, which can be a bit confusing when there are lots of subplots.
The sample code below first creates some toy data (hopefully similar to the data in the question). Then the boxplots and the individual dots are drawn in a loop. To avoid repetition, the counts and the colors are stored in dictionaries. As data_num[columnName].describe().iloc[5] seems to be the median, for readability the code directly calculates that median.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
column_names = list('abcdef')
S = {c: np.random.randint(2, 6) for c in column_names}
data_num = pd.DataFrame({c: np.random.randint(np.random.randint(0, 3), np.random.randint(4, 8), 20)
for c in column_names})
colors = {'G': 'limegreen', 'E': 'gold', 'L': 'crimson'}
counts = {c: 0 for c in colors}
fig, axes = plt.subplots(1, 6, figsize=(12, 3), gridspec_kw={'hspace': 0.6, 'wspace': 1})
for columnName, ax in zip(data_num.columns, axes.flatten()):
data_num.boxplot(column=columnName, grid=False, ax=ax)
y = S[columnName] # in case S would be a dataframe with one row: y = S[columnName].values[0]
data_median = data_num[columnName].median()
classification = 'G' if y > data_median else 'L' if y < data_median else 'E'
ax.plot(1, y, '.', alpha=0.9, color=colors[classification], markersize=12)
counts[classification] += 1
print(counts)
plt.show()

Plotting a Bar Chart on matplotlib

How can I plot a horizontal bar chart with the values at the end of the bar, Something similar to this
I tried this
plt.barh(inc.index,inc)
plt.yticks(inc.index)
plt.xticks(inc);
plt.xlabel("Order Count")
plt.ylabel("Date")
Bar chart
The answer can be found here:
How to display the value of the bar on each bar with pyplot.barh()?
Just add the for loop as cphlewis said:
for i, v in enumerate(inc):
ax.text(v + 3, i + .25, str(v), color='blue', fontweight='bold')
plt.show()
Here is the code that I tried for your situation:
import matplotlib.pyplot as plt
import numpy as np
inc = [12, 25, 50, 65, 40, 45]
index = ["2019-10-31", "2019-10-30", "2019-10-29", "2019-10-28", "2019-10-27", "2019-10-26"]
fig, ax = plt.subplots()
ax.barh(index,inc, color='black')
plt.yticks(index)
plt.xticks(inc);
plt.xlabel("Order Count")
plt.ylabel("Date")
# Set xticks
plt.xticks(np.arange(0, max(inc)+15, step=10))
# Loop for showing inc numbers in the end of bar
for i, v in enumerate(inc):
ax.text(v + 1, i, str(v), color='black', fontweight='bold')
plt.show()
Plot looks like this:
To generate a plot with values superimposed, run:
ax = inc.plot.barh(xticks=inc, xlim=(0, 40));
ax.set_xlabel('Order Count')
ax.set_ylabel('Date')
for p in ax.patches:
w = p.get_width()
ax.annotate(f' {w}', (w + 0.1, p.get_y() + 0.1))
Note that I set xlim with upper limit slightly above the
maximum Order Count, to provide the space for annotations.
For a subset of your data I got:
And one more impovement:
As I see, your data is a Series with a DatetimeIndex.
So if you want to have y label values as dates only (without
00:00:00 for hours), convert the index to string:
inc.index = inc.index.strftime('%Y-%m-%d')
like I did, generating my plot.

specify spaces between bars in barplot in matplotlib [duplicate]

How to plot multiple bars in matplotlib, when I tried to call the bar function multiple times, they overlap and as seen the below figure the highest value red can be seen only.
How can I plot the multiple bars with dates on the x-axes?
So far, I tried this:
import matplotlib.pyplot as plt
import datetime
x = [
datetime.datetime(2011, 1, 4, 0, 0),
datetime.datetime(2011, 1, 5, 0, 0),
datetime.datetime(2011, 1, 6, 0, 0)
]
y = [4, 9, 2]
z = [1, 2, 3]
k = [11, 12, 13]
ax = plt.subplot(111)
ax.bar(x, y, width=0.5, color='b', align='center')
ax.bar(x, z, width=0.5, color='g', align='center')
ax.bar(x, k, width=0.5, color='r', align='center')
ax.xaxis_date()
plt.show()
I got this:
The results should be something like, but with the dates are on the x-axes and bars are next to each other:
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import datetime
x = [
datetime.datetime(2011, 1, 4, 0, 0),
datetime.datetime(2011, 1, 5, 0, 0),
datetime.datetime(2011, 1, 6, 0, 0)
]
x = date2num(x)
y = [4, 9, 2]
z = [1, 2, 3]
k = [11, 12, 13]
ax = plt.subplot(111)
ax.bar(x-0.2, y, width=0.2, color='b', align='center')
ax.bar(x, z, width=0.2, color='g', align='center')
ax.bar(x+0.2, k, width=0.2, color='r', align='center')
ax.xaxis_date()
plt.show()
I don't know what's the "y values are also overlapping" means, does the following code solve your problem?
ax = plt.subplot(111)
w = 0.3
ax.bar(x-w, y, width=w, color='b', align='center')
ax.bar(x, z, width=w, color='g', align='center')
ax.bar(x+w, k, width=w, color='r', align='center')
ax.xaxis_date()
ax.autoscale(tight=True)
plt.show()
The trouble with using dates as x-values, is that if you want a bar chart like in your second picture, they are going to be wrong. You should either use a stacked bar chart (colours on top of each other) or group by date (a "fake" date on the x-axis, basically just grouping the data points).
import numpy as np
import matplotlib.pyplot as plt
N = 3
ind = np.arange(N) # the x locations for the groups
width = 0.27 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
yvals = [4, 9, 2]
rects1 = ax.bar(ind, yvals, width, color='r')
zvals = [1,2,3]
rects2 = ax.bar(ind+width, zvals, width, color='g')
kvals = [11,12,13]
rects3 = ax.bar(ind+width*2, kvals, width, color='b')
ax.set_ylabel('Scores')
ax.set_xticks(ind+width)
ax.set_xticklabels( ('2011-Jan-4', '2011-Jan-5', '2011-Jan-6') )
ax.legend( (rects1[0], rects2[0], rects3[0]), ('y', 'z', 'k') )
def autolabel(rects):
for rect in rects:
h = rect.get_height()
ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
autolabel(rects3)
plt.show()
after looking for a similar solution and not finding anything flexible enough, I decided to write my own function for it. It allows you to have as many bars per group as you wish and specify both the width of a group as well as the individual widths of the bars within the groups.
Enjoy:
from matplotlib import pyplot as plt
def bar_plot(ax, data, colors=None, total_width=0.8, single_width=1, legend=True):
"""Draws a bar plot with multiple bars per data point.
Parameters
----------
ax : matplotlib.pyplot.axis
The axis we want to draw our plot on.
data: dictionary
A dictionary containing the data we want to plot. Keys are the names of the
data, the items is a list of the values.
Example:
data = {
"x":[1,2,3],
"y":[1,2,3],
"z":[1,2,3],
}
colors : array-like, optional
A list of colors which are used for the bars. If None, the colors
will be the standard matplotlib color cyle. (default: None)
total_width : float, optional, default: 0.8
The width of a bar group. 0.8 means that 80% of the x-axis is covered
by bars and 20% will be spaces between the bars.
single_width: float, optional, default: 1
The relative width of a single bar within a group. 1 means the bars
will touch eachother within a group, values less than 1 will make
these bars thinner.
legend: bool, optional, default: True
If this is set to true, a legend will be added to the axis.
"""
# Check if colors where provided, otherwhise use the default color cycle
if colors is None:
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Number of bars per group
n_bars = len(data)
# The width of a single bar
bar_width = total_width / n_bars
# List containing handles for the drawn bars, used for the legend
bars = []
# Iterate over all data
for i, (name, values) in enumerate(data.items()):
# The offset in x direction of that bar
x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
# Draw a bar for every value of that type
for x, y in enumerate(values):
bar = ax.bar(x + x_offset, y, width=bar_width * single_width, color=colors[i % len(colors)])
# Add a handle to the last drawn bar, which we'll need for the legend
bars.append(bar[0])
# Draw legend if we need
if legend:
ax.legend(bars, data.keys())
if __name__ == "__main__":
# Usage example:
data = {
"a": [1, 2, 3, 2, 1],
"b": [2, 3, 4, 3, 1],
"c": [3, 2, 1, 4, 2],
"d": [5, 9, 2, 1, 8],
"e": [1, 3, 2, 2, 3],
"f": [4, 3, 1, 1, 4],
}
fig, ax = plt.subplots()
bar_plot(ax, data, total_width=.8, single_width=.9)
plt.show()
Output:
I know that this is about matplotlib, but using pandas and seaborn can save you a lot of time:
df = pd.DataFrame(zip(x*3, ["y"]*3+["z"]*3+["k"]*3, y+z+k), columns=["time", "kind", "data"])
plt.figure(figsize=(10, 6))
sns.barplot(x="time", hue="kind", y="data", data=df)
plt.show()
Given the existing answers, the easiest solution, given the data in the OP, is load the data into a dataframe and plot with pandas.DataFrame.plot.
Load the value lists into pandas with a dict, and specify x as the index. The index will automatically be set as the x-axis, and the columns will be plotted as the bars.
pandas.DataFrame.plot uses matplotlib as the default backend.
See How to add value labels on a bar chart for thorough details about using .bar_label.
Tested in python 3.8.11, pandas 1.3.2, matplotlib 3.4.3
import pandas as pd
# using the existing lists from the OP, create the dataframe
df = pd.DataFrame(data={'y': y, 'z': z, 'k': k}, index=x)
# since there's no time component and x was a datetime dtype, set the index to be just the date
df.index = df.index.date
# display(df)
y z k
2011-01-04 4 1 11
2011-01-05 9 2 12
2011-01-06 2 3 13
# plot bars or kind='barh' for horizontal bars; adjust figsize accordingly
ax = df.plot(kind='bar', rot=0, xlabel='Date', ylabel='Value', title='My Plot', figsize=(6, 4))
# add some labels
for c in ax.containers:
# set the bar label
ax.bar_label(c, fmt='%.0f', label_type='edge')
# add a little space at the top of the plot for the annotation
ax.margins(y=0.1)
# move the legend out of the plot
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
Horizontal bars for when there are more columns
ax = df.plot(kind='barh', ylabel='Date', title='My Plot', figsize=(5, 4))
ax.set(xlabel='Value')
for c in ax.containers:
# set the bar label
ax.bar_label(c, fmt='%.0f', label_type='edge')
ax.margins(x=0.1)
# move the legend out of the plot
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
I modified pascscha's solution extending the interface, hopefully this helps someone else! Key features:
Variable number of entries per bar group
Customizable colors
Handling of x ticks
Fully customizable bar labels on top of bars
def bar_plot(ax, data, group_stretch=0.8, bar_stretch=0.95,
legend=True, x_labels=True, label_fontsize=8,
colors=None, barlabel_offset=1,
bar_labeler=lambda k, i, s: str(round(s, 3))):
"""
Draws a bar plot with multiple bars per data point.
:param dict data: The data we want to plot, wher keys are the names of each
bar group, and items is a list of bar values for the corresponding group.
:param float group_stretch: 1 means groups occupy the most (largest groups
touch side to side if they have equal number of bars).
:param float bar_stretch: If 1, bars within a group will touch side to side.
:param bool x_labels: If true, x-axis will contain labels with the group
names given at data, centered at the bar group.
:param int label_fontsize: Font size for the label on top of each bar.
:param float barlabel_offset: Distance, in y-values, between the top of the
bar and its label.
:param function bar_labeler: If not None, must be a functor with signature
``f(group_name, i, scalar)->str``, where each scalar is the entry found at
data[group_name][i]. When given, returns a label to put on the top of each
bar. Otherwise no labels on top of bars.
"""
sorted_data = list(sorted(data.items(), key=lambda elt: elt[0]))
sorted_k, sorted_v = zip(*sorted_data)
max_n_bars = max(len(v) for v in data.values())
group_centers = np.cumsum([max_n_bars
for _ in sorted_data]) - (max_n_bars / 2)
bar_offset = (1 - bar_stretch) / 2
bars = defaultdict(list)
#
if colors is None:
colors = {g_name: [f"C{i}" for _ in values]
for i, (g_name, values) in enumerate(data.items())}
#
for g_i, ((g_name, vals), g_center) in enumerate(zip(sorted_data,
group_centers)):
n_bars = len(vals)
group_beg = g_center - (n_bars / 2) + (bar_stretch / 2)
for val_i, val in enumerate(vals):
bar = ax.bar(group_beg + val_i + bar_offset,
height=val, width=bar_stretch,
color=colors[g_name][val_i])[0]
bars[g_name].append(bar)
if bar_labeler is not None:
x_pos = bar.get_x() + (bar.get_width() / 2.0)
y_pos = val + barlabel_offset
barlbl = bar_labeler(g_name, val_i, val)
ax.text(x_pos, y_pos, barlbl, ha="center", va="bottom",
fontsize=label_fontsize)
if legend:
ax.legend([bars[k][0] for k in sorted_k], sorted_k)
#
ax.set_xticks(group_centers)
if x_labels:
ax.set_xticklabels(sorted_k)
else:
ax.set_xticklabels()
return bars, group_centers
Sample run:
fig, ax = plt.subplots()
data = {"Foo": [1, 2, 3, 4], "Zap": [0.1, 0.2], "Quack": [6], "Bar": [1.1, 2.2, 3.3, 4.4, 5.5]}
bar_plot(ax, data, group_stretch=0.8, bar_stretch=0.95, legend=True,
labels=True, label_fontsize=8, barlabel_offset=0.05,
bar_labeler=lambda k, i, s: str(round(s, 3)))
fig.show()
I did this solution: if you want plot more than one plot in one figure, make sure before plotting next plots you have set right matplotlib.pyplot.hold(True)
to able adding another plots.
Concerning the datetime values on the X axis, a solution using the alignment of bars works for me. When you create another bar plot with matplotlib.pyplot.bar(), just use align='edge|center' and set width='+|-distance'.
When you set all bars (plots) right, you will see the bars fine.

Python matplotlib multiple bars

How to plot multiple bars in matplotlib, when I tried to call the bar function multiple times, they overlap and as seen the below figure the highest value red can be seen only.
How can I plot the multiple bars with dates on the x-axes?
So far, I tried this:
import matplotlib.pyplot as plt
import datetime
x = [
datetime.datetime(2011, 1, 4, 0, 0),
datetime.datetime(2011, 1, 5, 0, 0),
datetime.datetime(2011, 1, 6, 0, 0)
]
y = [4, 9, 2]
z = [1, 2, 3]
k = [11, 12, 13]
ax = plt.subplot(111)
ax.bar(x, y, width=0.5, color='b', align='center')
ax.bar(x, z, width=0.5, color='g', align='center')
ax.bar(x, k, width=0.5, color='r', align='center')
ax.xaxis_date()
plt.show()
I got this:
The results should be something like, but with the dates are on the x-axes and bars are next to each other:
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import datetime
x = [
datetime.datetime(2011, 1, 4, 0, 0),
datetime.datetime(2011, 1, 5, 0, 0),
datetime.datetime(2011, 1, 6, 0, 0)
]
x = date2num(x)
y = [4, 9, 2]
z = [1, 2, 3]
k = [11, 12, 13]
ax = plt.subplot(111)
ax.bar(x-0.2, y, width=0.2, color='b', align='center')
ax.bar(x, z, width=0.2, color='g', align='center')
ax.bar(x+0.2, k, width=0.2, color='r', align='center')
ax.xaxis_date()
plt.show()
I don't know what's the "y values are also overlapping" means, does the following code solve your problem?
ax = plt.subplot(111)
w = 0.3
ax.bar(x-w, y, width=w, color='b', align='center')
ax.bar(x, z, width=w, color='g', align='center')
ax.bar(x+w, k, width=w, color='r', align='center')
ax.xaxis_date()
ax.autoscale(tight=True)
plt.show()
The trouble with using dates as x-values, is that if you want a bar chart like in your second picture, they are going to be wrong. You should either use a stacked bar chart (colours on top of each other) or group by date (a "fake" date on the x-axis, basically just grouping the data points).
import numpy as np
import matplotlib.pyplot as plt
N = 3
ind = np.arange(N) # the x locations for the groups
width = 0.27 # the width of the bars
fig = plt.figure()
ax = fig.add_subplot(111)
yvals = [4, 9, 2]
rects1 = ax.bar(ind, yvals, width, color='r')
zvals = [1,2,3]
rects2 = ax.bar(ind+width, zvals, width, color='g')
kvals = [11,12,13]
rects3 = ax.bar(ind+width*2, kvals, width, color='b')
ax.set_ylabel('Scores')
ax.set_xticks(ind+width)
ax.set_xticklabels( ('2011-Jan-4', '2011-Jan-5', '2011-Jan-6') )
ax.legend( (rects1[0], rects2[0], rects3[0]), ('y', 'z', 'k') )
def autolabel(rects):
for rect in rects:
h = rect.get_height()
ax.text(rect.get_x()+rect.get_width()/2., 1.05*h, '%d'%int(h),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
autolabel(rects3)
plt.show()
after looking for a similar solution and not finding anything flexible enough, I decided to write my own function for it. It allows you to have as many bars per group as you wish and specify both the width of a group as well as the individual widths of the bars within the groups.
Enjoy:
from matplotlib import pyplot as plt
def bar_plot(ax, data, colors=None, total_width=0.8, single_width=1, legend=True):
"""Draws a bar plot with multiple bars per data point.
Parameters
----------
ax : matplotlib.pyplot.axis
The axis we want to draw our plot on.
data: dictionary
A dictionary containing the data we want to plot. Keys are the names of the
data, the items is a list of the values.
Example:
data = {
"x":[1,2,3],
"y":[1,2,3],
"z":[1,2,3],
}
colors : array-like, optional
A list of colors which are used for the bars. If None, the colors
will be the standard matplotlib color cyle. (default: None)
total_width : float, optional, default: 0.8
The width of a bar group. 0.8 means that 80% of the x-axis is covered
by bars and 20% will be spaces between the bars.
single_width: float, optional, default: 1
The relative width of a single bar within a group. 1 means the bars
will touch eachother within a group, values less than 1 will make
these bars thinner.
legend: bool, optional, default: True
If this is set to true, a legend will be added to the axis.
"""
# Check if colors where provided, otherwhise use the default color cycle
if colors is None:
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Number of bars per group
n_bars = len(data)
# The width of a single bar
bar_width = total_width / n_bars
# List containing handles for the drawn bars, used for the legend
bars = []
# Iterate over all data
for i, (name, values) in enumerate(data.items()):
# The offset in x direction of that bar
x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
# Draw a bar for every value of that type
for x, y in enumerate(values):
bar = ax.bar(x + x_offset, y, width=bar_width * single_width, color=colors[i % len(colors)])
# Add a handle to the last drawn bar, which we'll need for the legend
bars.append(bar[0])
# Draw legend if we need
if legend:
ax.legend(bars, data.keys())
if __name__ == "__main__":
# Usage example:
data = {
"a": [1, 2, 3, 2, 1],
"b": [2, 3, 4, 3, 1],
"c": [3, 2, 1, 4, 2],
"d": [5, 9, 2, 1, 8],
"e": [1, 3, 2, 2, 3],
"f": [4, 3, 1, 1, 4],
}
fig, ax = plt.subplots()
bar_plot(ax, data, total_width=.8, single_width=.9)
plt.show()
Output:
I know that this is about matplotlib, but using pandas and seaborn can save you a lot of time:
df = pd.DataFrame(zip(x*3, ["y"]*3+["z"]*3+["k"]*3, y+z+k), columns=["time", "kind", "data"])
plt.figure(figsize=(10, 6))
sns.barplot(x="time", hue="kind", y="data", data=df)
plt.show()
Given the existing answers, the easiest solution, given the data in the OP, is load the data into a dataframe and plot with pandas.DataFrame.plot.
Load the value lists into pandas with a dict, and specify x as the index. The index will automatically be set as the x-axis, and the columns will be plotted as the bars.
pandas.DataFrame.plot uses matplotlib as the default backend.
See How to add value labels on a bar chart for thorough details about using .bar_label.
Tested in python 3.8.11, pandas 1.3.2, matplotlib 3.4.3
import pandas as pd
# using the existing lists from the OP, create the dataframe
df = pd.DataFrame(data={'y': y, 'z': z, 'k': k}, index=x)
# since there's no time component and x was a datetime dtype, set the index to be just the date
df.index = df.index.date
# display(df)
y z k
2011-01-04 4 1 11
2011-01-05 9 2 12
2011-01-06 2 3 13
# plot bars or kind='barh' for horizontal bars; adjust figsize accordingly
ax = df.plot(kind='bar', rot=0, xlabel='Date', ylabel='Value', title='My Plot', figsize=(6, 4))
# add some labels
for c in ax.containers:
# set the bar label
ax.bar_label(c, fmt='%.0f', label_type='edge')
# add a little space at the top of the plot for the annotation
ax.margins(y=0.1)
# move the legend out of the plot
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
Horizontal bars for when there are more columns
ax = df.plot(kind='barh', ylabel='Date', title='My Plot', figsize=(5, 4))
ax.set(xlabel='Value')
for c in ax.containers:
# set the bar label
ax.bar_label(c, fmt='%.0f', label_type='edge')
ax.margins(x=0.1)
# move the legend out of the plot
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
I modified pascscha's solution extending the interface, hopefully this helps someone else! Key features:
Variable number of entries per bar group
Customizable colors
Handling of x ticks
Fully customizable bar labels on top of bars
def bar_plot(ax, data, group_stretch=0.8, bar_stretch=0.95,
legend=True, x_labels=True, label_fontsize=8,
colors=None, barlabel_offset=1,
bar_labeler=lambda k, i, s: str(round(s, 3))):
"""
Draws a bar plot with multiple bars per data point.
:param dict data: The data we want to plot, wher keys are the names of each
bar group, and items is a list of bar values for the corresponding group.
:param float group_stretch: 1 means groups occupy the most (largest groups
touch side to side if they have equal number of bars).
:param float bar_stretch: If 1, bars within a group will touch side to side.
:param bool x_labels: If true, x-axis will contain labels with the group
names given at data, centered at the bar group.
:param int label_fontsize: Font size for the label on top of each bar.
:param float barlabel_offset: Distance, in y-values, between the top of the
bar and its label.
:param function bar_labeler: If not None, must be a functor with signature
``f(group_name, i, scalar)->str``, where each scalar is the entry found at
data[group_name][i]. When given, returns a label to put on the top of each
bar. Otherwise no labels on top of bars.
"""
sorted_data = list(sorted(data.items(), key=lambda elt: elt[0]))
sorted_k, sorted_v = zip(*sorted_data)
max_n_bars = max(len(v) for v in data.values())
group_centers = np.cumsum([max_n_bars
for _ in sorted_data]) - (max_n_bars / 2)
bar_offset = (1 - bar_stretch) / 2
bars = defaultdict(list)
#
if colors is None:
colors = {g_name: [f"C{i}" for _ in values]
for i, (g_name, values) in enumerate(data.items())}
#
for g_i, ((g_name, vals), g_center) in enumerate(zip(sorted_data,
group_centers)):
n_bars = len(vals)
group_beg = g_center - (n_bars / 2) + (bar_stretch / 2)
for val_i, val in enumerate(vals):
bar = ax.bar(group_beg + val_i + bar_offset,
height=val, width=bar_stretch,
color=colors[g_name][val_i])[0]
bars[g_name].append(bar)
if bar_labeler is not None:
x_pos = bar.get_x() + (bar.get_width() / 2.0)
y_pos = val + barlabel_offset
barlbl = bar_labeler(g_name, val_i, val)
ax.text(x_pos, y_pos, barlbl, ha="center", va="bottom",
fontsize=label_fontsize)
if legend:
ax.legend([bars[k][0] for k in sorted_k], sorted_k)
#
ax.set_xticks(group_centers)
if x_labels:
ax.set_xticklabels(sorted_k)
else:
ax.set_xticklabels()
return bars, group_centers
Sample run:
fig, ax = plt.subplots()
data = {"Foo": [1, 2, 3, 4], "Zap": [0.1, 0.2], "Quack": [6], "Bar": [1.1, 2.2, 3.3, 4.4, 5.5]}
bar_plot(ax, data, group_stretch=0.8, bar_stretch=0.95, legend=True,
labels=True, label_fontsize=8, barlabel_offset=0.05,
bar_labeler=lambda k, i, s: str(round(s, 3)))
fig.show()
I did this solution: if you want plot more than one plot in one figure, make sure before plotting next plots you have set right matplotlib.pyplot.hold(True)
to able adding another plots.
Concerning the datetime values on the X axis, a solution using the alignment of bars works for me. When you create another bar plot with matplotlib.pyplot.bar(), just use align='edge|center' and set width='+|-distance'.
When you set all bars (plots) right, you will see the bars fine.

Categories