How to customize bar annotations to not show selected values - python

I have the following data set:
data = [6.92, 1.78, 0.0, 0.0, 3.5, 8.82, 3.06, 0.0, 0.0, 5.54, -10.8, -6.03, 0.0, 0.0, -6.8, 13.69, 8.61, 9.98, 0.0, 9.42, 4.91, 3.54, 2.62, 5.65, 1.95, 8.91, 11.46, 5.31, 6.93, 6.42]
Is there a way to remove the 0.0 labels from the bar plot?
I tried df = df.replace(0, "") but then I get a list index out of range error code.
My code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Total returns (%), listed year by year with one value per bank.
data = [6.92, 1.78, 0.0, 0.0, 3.5, 8.82, 3.06, 0.0, 0.0, 5.54, -10.8, -6.03, 0.0, 0.0, -6.8, 13.69, 8.61, 9.98, 0.0, 9.42, 4.91, 3.54, 2.62, 5.65, 1.95, 8.91, 11.46, 5.31, 6.93, 6.42]
bank_names = [f'Bank{n}' for n in range(1, 6)]
years = [str(year) for year in range(2016, 2022)]
# Six rows (years) by five columns (banks).
df = pd.DataFrame(np.array(data).reshape(6, 5), columns=bank_names, index=years)
print(df)

ax = df.plot(kind='bar', rot=0, xlabel='Year', ylabel='Total Return %', title='Overall Performance', figsize=(15, 10))
# Annotate each of the five bar containers (one per bank) with the same label style.
label_style = dict(fmt='%.1f', fontsize=8, padding=3)
for group in range(5):
    ax.bar_label(ax.containers[group], **label_style)
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()

The labels passed to matplotlib.pyplot.bar_label must be customized.
Adjust the comparison (!= 0) value or range as needed.
Use labels = [f'{v.get_height():0.1f}' if v.get_height() != 0 else '' for v in c] if you prefer not to use the assignment expression (:=).
See this answer for additional details and examples using .bar_label
Tested in pandas 1.3.4, python 3.8.12, and matplotlib 3.4.3.
The minimum required versions of python and matplotlib are 3.8 and 3.4.2, respectively.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np  # required by np.array(...).reshape(...) below

data = [6.92, 1.78, 0.0, 0.0, 3.5, 8.82, 3.06, 0.0, 0.0, 5.54, -10.8, -6.03, 0.0, 0.0, -6.8, 13.69, 8.61, 9.98, 0.0, 9.42, 4.91, 3.54, 2.62, 5.65, 1.95, 8.91, 11.46, 5.31, 6.93, 6.42]
df = pd.DataFrame(np.array(data).reshape(6,5), columns=['Bank1', 'Bank2', 'Bank3', 'Bank4', 'Bank5'], index =['2016', '2017', '2018', '2019', '2020', '2021'])
ax = df.plot(kind='bar', rot=0, xlabel='Year', ylabel='Total Return %', title='Overall Performance', figsize=(15, 10))
for c in ax.containers:
    # Build one label per bar in this container; an empty string leaves
    # zero-height bars without an annotation.
    labels = [f'{h:0.1f}' if (h := v.get_height()) != 0 else '' for v in c]
    # Pass the custom labels explicitly instead of relying on `fmt`.
    ax.bar_label(c, labels=labels, fontsize=8, padding=3)
ax.legend(title='Columns', bbox_to_anchor=(1, 1.02), loc='upper left')
plt.show()

Related

How to draw multiple graphs with the same axes and is it even possible to do it in matplotlib?

I need to build a graph similar to the following but with my data.
Each pair of adjacent quadrants shares a common axis, and each axis has its own scale.
The best thing I got was the following drawing.
My code:
import matplotlib.pyplot as plt
import numpy as np
import mpl_toolkits.axisartist.axislines as axislines
%matplotlib widget
#Data
# Four curves given as (uNN, iNN) pairs; each pair is drawn as one line
# in the top-right panel, with uNN on x and iNN on y.
u01 = np.array([0.00, 2.70, 3.45, 3.90, 4.50, 5.10, 5.55, 6.30, 7.65, 8.70, 9.75,
11.40, 11.85, 12.45, 13.05, 14.10, 17.40, 18.45, 20.55, 22.80])
i01 = np.array([0.00, 2.77, 3.59, 3.80, 3.90, 3.96, 3.98, 4.03, 4.11, 4.18, 4.25,
4.35, 4.38, 4.42, 4.45, 4.52, 4.73, 4.79, 4.93, 5.07])
u02 = np.array([0.00, 1.05, 1.65, 2.85, 3.75, 4.80, 5.55, 6.30, 8.25, 9.75, 12.15, 13.05, 16.80])
i02 = np.array([0.00, 0.06, 1.83, 7.13, 9.02, 9.41, 9.54, 9.68, 9.99, 10.23, 10.61, 10.75, 11.35])
u03 = np.array([0.00, 0.96, 1.80, 3.12, 3.96, 4.92, 6.84, 7.68, 9.72, 10.44])
i03 = np.array([0.00, 0.15, 7.26, 15.25, 15.91, 16.28, 16.95, 17.24, 17.95, 18.20])
u04 = np.array([0.00, 0.60, 1.08, 1.44, 1.68, 2.28, 2.64, 3.48, 4.68, 6.36, 7.56])
i04 = np.array([0.00, 0.20, 2.20, 5.56, 8.42, 16.07, 18.93, 21.62, 22.45, 23.29, 23.88])
# Curve y = 200 / x, sampled from x = 6 in steps of 0.1.
x_gyp = np.arange(6, 25.1, 0.1)
y_gyp = 200/x_gyp
# Straight segment from (0, 40) to (12, 0).
x_nag = [0, 12]
y_nag = [40, 0]
# Fifth curve; drawn in the bottom-left panel with i_vh on x and u_vh on y.
u_vh = np.array([0,0.51,0.60,0.64,0.67,0.74,0.77,0.79,0.83,0.86,0.92])
i_vh = np.array([0,0,3.05,11.32,30,250.14,531.820,816.89,1629.97,2431.52,4430.13])
###
# 2x2 grid of panels; the top row is three times as tall as the bottom row
# and the spacing between panels is set to zero.
fig, axs = plt.subplots(2, 2, gridspec_kw={'height_ratios': [3, 1]})
fig.subplots_adjust(wspace=0, hspace=0)
# Top-left panel: nothing is drawn (the plot call is commented out) and
# both of its axes are hidden.
#axs[0][0].plot(u_vh, i_vh, 'C1' '-o')
axs[0][0].set_xlim([0, 1]);
axs[0][0].set_ylim([0, 50]);
axs[0][0].set_xticks(np.arange(0, 1.1, 0.1));
# NOTE(review): these y ticks run 0..5000 while the y limit above is 0..50;
# looks inconsistent, although the y axis is hidden a few lines below.
axs[0][0].set_yticks(np.arange(0, 6000, 1000));
axs[0][0].invert_xaxis()
axs[0][0].yaxis.tick_right()
axs[0][0].get_xaxis().set_visible(False)
axs[0][0].get_yaxis().set_visible(False)
# Top-right panel: the four measured curves plus the 200/x curve and the
# straight segment; x ticks are moved to the top edge.
axs[0][1].plot(u01, i01, 'C1' '-o', u02, i02, 'C2' '-x', u03, i03, 'C3' '-^', u04, i04, 'C4' '-s', x_gyp, y_gyp, x_nag, y_nag, 'C7')
axs[0][1].set_xlim([0, 27]);
axs[0][1].set_ylim([0, 50]);
axs[0][1].set_xticks(np.arange(0, 27, 1));
axs[0][1].set_yticks(np.arange(0, 51, 1));
axs[0][1].xaxis.tick_top()
# Bottom-left panel: the fifth curve with both axes inverted, x ticks on
# the top edge and the y axis hidden.
axs[1][0].plot(i_vh, u_vh, 'C1' '-o')
axs[1][0].set_xlim([0, 1000]);
axs[1][0].set_ylim([0, 1]);
#axs[1][0].set_xticks(np.arange(0, 100, 10));
axs[1][0].set_yticks(np.arange(0, 1.1, 0.1));
axs[1][0].invert_xaxis()
axs[1][0].invert_yaxis()
axs[1][0].xaxis.tick_top()
#axs[1][0].yaxis.tick_right()
axs[1][0].get_yaxis().set_visible(False)
# Bottom-right panel: only limits and ticks are configured (the plot call
# is commented out); the y axis is inverted.
#axs[1][1].plot(u_vh, i_vh, 'C1' '-o')
axs[1][1].set_xlim([0, 27]);
axs[1][1].set_ylim([0, 1]);
axs[1][1].set_xticks(np.arange(0, 27, 1));
axs[1][1].set_yticks(np.arange(0, 1.1, 0.1));
axs[1][1].invert_yaxis()
# NOTE(review): plt.figure(...) presumably opens a new, empty figure rather
# than resizing `fig`; if a size for the grid was intended, figsize likely
# belongs in the plt.subplots call above. Confirm.
plt.figure(figsize=(3, 10));
I think the result would look better if ticklabels were placed inside the graph.

How to stack only selected columns in pandas barh plot

I am trying to plot a bar chart where I would like to have two bars, one stacked and another one not stacked by the side of the stacked one.
I have the first plot which is a stacked plot:
And another plot, with the same lines and columns:
I want to plot it side by side to the columns of the last plot, and not stack it:
This is a code snippet to replicate my problem:
# Rows are labelled col0..col8; each dict entry below is one data column.
row_labels = [f'col{i}' for i in range(9)]
d = pd.DataFrame(
    {
        'DC': [257334.0, 0.0, 0.0, 186146.0, 0.0, 366431.0, 461.0, 0.0, 0.0],
        'DC - IDC': [32665.0, 0.0, 156598.0, 0.0, 176170.0, 0.0, 0.0, 0.0, 0.0],
        'No Address': [292442.0, 227.0, 298513.0, 117167.0, 249.0, 747753.0, 271976.0, 9640.0, 211410.0],
    },
    index=row_labels,
)
# First chart: the two 'DC' columns as stacked horizontal bars.
d[['DC', 'DC - IDC']].plot.barh(stacked=True)
# Second, separate chart: 'No Address' alone, unstacked, in red.
d[['No Address']].plot.barh(stacked=False, color='red')
Use position parameter to draw 2 columns on the same index:
fig, ax = plt.subplots()
# Both bar groups share one Axes and the same bar width; only `position`
# (and the colour) differs, which is what places them side by side.
shared = dict(width=0.4, stacked=True, ax=ax)
d[['DC', 'DC - IDC']].plot.barh(position=0, **shared)
d[['No Address']].plot.barh(position=1, color='red', **shared)
plt.show()
You can achieve this using only the matplotlib.pyplot library. First, import NumPy and matplotlib.
import matplotlib.pyplot as plt
import numpy as np
Then,
plt.figure(figsize=(15,8))
# Bars drawn at the same y position all start at x = 0 and would overlap.
# Passing `left=d['DC']` starts each 'DC - IDC' bar where its 'DC' bar
# ends, so the two are actually stacked.
plt.barh(d.index, d['DC'], 0.4, label='DC', align='edge')
plt.barh(d.index, d['DC - IDC'], 0.4, left=d['DC'], label='DC - IDC', align='edge')
# Shift the red bars down by one bar width so they sit beside the stack.
plt.barh(np.arange(len(d.index))-0.4, d['No Address'], 0.4, color='red', label='No Address', align='edge')
plt.legend();
Here is what I did:
Increase the figure size (optional)
Create a BarContainer for each column
Decrease the width of each bar to 0.4 to make them fit
Align the left edges of the bars with the y positions
Normally all bars now are stacked. To put the red bars to the side you need to subtract each y coordinate by the width of the bars (0.4) np.arange(len(d.index))-0.4
Finally, add a legend
It should look like that:

seaborn: x ticks disappear on lineplot

I have a timeseries, but every time I try to plot it while rotating the ticks, the ticks disappear.
Reproducible code:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dates = ['2015-01', '2015-02', '2015-03', '2015-04', '2015-05', '2015-06', '2015-07', '2015-08', '2015-09', '2015-10', '2015-11', '2015-12', '2016-01', '2016-02', '2016-03', '2016-04', '2016-05', '2016-06', '2016-07', '2016-08', '2016-09', '2016-10', '2016-11', '2016-12', '2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06', '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12', '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12', '2020-01', '2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07']
measurement = [0.0, 0.0, 0.0, 0.0, 14.8, 11.3, 7.2, 0.0, 5.1, 70.1, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 937.8999749999999, 0.0, 118.7, 168.3, 525.95001, 0.0, 0.0, 3.8, 0.0, 767.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 18.4, 642.7000099999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.03333333333333333, 0.7, 0.0, 0.0, 0.0, 0.0, 13.049999999999999, 0.0, 0.0, 384.29999, 0.0, 0.0, 0.0, 0.0, 13.3, 0.0, 0.0, 325.39999, 0.0, 0.0, 0.0]
# Draw the series with the date strings on the x axis.
ax = sns.lineplot(x=dates,y=measurement)
# Re-apply the current tick labels with a 20-degree rotation.
# NOTE(review): this is the call after which the labels disappear;
# presumably get_xticklabels() yields empty-text labels before the figure
# has been drawn. Confirm for the matplotlib version in use.
ax.set_xticklabels(ax.get_xticklabels(),rotation=20)
plt.show()
Use plt.xticks(rotation=90) instead of ax.set_xticklabels(ax.get_xticklabels(),rotation=20), because you aren't trying to change the value of the label Text or position.
ax.get_xticklabels() returns a matplotlib.cbook.silent_list of the tick-label Text objects (e.g. [Text(0, 0, '2015-01'), Text(1, 0, '2015-02'), Text(2, 0, '2015-03'), ...]).
To get only the x position from ax.get_xticklabels(), you would need to do [x.get_position()[0] for x in ax.get_xticklabels()]
See Changing the “tick frequency” on x or y axis in matplotlib? if you want to change label frequency.
ax = sns.lineplot(x=dates,y=measurement)
# Only the rotation of the existing x tick labels is changed here; no new
# label text or tick positions are supplied.
plt.xticks(rotation=90)
plt.show()
This is happening because the function ax.get_xticklabels() does not return the array of labels, use ax.get_xticks() instead:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
dates = ['2015-01', '2015-02', '2015-03', '2015-04', '2015-05', '2015-06', '2015-07', '2015-08', '2015-09', '2015-10', '2015-11', '2015-12', '2016-01', '2016-02', '2016-03', '2016-04', '2016-05', '2016-06', '2016-07', '2016-08', '2016-09', '2016-10', '2016-11', '2016-12', '2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06', '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12', '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12', '2020-01', '2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07']
measurement = [0.0, 0.0, 0.0, 0.0, 14.8, 11.3, 7.2, 0.0, 5.1, 70.1, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 937.8999749999999, 0.0, 118.7, 168.3, 525.95001, 0.0, 0.0, 3.8, 0.0, 767.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 18.4, 642.7000099999999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.03333333333333333, 0.7, 0.0, 0.0, 0.0, 0.0, 13.049999999999999, 0.0, 0.0, 384.29999, 0.0, 0.0, 0.0, 0.0, 13.3, 0.0, 0.0, 325.39999, 0.0, 0.0, 0.0]
ax = sns.lineplot(x=dates, y=measurement)
# String x values are placed on a categorical axis at positions
# 0..len(dates)-1. Pin the ticks to those positions and label them with the
# date strings themselves; passing ax.get_xticks() as the labels would
# print the numeric positions instead of the dates.
ax.set_xticks(range(len(dates)))
ax.set_xticklabels(dates, rotation=20)
plt.show()
Alternatively, as suggested in the comments, you can replace the ax.set_xticklabels call with plt.xticks(rotation=20).

How to create equally spaced interval in xaxis in matplotlib histogram when the bins are not equally spaced? [duplicate]

This question already has an answer here:
matplotlib histogram with equal bars width
(1 answer)
Closed 2 years ago.
I would like the matplotlib histogram to show the data on an equally spaced x-axis even though its bins are not equally spaced. How do I do so? At present the bars for the age groups '0-6', '7-12', '13-16' and '17-20' look thinner than the rest of my data, and the bars of '17-20' overlap with those of '21-30'. The xticklabels also overlap. How do I resolve these issues?
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
# Raw ages per group.
male_ages = [66.0, 37.0, 2.0, 56.0, 8.0, 56.0, 56.0, 31.0, 15.0, 41.0, 17.0, 40.0, 45.0, 0.5, 41.0, 27.0, 53.0, 64.0, 53.0]
female_ages = [53.0, 56.0, 3.0, 31.0, 9.0, 73.0, 47.0, 18.0, 31.0, 28.0, 48.0, 44.0, 32.0, 42.0, 42.0, 39.0, 40.0, 38.0, 2.0]
age_bins_label = [
    '0-6', '7-12', '13-16', '17-20', '21-30',
    '31-40', '41-50', '51-60', '61-70', '71-80',
    '81-90', '91-100', '101-110', '111-120',
]
# Unevenly spaced bin edges, used both for the histogram and as tick positions.
age_bins = [0, 6, 12, 16, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110]
# NOTE: max() of two lists compares them element by element and returns one
# whole list; xmax is not used anywhere below.
xmax = max(male_ages, female_ages)
data = [male_ages, female_ages]
colors = ['orange', 'pink']
labels = ['male', 'female']

fig, axs = plt.subplots(2, 2, 'all', tight_layout=True, sharey=True)

# Only the top-left panel receives data.
top_left = axs[0, 0]
top_left.hist(data, bins=age_bins, color=colors, rwidth=0.9, align='left',
              stacked=False, label=labels)
top_left.legend(prop={'size': 10})
top_left.set_title('bars with legend')
top_left.get_xaxis().set_label_text(label='Age Groups', fontweight='bold')
top_left.get_yaxis().set_label_text(label='Confirmed Cases', fontweight='bold')

for ax in axs.flat:
    ax.label_outer()

# Set x-axis
#xlabels = [ str(i) for i in age_bins[1:] ]
xlabels = age_bins_label
N_labels = len(xlabels)
plt.xticks(age_bins, xlabels)
plt.show()
I would suggest you make use of np.histogram
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
male_ages = [66.0, 37.0, 2.0, 56.0, 8.0, 56.0, 56.0, 31.0, 15.0, 41.0, 17.0, 40.0, 45.0, 0.5, 41.0, 27.0, 53.0, 64.0, 53.0,]
female_ages = [53.0, 56.0, 3.0, 31.0, 9.0, 73.0, 47.0, 18.0, 31.0, 28.0, 48.0, 44.0, 32.0, 42.0, 42.0, 39.0, 40.0, 38.0, 2.0 ]
age_bins_label = [ '0-6', '7-12', '13-16', '17-20', '21-30',
                   '31-40', '41-50', '51-60', '61-70', '71-80',
                   '81-90', '91-100', '101-110', '111-120' ]
age_bins = [0, 6, 12, 16, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120 ]

# np.histogram bins are half-open, [lower, upper), so using age_bins as-is
# would count an age of exactly 40 under '41-50'. Shift every upper bound
# by one so each bin agrees with its inclusive label, e.g. '31-40' covers
# 31 <= age < 41.
bin_edges = [age_bins[0]] + [upper + 1 for upper in age_bins[1:]]
male_counts, _ = np.histogram(male_ages, bins=bin_edges)
female_counts, _ = np.histogram(female_ages, bins=bin_edges)

# Plot the counts against the category labels: categorical x positions are
# evenly spaced, so every bar gets the same width regardless of bin size.
fig, ax = plt.subplots()
ax.bar(x=age_bins_label, height=male_counts, alpha=0.5, label='male')
ax.bar(x=age_bins_label, height=female_counts, alpha=0.5, label='female')
plt.legend(loc='upper right')
plt.show()
Result

y-axis labels don't get displayed properly with Plotly

I've a problem with displaying the y-axis labels properly with plotly.
This is my index:
index = ['2015-11','2015-12','2016-01','2016-02','2016-03','2016-04','2016-05',
'2016-06','2016-07','2016-08','2016-09','2016-10','2016-11']
the data
data = [[0.115, 0.077, 0.0, 0.038, 0.0, 0.038, 0.038, 0.077, 0.0, 0.077, 0.077, 0.038],
[0.073, 0.055, 0.083, 0.055, 0.018, 0.055, 0.073, 0.037, 0.028, 0.037, 0.009, 0.0],
[0.099, 0.027, 0.036, 0.045, 0.063, 0.153, 0.027, 0.045, 0.063, 0.027, 0.0, 0.0],
[0.076, 0.038, 0.053, 0.061, 0.098, 0.068, 0.038, 0.061, 0.023, 0.0, 0.0, 0.0],
[0.142, 0.062, 0.027, 0.08, 0.097, 0.044, 0.071, 0.027, 0.0, 0.0, 0.0, 0.0],
[0.169, 0.026, 0.026, 0.026, 0.013, 0.013, 0.091, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.138, 0.121, 0.052, 0.017, 0.034, 0.017, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.297, 0.081, 0.054, 0.054, 0.054, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.095, 0.016, 0.024, 0.04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.102, 0.023, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.054, 0.027, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.087, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
I create a heatmap with following code:
import re

import numpy as np
import pandas as pd
import plotly.figure_factory as ff
from plotly.offline import iplot

# `index` and `data` are the lists defined above.
cols = range(12)
df = pd.DataFrame(data, columns=cols)
df.index = index
x = df.columns.tolist()
y = df.index.tolist()
z = df.values
# Format every cell as a whole-number percentage, then blank out the cells
# that read exactly '0%' so they carry no annotation.
annotation_text = np.char.mod('%.0f%%', df*100).tolist()
annotation_text = [[re.sub('^0%$', '', cell) for cell in row] for row in annotation_text]
colorscale = [[0.0, 'rgb(248, 248, 255)'],
              [0.04, 'rgb(224, 228, 236)'],
              [0.08, 'rgb(196, 210, 226)'],
              [0.12, 'rgb(158, 178, 226)'],
              [0.16, 'rgb(134, 158, 227)'],
              [0.2, 'rgb(122, 146, 227)'],
              [1.0, 'rgb(65, 105, 225)'],
              ]
fig = ff.create_annotated_heatmap(z, x=x, y=y, colorscale=colorscale,
                                  annotation_text=annotation_text)
fig.layout.yaxis.autorange = 'reversed'
# `iplot` is imported by name above; no `offline` name is in scope, so the
# original `offline.iplot(...)` call raised NameError.
iplot(fig, filename='annotated_heatmap_color.html')
Which produces the correct heatmap but with the y-axis labels missing
When I change the index to shorter values like '5-11' with
index = [x[3:] for x in index]
the labels show up.
I don't understand the logic behind that and would like to know how to fix it.
Plotly.py uses plotly.js under the hood, which converts your date strings to a numerical date format and then misplaces them on your non-numerical axis.
To make the axis explicitly categorical, you just have to add:
fig.layout.yaxis.type = 'category'

Categories