labeling data points when X axis is a string [duplicate] - python

I have created a bar chart and a line chart using two different y-axes for the following dataframe.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'DXC':['T1', 'H1', 'HP', 'T1_or_H1_or_HP'],
'Count': [2485, 5595, 3091, 9933],
'percent':[1.06, 2.39, 1.31, 4.23]})
DXC Count percent
0 T1 2485 1.06
1 H1 5595 2.39
2 HP 3091 1.31
3 T1_or_H1_or_HP 9933 4.23
Using the following code, I can also display values next to each bar in the bar chart. However, I have not been successful thus far in my attempts to also display the label (percent) values for the line plot.
fig=plt.figure()
#AX: bar chart
ax=df["Count"].plot(kind="bar", color="orange")
ax.set_ylabel("Counts")
ax.set_xlabel("")
ax.set_ylim(0,20000)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
#AX2: Create secondary y-axis with same x-axis as above for plotting percent values
ax2=ax.twinx()
ax2.plot(ax.get_xticks(),df["percent"], color="red", linewidth=4, marker = "o")
ax2.grid(False)
ax2.set_ylabel("Percent", color = "red")
ax2.set_ylim(0,4.5)
ax2.tick_params(labelcolor="red", axis='y')
def add_value_labels(ax, spacing=5):
for i in ax.patches:
y_value = i.get_height()
x_value = i.get_x() + i.get_width() / 2
space = spacing
va = 'bottom'
# Use Y value as label and format number with no decimal place
label = "{:.0f}".format(y_value)
# Create annotation
ax.annotate(label,(x_value, y_value), xytext=(0, space), textcoords="offset points", ha='center', va=va)
add_value_labels(ax)
plt.show()
Can somebody suggest how to display labels for both bar plot and line plot in the same figure?

Here is a modified function that will achieve the required task. The trick is to extract the x and y values based on the type of the chart you have. For a line chart, you can use ax.lines[0] and then get_xdata and get_ydata
def add_value_labels(ax, typ, spacing=5):
space = spacing
va = 'bottom'
if typ == 'bar':
for i in ax.patches:
y_value = i.get_height()
x_value = i.get_x() + i.get_width() / 2
label = "{:.0f}".format(y_value)
ax.annotate(label,(x_value, y_value), xytext=(0, space),
textcoords="offset points", ha='center', va=va)
if typ == 'line':
line = ax.lines[0]
for x_value, y_value in zip(line.get_xdata(), line.get_ydata()):
label = "{:.2f}".format(y_value)
ax.annotate(label,(x_value, y_value), xytext=(0, space),
textcoords="offset points", ha='center', va=va)
add_value_labels(ax, typ='bar')
add_value_labels(ax2, typ='line')

From matplotlib v3.4.0 it's easier to use matplotlib.pyplot.bar_label, as explained in this answer.
The OP has many extraneous steps, which can be removed by using the yticks, secondary_y, and ylabel parameters for pandas.DataFrame.plot
pandas.DataFrame.itertuples can be used to annotate the line with matplotlib.axes.Axes.annotate because .Index corresponds to the x-axis locations and .percent is the correct y value for ax2.
See How to add hovering annotations to a plot for additional options to annotate the line.
See How to change the color of the axis, ticks and labels for a plot to easily change colors of various aspects of the figure.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# create the bar plot for the Count column and specify the yticks
ax = df.Count.plot(kind='bar', color='tab:orange', rot=0, yticks=range(0, 20001, 2500), figsize=(9, 5), ylabel='Counts')
# add bar labels
ax.bar_label(ax.containers[0])
# add the line plot for the percent column and specify the yticks and secondary_y
ax2 = df.percent.plot(marker='.', yticks=np.arange(0, 5, 0.5), secondary_y=True, ax=ax, ylabel='Percent')
# annotate the line by iterating through each row with itertuples
for row in df.itertuples():
ax2.annotate(text=row.percent, xy=(row.Index, row.percent))

Related

How to annotate a bar plot and add a custom legend

I am trying to draw a Bar chart that looks like the one below, I am not sure how to set a percentage value in each column top, and a legend at the right side. My code snippets below. It's working, however it's missing the percentage value and legend.
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
objects = ('18-25', '26-30', '31-40', '40-50')
y_pos = np.arange(len(objects))
performance = [13, 18, 16, 3]
width = 0.35 # the width of the bars
plt.bar(y_pos, performance, align='center', alpha=0.5, color=('red', 'green', 'blue', 'yellow'))
plt.xticks(y_pos, objects)
plt.ylabel('%User', fontsize=16)
plt.title('Age of Respondents', fontsize=20)
width = 0.35
plt.show()
The legend colors were slightly different than the plot colors because alpha=0.5, which has been removed.
imagecolorpicker.com was used to select the correct blue and green.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
color = ('red', '#00b050', '#00b0f0', 'yellow')
objects = ('18-25', '26-30', '31-40', '40-50')
y_pos = np.arange(len(objects))
performance = [13, 18, 16, 3]
width = 0.35 # the width of the bars
plt.bar(y_pos, performance, align='center', color=color)
plt.xticks(y_pos, objects)
plt.ylim(0, 20) # this adds a little space at the top of the plot, to compensate for the annotation
plt.ylabel('%User', fontsize=16)
plt.title('Age of Respondents', fontsize=20)
# map names to colors
cmap = dict(zip(performance, color))
# create the rectangles for the legend
patches = [Patch(color=v, label=k) for k, v in cmap.items()]
# add the legend
plt.legend(title='Number of Trips', labels=objects, handles=patches, bbox_to_anchor=(1.04, 0.5), loc='center left', borderaxespad=0, fontsize=15, frameon=False)
# add the annotations
for y, x in zip(performance, y_pos):
plt.annotate(f'{y}%\n', xy=(x, y), ha='center', va='center')
Annotation Resources - from matplotlib v3.4.2
Adding value labels on a matplotlib bar chart
How to annotate each segment of a stacked bar chart
Stacked Bar Chart with Centered Labels
How to plot and annotate multiple data columns in a seaborn barplot
How to annotate a seaborn barplot with the aggregated value
stack bar plot in matplotlib and add label to each section
How to add multiple annotations to a barplot
How to plot and annotate a grouped bar chart
How to plot a horizontal stacked bar with annotations

How do I plot percentage labels for a horizontal bar graph in Python?

Can someone please help me plot x axis labels in percentages given the following code of my horizontal bar chart?
Finding it difficult to find as I want a more simplistic chart without x axis labels and ticks.
[Horizontal Bar Chart][1]
# Plot the figure size
plt.figure(figsize= (8,6))
# New variable and plot the question of the data frame in a normalized in a horizontal bar chat.
ax1 = df[q1].value_counts(normalize=True).sort_values().plot(kind="barh", color='#fd6a02', width=0.75, zorder=2)
# Draw vague vertical axis lines and set lines to the back of the order
vals = ax1.get_xticks()
for tick in vals:
ax1.axvline(x=tick, linestyle='dashed', alpha=0.4, color = '#d3d3d3', zorder=1)
# Tot3als to produce a composition ratio
total_percent = df[q1].value_counts(normalize=True) *100
# Remove borders
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.spines['left'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
# Set the title of the graph inline with the Y axis labels.
ax1.set_title(q1, weight='bold', size=14, loc = 'left', pad=20, x = -0.16)
# ax.text(x,y,text,color)
for i,val in enumerate(total):
ax1.text(val - 1.5, i, str("{:.2%}".format(total_percent), color="w", fontsize=10, zorder=3)
# Create axis labels
plt.xlabel("Ratio of Responses", labelpad=20, weight='bold', size=12)
Each time I get a EOF error. Can someone help?
It's not based on your code, but I'll customize the answer from the official reference.
The point is achieved with ax.text(), which is a looping process.
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
ax.barh(y_pos, performance, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')
# Totals to produce a composition ratio
total = sum(performance)
# ax.text(x,y,text,color)
for i,val in enumerate(performance):
ax.text(val - 1.5, i, str("{:.2%}".format(val/total)), color="w", fontsize=10)
plt.show()

Labeling horizontal barplot with values in Seaborn

I have a horizontal barplot, for example, a simplified version of the example from the seaborn documentation:
import seaborn as sns
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(6, 15))
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)
sns.barplot(x="total", y="abbrev", data=crashes,
label="Total", color="b")
ax.set(xlim=(0, 24), ylabel="",
xlabel="Automobile collisions per billion miles")
plt.show()
How can I get the bars labeled with the value for each bar?
I tried this approach for vertical bars (How to add percentages on top of bars in seaborn), but it doesn't seem to work. Changing height to width doesn't have the effect I assumed it would.
for p in ax.patches:
height = p.get_width()
ax.text(p.get_y()+p.get_height()/2.,
height + 3,
'{:1.2f}'.format(height),
ha="center")
I'm assuming the horizontal plot works differently?
Got it, thanks to #ImportanceOfBeingErnest
This worked for me
for p in ax.patches:
width = p.get_width() # get bar length
ax.text(width + 1, # set the text at 1 unit right of the bar
p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2
'{:1.2f}'.format(width), # set variable to display, 2 decimals
ha = 'left', # horizontal alignment
va = 'center') # vertical alignment
As of matplotlib 3.4.0
Use the new built-in ax.bar_label, which will automatically label bar containers regardless of orientation:
fig, ax = plt.subplots(figsize=(6, 8))
sns.barplot(x="total", y="abbrev", data=crashes)
# new helper method to auto-label bars
ax.bar_label(ax.containers[0])
If the bars are grouped by hue, call ax.bar_label on all the containers:
fig, ax = plt.subplots(figsize=(5, 6))
ax = sns.barplot(x="tip", y="day", hue="smoker", data=tips)
# grouped bars will have multiple containers
for container in ax.containers:
ax.bar_label(container)
Thank you very much for this. It helped me a lot, but i ran to a problem, where percents had to many digits after decimal point, the format can be then simply specified:
for container in ax.containers:
ax.bar_label(container,size=8,fmt='%.1f')

Add data labels to Seaborn factor plot [duplicate]

This question already has answers here:
How to add value labels on a bar chart
(7 answers)
Closed 5 months ago.
I would like to add data labels to factor plots generated by Seaborn. Here is an example:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
titanic_df = pd.read_csv('train.csv')
sns.factorplot('Sex',data=titanic_df,kind='count')
How can I add the 'count' values to the top of each bar on the graph?
You could do it this way:
import math
# Set plotting style
sns.set_style('whitegrid')
# Rounding the integer to the next hundredth value plus an offset of 100
def roundup(x):
return 100 + int(math.ceil(x / 100.0)) * 100
df = pd.read_csv('train.csv')
sns.factorplot('Sex', data=df, kind='count', alpha=0.7, size=4, aspect=1)
# Get current axis on current figure
ax = plt.gca()
# ylim max value to be set
y_max = df['Sex'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])
# Iterate through the list of axes' patches
for p in ax.patches:
ax.text(p.get_x() + p.get_width()/2., p.get_height(), '%d' % int(p.get_height()),
fontsize=12, color='red', ha='center', va='bottom')
plt.show()
You could do something even simpler
plt.figure(figsize=(4, 3))
plot = sns.catplot(x='Sex', y='count', kind='bar', data=titanic_df)
# plot.ax gives the axis object
# plot.ax.patches gives list of bars that can be access using index starting at 0
for i, bar in enumerate(plot.ax.patches):
h = bar.get_height()
plot.ax.text(
i, # bar index (x coordinate of text)
h+10, # y coordinate of text
'{}'.format(int(h)), # y label
ha='center',
va='center',
fontweight='bold',
size=14)
The above answer from #nickil-maveli is simply great.
This is just to add some clarity about the parameters when you are adding the data labels to the barplot (as requested in the comments by #user27074)
# loop through all bars of the barplot
for nr, p in enumerate(ax.patches):
# height of bar, which is basically the data value
height = p.get_height()
# add text to specified position
ax.text(
# bar to which data label will be added
# so this is the x-coordinate of the data label
nr,
# height of data label: height / 2. is in the middle of the bar
# so this is the y-coordinate of the data label
height / 2.,
# formatting of data label
u'{:0.1f}%'.format(height),
# color of data label
color='black',
# size of data label
fontsize=18,
# horizontal alignment: possible values are center, right, left
ha='center',
# vertical alignment: possible values are top, bottom, center, baseline
va='center'
)

How to plot a superimposed bar chart using matplotlib in python?

I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()

Categories