Add data labels to Seaborn factor plot [duplicate] - python

This question already has answers here:
How to add value labels on a bar chart
(7 answers)
Closed 5 months ago.
I would like to add data labels to factor plots generated by Seaborn. Here is an example:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
titanic_df = pd.read_csv('train.csv')
sns.factorplot('Sex',data=titanic_df,kind='count')
How can I add the 'count' values to the top of each bar on the graph?

You could do it this way:
import math
# Set plotting style
sns.set_style('whitegrid')
# Rounding the integer to the next hundredth value plus an offset of 100
def roundup(x):
return 100 + int(math.ceil(x / 100.0)) * 100
df = pd.read_csv('train.csv')
sns.factorplot('Sex', data=df, kind='count', alpha=0.7, size=4, aspect=1)
# Get current axis on current figure
ax = plt.gca()
# ylim max value to be set
y_max = df['Sex'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])
# Iterate through the list of axes' patches
for p in ax.patches:
ax.text(p.get_x() + p.get_width()/2., p.get_height(), '%d' % int(p.get_height()),
fontsize=12, color='red', ha='center', va='bottom')
plt.show()

You could do something even simpler
plt.figure(figsize=(4, 3))
plot = sns.catplot(x='Sex', y='count', kind='bar', data=titanic_df)
# plot.ax gives the axis object
# plot.ax.patches gives list of bars that can be access using index starting at 0
for i, bar in enumerate(plot.ax.patches):
h = bar.get_height()
plot.ax.text(
i, # bar index (x coordinate of text)
h+10, # y coordinate of text
'{}'.format(int(h)), # y label
ha='center',
va='center',
fontweight='bold',
size=14)

The above answer from #nickil-maveli is simply great.
This is just to add some clarity about the parameters when you are adding the data labels to the barplot (as requested in the comments by #user27074)
# loop through all bars of the barplot
for nr, p in enumerate(ax.patches):
# height of bar, which is basically the data value
height = p.get_height()
# add text to specified position
ax.text(
# bar to which data label will be added
# so this is the x-coordinate of the data label
nr,
# height of data label: height / 2. is in the middle of the bar
# so this is the y-coordinate of the data label
height / 2.,
# formatting of data label
u'{:0.1f}%'.format(height),
# color of data label
color='black',
# size of data label
fontsize=18,
# horizontal alignment: possible values are center, right, left
ha='center',
# vertical alignment: possible values are top, bottom, center, baseline
va='center'
)

Related

labeling data points when X axis is a string [duplicate]

I have created a bar chart and a line chart using two different y-axes for the following dataframe.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'DXC':['T1', 'H1', 'HP', 'T1_or_H1_or_HP'],
'Count': [2485, 5595, 3091, 9933],
'percent':[1.06, 2.39, 1.31, 4.23]})
DXC Count percent
0 T1 2485 1.06
1 H1 5595 2.39
2 HP 3091 1.31
3 T1_or_H1_or_HP 9933 4.23
Using the following code, I can also display values next to each bar in the bar chart. However, I have not been successful thus far in my attempts to also display the label (percent) values for the line plot.
fig=plt.figure()
#AX: bar chart
ax=df["Count"].plot(kind="bar", color="orange")
ax.set_ylabel("Counts")
ax.set_xlabel("")
ax.set_ylim(0,20000)
for tick in ax.get_xticklabels():
tick.set_rotation(0)
#AX2: Create secondary y-axis with same x-axis as above for plotting percent values
ax2=ax.twinx()
ax2.plot(ax.get_xticks(),df["percent"], color="red", linewidth=4, marker = "o")
ax2.grid(False)
ax2.set_ylabel("Percent", color = "red")
ax2.set_ylim(0,4.5)
ax2.tick_params(labelcolor="red", axis='y')
def add_value_labels(ax, spacing=5):
for i in ax.patches:
y_value = i.get_height()
x_value = i.get_x() + i.get_width() / 2
space = spacing
va = 'bottom'
# Use Y value as label and format number with no decimal place
label = "{:.0f}".format(y_value)
# Create annotation
ax.annotate(label,(x_value, y_value), xytext=(0, space), textcoords="offset points", ha='center', va=va)
add_value_labels(ax)
plt.show()
Can somebody suggest how to display labels for both bar plot and line plot in the same figure?
Here is a modified function that will achieve the required task. The trick is to extract the x and y values based on the type of the chart you have. For a line chart, you can use ax.lines[0] and then get_xdata and get_ydata
def add_value_labels(ax, typ, spacing=5):
space = spacing
va = 'bottom'
if typ == 'bar':
for i in ax.patches:
y_value = i.get_height()
x_value = i.get_x() + i.get_width() / 2
label = "{:.0f}".format(y_value)
ax.annotate(label,(x_value, y_value), xytext=(0, space),
textcoords="offset points", ha='center', va=va)
if typ == 'line':
line = ax.lines[0]
for x_value, y_value in zip(line.get_xdata(), line.get_ydata()):
label = "{:.2f}".format(y_value)
ax.annotate(label,(x_value, y_value), xytext=(0, space),
textcoords="offset points", ha='center', va=va)
add_value_labels(ax, typ='bar')
add_value_labels(ax2, typ='line')
From matplotlib v3.4.0 it's easier to use matplotlib.pyplot.bar_label, as explained in this answer.
The OP has many extraneous steps, which can be removed by using the yticks, secondary_y, and ylabel parameters for pandas.DataFrame.plot
pandas.DataFrame.itertuples can be used to annotate the line with matplotlib.axes.Axes.annotate because .Index corresponds to the x-axis locations and .percent is the correct y value for ax2.
See How to add hovering annotations to a plot for additional options to annotate the line.
See How to change the color of the axis, ticks and labels for a plot to easily change colors of various aspects of the figure.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# create the bar plot for the Count column and specify the yticks
ax = df.Count.plot(kind='bar', color='tab:orange', rot=0, yticks=range(0, 20001, 2500), figsize=(9, 5), ylabel='Counts')
# add bar labels
ax.bar_label(ax.containers[0])
# add the line plot for the percent column and specify the yticks and secondary_y
ax2 = df.percent.plot(marker='.', yticks=np.arange(0, 5, 0.5), secondary_y=True, ax=ax, ylabel='Percent')
# annotate the line by iterating through each row with itertuples
for row in df.itertuples():
ax2.annotate(text=row.percent, xy=(row.Index, row.percent))

How can I mark xticks at the center of bins for a seaborn distplot?

I want to plot a distplot using seaborn with xticks at the mid point of the bins. I am using the below code:
sns.distplot(df['MILEAGE'], kde=False, bins=20)
To get the midpoints of the bars, you can extract the generated rectangle patches and add half their width to their x position. Setting these midpoints as xticks will label the bars.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
ax = sns.distplot(np.random.randn(1000).cumsum(), kde=False, bins=20)
mids = [rect.get_x() + rect.get_width() / 2 for rect in ax.patches]
ax.set_xticks(mids)
plt.show()
If the tick labels would overlap too much, you could rotate them and/or adapt their fontsize:
ax.tick_params(axis='x', rotation=90, labelsize=8)
If you need the bin edges instead of their centers:
edges = [rect.get_x() for rect in ax.patches] + [ax.patches[-1].get_x() + ax.patches[-1].get_width()]
In your specific case you could just get the ticks of the figure and add 25 to them to shift them into the middle of the bars. You could also completely reset them.
ticks = plt.gca().get_xticks()
ticks += 25
plt.xticks(ticks)
Alternatively, if you plot a histogram with matplotlib, you can get the bins directly:
x = np.random.rand(100)
# Matplotlib only
counts, bins, patches = plt.hist(x)
plt.xticks(bins + 25)

How do I plot percentage labels for a horizontal bar graph in Python?

Can someone please help me plot x axis labels in percentages given the following code of my horizontal bar chart?
Finding it difficult to find as I want a more simplistic chart without x axis labels and ticks.
[Horizontal Bar Chart][1]
# Plot the figure size
plt.figure(figsize= (8,6))
# New variable and plot the question of the data frame in a normalized in a horizontal bar chat.
ax1 = df[q1].value_counts(normalize=True).sort_values().plot(kind="barh", color='#fd6a02', width=0.75, zorder=2)
# Draw vague vertical axis lines and set lines to the back of the order
vals = ax1.get_xticks()
for tick in vals:
ax1.axvline(x=tick, linestyle='dashed', alpha=0.4, color = '#d3d3d3', zorder=1)
# Tot3als to produce a composition ratio
total_percent = df[q1].value_counts(normalize=True) *100
# Remove borders
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.spines['left'].set_visible(False)
ax1.spines['bottom'].set_visible(False)
# Set the title of the graph inline with the Y axis labels.
ax1.set_title(q1, weight='bold', size=14, loc = 'left', pad=20, x = -0.16)
# ax.text(x,y,text,color)
for i,val in enumerate(total):
ax1.text(val - 1.5, i, str("{:.2%}".format(total_percent), color="w", fontsize=10, zorder=3)
# Create axis labels
plt.xlabel("Ratio of Responses", labelpad=20, weight='bold', size=12)
Each time I get a EOF error. Can someone help?
It's not based on your code, but I'll customize the answer from the official reference.
The point is achieved with ax.text(), which is a looping process.
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
ax.barh(y_pos, performance, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')
# Totals to produce a composition ratio
total = sum(performance)
# ax.text(x,y,text,color)
for i,val in enumerate(performance):
ax.text(val - 1.5, i, str("{:.2%}".format(val/total)), color="w", fontsize=10)
plt.show()

Python show value in line chart

I had created a chart with values (LSMA5['Low']), I'm able to plot the chart, but I want to show the values at each point of the chart, how can I do that?
Here are the code:
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
plt.figure(figsize=(12.6,4.6))
plt.plot(stock_store['Close'], label='ABCsTock', alpha=0.35)
plt.plot(LSMA5['Low'], label='LSMA5', alpha=1, linewidth=1)
plt.title('ABCsTock')
plt.xlabel('Jan. 01,2018 - Jul. 30,2020')
plt.ylabel('Price')
plt.legend(loc='upper right')
plt.show()
Thanks with regards
JC
If I understand what you're trying to do, here's a way to do that (with synthetic data):
x_arr = np.arange(10)
y_arr = np.random.randint(0, 10, 10)
plt.plot(x_arr, y_arr)
# zip joins x and y coordinates in pairs
for x,y in zip(x_arr,y_arr):
label = "{:.2f}".format(y)
plt.annotate(label, # this is the text
(x,y), # this is the point to label
textcoords="offset points", # how to position the text
xytext=(0,10), # distance from text to points (x,y)
ha='center') # horizontal alignment can be left, right or center
The output is:

Annotate heatmap with value from Pandas dataframe

I would like to annotate a heatmap with the values that I pass from a dataframe into the function below. I have looked at matplotlib.text but have not been able to get the values from my dataframe in a desired way in my heatmap. I have pasted in my function for generating a heatmap below, after that my dataframe and the output from the heatmap call. I would like to plot each value from my dataframe in the center of each cell in the heatmap.
Function for generating a heatmap:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
def heatmap_binary(df,
edgecolors='w',
#cmap=mpl.cm.RdYlGn,
log=False):
width = len(df.columns)/7*10
height = len(df.index)/7*10
fig, ax = plt.subplots(figsize=(20,10))#(figsize=(width,height))
cmap, norm = mcolors.from_levels_and_colors([0, 0.05, 1],['Teal', 'MidnightBlue'] ) # ['MidnightBlue', Teal]['Darkgreen', 'Darkred']
heatmap = ax.pcolor(df ,
edgecolors=edgecolors, # put white lines between squares in heatmap
cmap=cmap,
norm=norm)
ax.autoscale(tight=True) # get rid of whitespace in margins of heatmap
ax.set_aspect('equal') # ensure heatmap cells are square
ax.xaxis.set_ticks_position('top') # put column labels at the top
ax.tick_params(bottom='off', top='off', left='off', right='off') # turn off ticks
plt.yticks(np.arange(len(df.index)) + 0.5, df.index, size=20)
plt.xticks(np.arange(len(df.columns)) + 0.5, df.columns, rotation=90, size= 15)
# ugliness from http://matplotlib.org/users/tight_layout_guide.html
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", "3%", pad="1%")
plt.colorbar(heatmap, cax=cax)
plt.show()
Herre is an example of My dataframe :
dataframe :
0-5 km / h 5-40 km / h 40-80 km / h 80-120 km / h \
NORDIC 0.113955 0.191888 0.017485 -0.277528
MIDDLE EU 0.117903 0.197084 -0.001447 -0.332677
KOREA 0.314008 0.236503 -0.067174 -0.396518
CHINA 0.314008 0.236503 -0.067174 -0.396518
120-160 km / h 160-190 km / h 190 km / h
NORDIC -0.054365 0.006107 0.002458
MIDDLE EU 0.002441 0.012097 0.004599
KOREA -0.087191 0.000331 0.000040
CHINA -0.087191 0.000331 0.000040
Generating the heatmap:
heatmap_binary(dataframe)
Any ideas?
Update to clarify my problem
I tried the proposed solution from question which has the result I'm looking for:
how to annotate heatmap with text in matplotlib?
However, I still have a problem using the matplotlib.text function for positioning the values in the heatmap:
Here is my cod for trying this solution:
import matplotlib.pyplot as plt
import numpy as np
data = dataframe.values
heatmap_binary(dataframe)
for y in range(data.shape[0]):
for x in range(data.shape[1]):
plt.text(data[y,x] +0.05 , data[y,x] + 0.05, '%.4f' % data[y, x], #data[y,x] +0.05 , data[y,x] + 0.05
horizontalalignment='center',
verticalalignment='center',
color='w')
#plt.colorbar(heatmap)
plt.show()
added plot: (different coloring but same problem)
This functionality is provided by the seaborn package. It can produce maps like
An example usage of seaborn is
import seaborn as sns
sns.set()
# Load the example flights dataset and conver to long-form
flights_long = sns.load_dataset("flights")
flights = flights_long.pivot("month", "year", "passengers")
# Draw a heatmap with the numeric values in each cell
sns.heatmap(flights, annot=True, fmt="d", linewidths=.5)
The values you were using for your coordinates in your for loop were screwed up. Also you were using plt.colorbar instead of something cleaner like fig.colorbar. Try this (it gets the job done, with no effort made to otherwise cleanup the code):
def heatmap_binary(df,
edgecolors='w',
#cmap=mpl.cm.RdYlGn,
log=False):
width = len(df.columns)/7*10
height = len(df.index)/7*10
fig, ax = plt.subplots(figsize=(20,10))#(figsize=(width,height))
cmap, norm = mcolors.from_levels_and_colors([0, 0.05, 1],['Teal', 'MidnightBlue'] ) # ['MidnightBlue', Teal]['Darkgreen', 'Darkred']
heatmap = ax.pcolor(df ,
edgecolors=edgecolors, # put white lines between squares in heatmap
cmap=cmap,
norm=norm)
data = df.values
for y in range(data.shape[0]):
for x in range(data.shape[1]):
plt.text(x + 0.5 , y + 0.5, '%.4f' % data[y, x], #data[y,x] +0.05 , data[y,x] + 0.05
horizontalalignment='center',
verticalalignment='center',
color='w')
ax.autoscale(tight=True) # get rid of whitespace in margins of heatmap
ax.set_aspect('equal') # ensure heatmap cells are square
ax.xaxis.set_ticks_position('top') # put column labels at the top
ax.tick_params(bottom='off', top='off', left='off', right='off') # turn off ticks
ax.set_yticks(np.arange(len(df.index)) + 0.5)
ax.set_yticklabels(df.index, size=20)
ax.set_xticks(np.arange(len(df.columns)) + 0.5)
ax.set_xticklabels(df.columns, rotation=90, size= 15)
# ugliness from http://matplotlib.org/users/tight_layout_guide.html
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", "3%", pad="1%")
fig.colorbar(heatmap, cax=cax)
Then
df1 = pd.DataFrame(np.random.choice([0, 0.75], size=(4,5)), columns=list('ABCDE'), index=list('WXYZ'))
heatmap_binary(df1)
gives:
This is because you're using plt.text after you've added another axes.
The state machine will plot on the current axes, and after you've added a new one with divider.append_axes, the colorbar's axes is the current one. (Just calling plt.colorbar will not cause this, as it sets the current axes back to the original one afterwards if it creates the axes itself. If a specific axes object is passed in using the cax kwarg, it doesn't reset the "current" axes, as that's not what you'd normally want.)
Things like this are the main reason that you'll see so many people advising that you use the OO interface to matplotlib instead of the state machine interface. That way you know which axes object that you're plotting on.
For example, in your case, you could have heatmap_binary return the ax object that it creates, and the plot using ax.text instead of plt.text (and similar for the other plotting methods).
You also can use plotly.figure_factory to create heatmap from DataFrame, but you have convert it into list.
import plotly.figure_factory as ff
z = [your_dataframe].values.tolist()
x = [your_dataframe].columns.tolist()
y = [your_dataframe].index.tolist()
fig = ff.create_annotated_heatmap(z, x=x, y=y, annotation_text=z, colorscale='viridis')
# for add annotation into Heatmap
for i in range(len(fig.layout.annotations)):
fig.layout.annotations[i].font.size = 12
# show your Heatmap
fig.show()

Categories