Adding total values to seaborn plot python [duplicate] - python

I have a horizontal barplot, for example, a simplified version of the example from the seaborn documentation:
import seaborn as sns
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(6, 15))
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)
sns.barplot(x="total", y="abbrev", data=crashes,
label="Total", color="b")
ax.set(xlim=(0, 24), ylabel="",
xlabel="Automobile collisions per billion miles")
plt.show()
How can I get the bars labeled with the value for each bar?
I tried this approach for vertical bars (How to add percentages on top of bars in seaborn), but it doesn't seem to work. Changing height to width doesn't have the effect I assumed it would.
for p in ax.patches:
height = p.get_width()
ax.text(p.get_y()+p.get_height()/2.,
height + 3,
'{:1.2f}'.format(height),
ha="center")
I'm assuming the horizontal plot works differently?

Got it, thanks to #ImportanceOfBeingErnest
This worked for me
for p in ax.patches:
width = p.get_width() # get bar length
ax.text(width + 1, # set the text at 1 unit right of the bar
p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2
'{:1.2f}'.format(width), # set variable to display, 2 decimals
ha = 'left', # horizontal alignment
va = 'center') # vertical alignment

As of matplotlib 3.4.0
Use the new built-in ax.bar_label, which will automatically label bar containers regardless of orientation:
fig, ax = plt.subplots(figsize=(6, 8))
sns.barplot(x="total", y="abbrev", data=crashes)
# new helper method to auto-label bars
ax.bar_label(ax.containers[0])
If the bars are grouped by hue, call ax.bar_label on all the containers:
fig, ax = plt.subplots(figsize=(5, 6))
ax = sns.barplot(x="tip", y="day", hue="smoker", data=tips)
# grouped bars will have multiple containers
for container in ax.containers:
ax.bar_label(container)

Thank you very much for this. It helped me a lot, but i ran to a problem, where percents had to many digits after decimal point, the format can be then simply specified:
for container in ax.containers:
ax.bar_label(container,size=8,fmt='%.1f')

Related

How to highlight multiple bar using matplotlib

I want to highlight the max value and 2 other bars. Can anyone help me with this, thank you!!
#Figure size
plt.figure(figsize = (20, 8))
#Group by position and find the mean salary
df.groupby("Position")["Salary"].mean()
#Plot bar graph
ax = df.groupby("Position")["Salary"].mean()
#Highlight max value bar
ax.plot.bar(color=np.where(ax==ax.max(), '#ff9999','cadetblue'))
#Axis label
plt.xlabel("Position",fontsize=12)
plt.ylabel("Salary",fontsize=12)
plt.title("Wages for different job functions", fontweight='bold', fontsize=14)
plt.show()
Get an array of bar graphs and set the color of the desired location. In the following example, besides setting a special color for the largest value in your code, you have also set the fourth to red.
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()
# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))
bars = ax.bar(y_pos, performance, align='center', color=np.where(performance == performance.max(),'#ff9999','cadetblue'))
ax.set_xticks(y_pos)
ax.set_xticklabels(people)
ax.set_ylabel('Salary')
ax.set_title("Wages for different job functions", fontweight='bold', fontsize=14)
bars[3].set_color("red")
plt.show()

Two subplots coming out too long (length)

I'm attempting to plot two bar charts using matplotlib.pyplot.subplots. I've created subplots within a function, but when I output the subplots they are too long in height and not long enough in width.
Here's the function that I wrote:
def corr_bar(data1, data2, method='pearson'):
# Basic configuration.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7, 7))
ax1, ax2 = axes
corr_matrix1 = data1.corr(method=method)
corr_matrix2 = data2.corr(method=method)
cmap = cm.get_cmap('coolwarm')
major_ticks = np.arange(0, 1.1, 0.1)
minor_ticks = np.arange(0, 1.1, 0.05)
# Values for plotting.
x1 = corr_matrix1['price'].sort_values(ascending=False).index
x2 = corr_matrix2['price'].sort_values(ascending=False).index
values1 = corr_matrix1['price'].sort_values(ascending=False).values
values2 = corr_matrix2['price'].sort_values(ascending=False).values
im1 = ax1.bar(x1, values1, color=cmap(values1))
im2 = ax2.bar(x2, values2, color=cmap(values2))
# Formatting for plot 1.
ax1.set_yticks(major_ticks)
ax1.set_yticks(minor_ticks, minor=True)
plt.setp(ax1.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
ax1.grid(which='both')
ax1.grid(which='minor', alpha=0.4)
ax1.grid(which='major', alpha=0.7)
ax1.xaxis.grid(False)
# Formatting for plot 2.
ax2.set_yticks(major_ticks)
ax2.set_yticks(minor_ticks, minor=True)
plt.setp(ax2.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor')
ax2.grid(which='both')
ax2.grid(which='minor', alpha=0.4)
ax2.grid(which='major', alpha=0.7)
ax2.xaxis.grid(False)
fig.tight_layout()
plt.show()
This function (when run with two Pandas DataFrames) outputs an image like the following:
I purposely captured the blank right side of the image as well in an attempt to better depict my predicament. What I want is for the bar charts to be appropriately sized in height and width as to take up the entire space, rather than be elongated and pushed to the left.
I've tried to use the ax.set(aspect='equal') method but it "scrunches up" the bar chart. Would anybody happen to know what I could do to solve this issue?
Thank you.
When you define figsize=(7,7) you are setting the size of the entire figure and not the subplots. So your entire figure must be a square in this case. You should change it to figsize=(14,7) or use a number larger than 14 to get a little bit of extra space.

Python - dual y axis chart, align zero

I'm trying to create a horizontal bar chart, with dual x axes. The 2 axes are very different in scale, 1 set goes from something like -5 to 15 (positive and negative value), the other set is more like 100 to 500 (all positive values).
When I plot this, I'd like to align the 2 axes so zero shows at the same position, and only the negative values are to the left of this. Currently the set with all positive values starts at the far left, and the set with positive and negative starts in the middle of the overall plot.
I found the align_yaxis example, but I'm struggling to align the x axes.
Matplotlib bar charts: Aligning two different y axes to zero
Here is an example of what I'm working on with simple test data. Any ideas/suggestions? thanks
import pandas as pd
import matplotlib.pyplot as plt
d = {'col1':['Test 1','Test 2','Test 3','Test 4'],'col 2':[1.4,-3,1.3,5],'Col3':[900,750,878,920]}
df = pd.DataFrame(data=d)
fig = plt.figure() # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twiny() # Create another axes that shares the same y-axis as ax.
width = 0.4
df['col 2'].plot(kind='barh', color='darkblue', ax=ax, width=width, position=1,fontsize =4, figsize=(3.0, 5.0))
df['Col3'].plot(kind='barh', color='orange', ax=ax2, width=width, position=0, fontsize =4, figsize=(3.0, 5.0))
ax.set_yticklabels(df.col1)
ax.set_xlabel('Positive and Neg',color='darkblue')
ax2.set_xlabel('Positive Only',color='orange')
ax.invert_yaxis()
plt.show()
I followed the link from a question and eventually ended up at this answer : https://stackoverflow.com/a/10482477/5907969
The answer has a function to align the y-axes and I have modified the same to align x-axes as follows:
def align_xaxis(ax1, v1, ax2, v2):
"""adjust ax2 xlimit so that v2 in ax2 is aligned to v1 in ax1"""
x1, _ = ax1.transData.transform((v1, 0))
x2, _ = ax2.transData.transform((v2, 0))
inv = ax2.transData.inverted()
dx, _ = inv.transform((0, 0)) - inv.transform((x1-x2, 0))
minx, maxx = ax2.get_xlim()
ax2.set_xlim(minx+dx, maxx+dx)
And then use it within the code as follows:
import pandas as pd
import matplotlib.pyplot as plt
d = {'col1':['Test 1','Test 2','Test 3','Test 4'],'col 2' [1.4,-3,1.3,5],'Col3':[900,750,878,920]}
df = pd.DataFrame(data=d)
fig = plt.figure() # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twiny() # Create another axes that shares the same y-axis as ax.
width = 0.4
df['col 2'].plot(kind='barh', color='darkblue', ax=ax, width=width, position=1,fontsize =4, figsize=(3.0, 5.0))
df['Col3'].plot(kind='barh', color='orange', ax=ax2, width=width, position=0, fontsize =4, figsize=(3.0, 5.0))
ax.set_yticklabels(df.col1)
ax.set_xlabel('Positive and Neg',color='darkblue')
ax2.set_xlabel('Positive Only',color='orange')
align_xaxis(ax,0,ax2,0)
ax.invert_yaxis()
plt.show()
This will give you what you're looking for

Labeling horizontal barplot with values in Seaborn

I have a horizontal barplot, for example, a simplified version of the example from the seaborn documentation:
import seaborn as sns
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(6, 15))
crashes = sns.load_dataset("car_crashes").sort_values("total", ascending=False)
sns.barplot(x="total", y="abbrev", data=crashes,
label="Total", color="b")
ax.set(xlim=(0, 24), ylabel="",
xlabel="Automobile collisions per billion miles")
plt.show()
How can I get the bars labeled with the value for each bar?
I tried this approach for vertical bars (How to add percentages on top of bars in seaborn), but it doesn't seem to work. Changing height to width doesn't have the effect I assumed it would.
for p in ax.patches:
height = p.get_width()
ax.text(p.get_y()+p.get_height()/2.,
height + 3,
'{:1.2f}'.format(height),
ha="center")
I'm assuming the horizontal plot works differently?
Got it, thanks to #ImportanceOfBeingErnest
This worked for me
for p in ax.patches:
width = p.get_width() # get bar length
ax.text(width + 1, # set the text at 1 unit right of the bar
p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2
'{:1.2f}'.format(width), # set variable to display, 2 decimals
ha = 'left', # horizontal alignment
va = 'center') # vertical alignment
As of matplotlib 3.4.0
Use the new built-in ax.bar_label, which will automatically label bar containers regardless of orientation:
fig, ax = plt.subplots(figsize=(6, 8))
sns.barplot(x="total", y="abbrev", data=crashes)
# new helper method to auto-label bars
ax.bar_label(ax.containers[0])
If the bars are grouped by hue, call ax.bar_label on all the containers:
fig, ax = plt.subplots(figsize=(5, 6))
ax = sns.barplot(x="tip", y="day", hue="smoker", data=tips)
# grouped bars will have multiple containers
for container in ax.containers:
ax.bar_label(container)
Thank you very much for this. It helped me a lot, but i ran to a problem, where percents had to many digits after decimal point, the format can be then simply specified:
for container in ax.containers:
ax.bar_label(container,size=8,fmt='%.1f')

How to plot a superimposed bar chart using matplotlib in python?

I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()

Categories