Matplotlib way to annotate bar plots with lines and figures [duplicate] - python

This question already has answers here:
Annotate bars with values on Pandas bar plots
(4 answers)
Closed 1 year ago.
I would like to create an annotation to a bar chart that compares the value of the bar to two reference values. An overlay such as shown in the picture, a kind of staff gauge, is possible, but I'm open to more elegant solutions.
The bar chart is generated with the pandas API to matplotlib (e.g. data.plot(kind="bar")), so a plus would be if the solution is playing nicely with that.

You may use smaller bars for the target and benchmark indicators. Pandas cannot annotate bars automatically, but you can simply loop over the values and use matplotlib's pyplot.annotate instead.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
a = np.random.randint(5,15, size=5)
t = (a+np.random.normal(size=len(a))*2).round(2)
b = (a+np.random.normal(size=len(a))*2).round(2)
df = pd.DataFrame({"a":a, "t":t, "b":b})
fig, ax = plt.subplots()
df["a"].plot(kind='bar', ax=ax, legend=True)
df["b"].plot(kind='bar', position=0., width=0.1, color="lightblue",legend=True, ax=ax)
df["t"].plot(kind='bar', position=1., width=0.1, color="purple", legend=True, ax=ax)
for i, rows in df.iterrows():
plt.annotate(rows["a"], xy=(i, rows["a"]), rotation=0, color="C0")
plt.annotate(rows["b"], xy=(i+0.1, rows["b"]), color="lightblue", rotation=+20, ha="left")
plt.annotate(rows["t"], xy=(i-0.1, rows["t"]), color="purple", rotation=-20, ha="right")
ax.set_xlim(-1,len(df))
plt.show()

There's no direct way to annotate a bar plot (as far as I am aware) Some time ago I needed to annotate one so I wrote this, perhaps you can adapt it to your needs.
import matplotlib.pyplot as plt
import numpy as np
ax = plt.subplot(111)
ax.set_xlim(-0.2, 3.2)
ax.grid(b=True, which='major', color='k', linestyle=':', lw=.5, zorder=1)
# x,y data
x = np.arange(4)
y = np.array([5, 12, 3, 7])
# Define upper y limit leaving space for the text above the bars.
up = max(y) * .03
ax.set_ylim(0, max(y) + 3 * up)
ax.bar(x, y, align='center', width=0.2, color='g', zorder=4)
# Add text to bars
for xi, yi, l in zip(*[x, y, list(map(str, y))]):
ax.text(xi - len(l) * .02, yi + up, l,
bbox=dict(facecolor='w', edgecolor='w', alpha=.5))
ax.set_xticks(x)
ax.set_xticklabels(['text1', 'text2', 'text3', 'text4'])
ax.tick_params(axis='x', which='major', labelsize=12)
plt.show()

Related

How to customize seaborn boxplot with specific color sequence when boxplots have hue

I want to make boxplots with hues but I want to color code it so that each specific X string is a certain color with the hue just being a lighter color. I am able to do a boxplot without a hue. When I incorporate the hue, I get the second boxplot which loses the colors. Can someone help me customize the colors for the figure that contains the hue?
Essentially, its what the answer for this question is but with boxplots.
This is my code:
first boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax,palette=color_dict)
sns.stripplot(ax=ax,y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs,palette=color_dict,
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
second boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax, hue=df_idrs["location"])
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs["location"],
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
The only thing that changed was the palette to hue. I have seen many examples on here but I am unable to get them to work. Using the second code, I have tried the following:
Nothing happens for this one.
for ind, bp in enumerate(ax.findobj(PolyCollection)):
rgb = to_rgb(colors[ind // 2])
if ind % 2 != 0:
rgb = 0.5 + 0.5 * np.array(rgb) # make whiter
bp.set_facecolor(rgb)
I get index out of range for the following one.
for i in range(0,4):
mybox = bp.artists[i]
mybox.set_facecolor(color_dict[order[i]])
Matplotlib stores the boxes in ax.patches, but there are also 2 dummy patches (used to construct the legend) that need to be filtered away. The dots of the stripplot are stored in ax.collections. There are also 2 dummy collections for the legend, but as those come at the end, they don't form a problem.
Some remarks:
sns.boxplot returns the subplot on which it was drawn; as it is called with ax=ax it will return that same ax
Setting jitter=1in the stripplot will smear the dots over a width of 1. 1 is the distance between the x positions, and the boxes are only 0.4 wide. To avoid clutter, the code below uses jitter=0.4.
Here is some example code starting from dummy test data:
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.patches import PathPatch
from matplotlib.colors import to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(20230215)
order = ['Ash1', 'E1A', 'FUS', 'p53']
colors = ['gold', 'teal', 'darkorange', 'royalblue']
hue_order = ['A', 'B']
df_idrs = pd.DataFrame({'construct': np.repeat(order, 200),
'Norm_Ef_IDR/Ef_GS': (np.random.normal(0.03, 1, 800).cumsum() + 10) / 15,
'location': np.tile(np.repeat(hue_order, 100), 4)})
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=df_idrs, x=df_idrs['construct'], y=df_idrs['Norm_Ef_IDR/Ef_GS'], hue='location',
order=order, hue_order=hue_order, ax=ax)
box_colors = [f + (1 - f) * np.array(to_rgb(c)) # whiten colors depending on hue
for c in colors for f in np.linspace(0, 0.5, len(hue_order))]
box_patches = [p for p in ax.patches if isinstance(p, PathPatch)]
for patch, color in zip(box_patches, box_colors):
patch.set_facecolor(color)
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs['location'],
jitter=0.4, marker='o', alpha=0.4, edgecolor='black', linewidth=1, dodge=True, ax=ax)
for collection, color in zip(ax.collections, box_colors):
collection.set_facecolor(color)
ax.axhline(y=1, linestyle='--', color='black', linewidth=2)
handles = [tuple(box_patches[i::len(hue_order)]) for i in range(len(hue_order))]
ax.legend(handles=handles, labels=hue_order, title='hue category',
handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
loc='upper left', bbox_to_anchor=(1.01, 1))
plt.tight_layout()
plt.show()

How to plot a paired histogram using seaborn

I would like to make a paired histogram like the one shown here using the seaborn distplot.
This kind of plot can also be referred to as the back-to-back histogram shown here, or a bihistogram inverted/mirrored along the x-axis as discussed here.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20,10,1000)
blue = np.random.poisson(60,1000)
fig, ax = plt.subplots(figsize=(8,6))
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='blue')
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor':'black'}, kde_kws={'linewidth':2}, bins=10, color='green')
ax.set_xticks(np.arange(-20,121,20))
ax.set_yticks(np.arange(0.0,0.07,0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.show()
Here is the output:
When I use the method discussed here (plt.barh), I get the bar plot shown just below, which is not what I am looking for.
Or maybe I haven't understood the workaround well enough...
A simple/short implementation of python-seaborn-distplot similar to these kinds of plots would be perfect. I edited the figure of my first plot above to show the kind of plot I hope to achieve (though y-axis not upside down):
Any leads would be greatly appreciated.
You could use two subplots and invert the y-axis of the lower one and plot with the same bins.
df = pd.DataFrame({'a': np.random.normal(0,5,1000), 'b': np.random.normal(20,5,1000)})
fig =plt.figure(figsize=(5,5))
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
bins = np.arange(-20,40)
ax.hist(df['a'], bins=bins)
ax2.hist(df['b'],color='orange', bins=bins)
ax2.invert_yaxis()
edit:
improvements suggested by #mwaskom
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(5,5))
bins = np.arange(-20,40)
for ax, column, color, invert in zip(axes.ravel(), df.columns, ['teal', 'orange'], [False,True]):
ax.hist(df[column], bins=bins, color=color)
if invert:
ax.invert_yaxis()
plt.subplots_adjust(hspace=0)
Here is a possible approach using seaborn's displots.
Seaborn doesn't return the created graphical elements, but the ax can be interrogated. To make sure the ax only contains the elements you want upside down, those elements can be drawn first. Then, all the patches (the rectangular bars) and the lines (the curve for the kde) can be given their height in negative. Optionally the x-axis can be set at y == 0 using ax.spines['bottom'].set_position('zero').
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
green = np.random.normal(20, 10, 1000)
blue = np.random.poisson(60, 1000)
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(green, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='green')
for p in ax.patches: # turn the histogram upside down
p.set_height(-p.get_height())
for l in ax.lines: # turn the kde curve upside down
l.set_ydata(-l.get_ydata())
sns.distplot(blue, hist=True, kde=True, hist_kws={'edgecolor': 'black'}, kde_kws={'linewidth': 2}, bins=10,
color='blue')
ax.set_xticks(np.arange(-20, 121, 20))
ax.set_yticks(np.arange(0.0, 0.07, 0.01))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
pos_ticks = np.array([t for t in ax.get_yticks() if t > 0])
ticks = np.concatenate([-pos_ticks[::-1], [0], pos_ticks])
ax.set_yticks(ticks)
ax.set_yticklabels([f'{abs(t):.2f}' for t in ticks])
ax.spines['bottom'].set_position('zero')
plt.show()

Two Y axis Bar plot: custom xticks

I am trying to add custom xticks to a relatively complicated bar graph plot and I am stuck.
I am plotting from two data frames, merged_90 and merged_15:
merged_15
Volume y_err_x Area_2D y_err_y
TripDate
2015-09-22 1663.016032 199.507503 1581.591701 163.473202
merged_90
Volume y_err_x Area_2D y_err_y
TripDate
1990-06-10 1096.530711 197.377497 1531.651913 205.197493
I want to create a bar graph with two axes (i.e. Area_2D and Volume) where the Area_2D and Volume bars are grouped based on their respective data frame. An example script would look like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
fig = plt.figure()
ax1 = fig.add_subplot(111)
merged_90.Volume.plot(ax=ax1, color='orange', kind='bar',position=2.5, yerr=merged_90['y_err_x'] ,use_index=False , width=0.1)
merged_15.Volume.plot(ax=ax1, color='red', kind='bar',position=0.9, yerr=merged_15['y_err_x'] ,use_index=False, width=0.1)
ax2 = ax1.twinx()
merged_90.Area_2D.plot(ax=ax2,color='green', kind='bar',position=3.5, yerr=merged_90['y_err_y'],use_index=False, width=0.1)
merged_15.Area_2D.plot(ax=ax2,color='blue', kind='bar',position=0, yerr=merged_15['y_err_y'],use_index=False, width=0.1)
ax1.set_xlim(-0.5,0.2)
x = scipy.arange(1)
ax2.set_xticks(x)
ax2.set_xticklabels(['2015'])
plt.tight_layout()
plt.show()
The resulting plot is:
One would think I could change:
x = scipy.arange(1)
ax2.set_xticks(x)
ax2.set_xticklabels(['2015'])
to
x = scipy.arange(2)
ax2.set_xticks(x)
ax2.set_xticklabels(['1990','2015'])
but that results in:
I would like to see the ticks ordered in chronological order (i.e. 1990,2015)
Thanks!
Have you considered dropping the second axis and plotting them as follows:
ind = np.array([0,0.3])
width = 0.1
fig, ax = plt.subplots()
Rects1 = ax.bar(ind, [merged_90.Volume.values, merged_15.Volume.values], color=['orange', 'red'] ,width=width)
Rects2 = ax.bar(ind + width, [merged_90.Area_2D.values, merged_15.Area_2D.values], color=['green', 'blue'] ,width=width)
ax.set_xticks([.1,.4])
ax.set_xticklabels(('1990','2015'))
This produces:
I omitted the error and colors but you can easily add them. That would produce a readable graph given your test data. As you mentioned in comments you would still rather have two axes, presumably for different data with proper scales. To do this you could do:
fig = plt.figure()
ax1 = fig.add_subplot(111)
merged_90.Volume.plot(ax=ax, color='orange', kind='bar',position=2.5, use_index=False , width=0.1)
merged_15.Volume.plot(ax=ax, color='red', kind='bar',position=1.0, use_index=False, width=0.1)
ax2 = ax1.twinx()
merged_90.Area_2D.plot(ax=ax,color='green', kind='bar',position=3.5,use_index=False, width=0.1)
merged_15.Area_2D.plot(ax=ax,color='blue', kind='bar',position=0,use_index=False, width=0.1)
ax1.set_xlim([-.45, .2])
ax2.set_xlim(-.45, .2])
ax1.set_xticks([-.35, 0])
ax1.set_xticklabels([1990, 2015])
This produces:
Your problem was with resetting just one axis limit and not the other, they are created as twins but do not necessarily follow the changes made to one another.

Merge matplotlib subplots with shared x-axis

I have two graphs to where both have the same x-axis, but with different y-axis scalings.
The plot with regular axes is the data with a trend line depicting a decay while the y semi-log scaling depicts the accuracy of the fit.
fig1 = plt.figure(figsize=(15,6))
ax1 = fig1.add_subplot(111)
# Plot of the decay model
ax1.plot(FreqTime1,DecayCount1, '.', color='mediumaquamarine')
# Plot of the optimized fit
ax1.plot(x1, y1M, '-k', label='Fitting Function: $f(t) = %.3f e^{%.3f\t} \
%+.3f$' % (aR1,kR1,bR1))
ax1.set_xlabel('Time (sec)')
ax1.set_ylabel('Count')
ax1.set_title('Run 1 of Cesium-137 Decay')
# Allows me to change scales
# ax1.set_yscale('log')
ax1.legend(bbox_to_anchor=(1.0, 1.0), prop={'size':15}, fancybox=True, shadow=True)
Now, i'm trying to figure out to implement both close together like the examples supplied by this link
http://matplotlib.org/examples/pylab_examples/subplots_demo.html
In particular, this one
When looking at the code for the example, i'm a bit confused on how to implant 3 things:
1) Scaling the axes differently
2) Keeping the figure size the same for the exponential decay graph but having a the line graph have a smaller y size and same x size.
For example:
3) Keeping the label of the function to appear in just only the decay graph.
Any help would be most appreciated.
Look at the code and comments in it:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import gridspec
# Simple data to display in various forms
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig = plt.figure()
# set height ratios for subplots
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])
# the first subplot
ax0 = plt.subplot(gs[0])
# log scale for axis Y of the first subplot
ax0.set_yscale("log")
line0, = ax0.plot(x, y, color='r')
# the second subplot
# shared axis X
ax1 = plt.subplot(gs[1], sharex = ax0)
line1, = ax1.plot(x, y, color='b', linestyle='--')
plt.setp(ax0.get_xticklabels(), visible=False)
# remove last tick label for the second subplot
yticks = ax1.yaxis.get_major_ticks()
yticks[-1].label1.set_visible(False)
# put legend on first subplot
ax0.legend((line0, line1), ('red line', 'blue line'), loc='lower left')
# remove vertical gap between subplots
plt.subplots_adjust(hspace=.0)
plt.show()
Here is my solution:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 2 * np.pi, 400)
y = np.sin(x ** 2)
fig, (ax1,ax2) = plt.subplots(nrows=2, sharex=True, subplot_kw=dict(frameon=False)) # frameon=False removes frames
plt.subplots_adjust(hspace=.0)
ax1.grid()
ax2.grid()
ax1.plot(x, y, color='r')
ax2.plot(x, y, color='b', linestyle='--')
One more option is seaborn.FacetGrid but this requires Seaborn and Pandas libraries.
Here are some adaptions to show how the code could work to add a combined legend when plotting a pandas dataframe. ax=ax0 can be used to plot on a given ax and ax0.get_legend_handles_labels() gets the information for the legend.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dates = pd.date_range('20210101', periods=100, freq='D')
df0 = pd.DataFrame({'x': np.random.normal(0.1, 1, 100).cumsum(),
'y': np.random.normal(0.3, 1, 100).cumsum()}, index=dates)
df1 = pd.DataFrame({'z': np.random.normal(0.2, 1, 100).cumsum()}, index=dates)
fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, gridspec_kw={'height_ratios': [2, 1], 'hspace': 0})
df0.plot(ax=ax0, color=['dodgerblue', 'crimson'], legend=False)
df1.plot(ax=ax1, color='limegreen', legend=False)
# put legend on first subplot
handles0, labels0 = ax0.get_legend_handles_labels()
handles1, labels1 = ax1.get_legend_handles_labels()
ax0.legend(handles=handles0 + handles1, labels=labels0 + labels1)
# remove last tick label for the second subplot
yticks = ax1.get_yticklabels()
yticks[-1].set_visible(False)
plt.tight_layout()
plt.show()

How to plot a superimposed bar chart using matplotlib in python?

I want to plot a bar chart or a histogram using matplotlib. I don't want a stacked bar plot, but a superimposed barplot of two lists of data, for instance I have the following two lists of data with me:
Some code to begin with :
import matplotlib.pyplot as plt
from numpy.random import normal, uniform
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,1419.34,
1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,1138.70,
1104.12,1012.95,1000.36]
plt.hist(highPower, bins=10, histtype='stepfilled', normed=True,
color='b', label='Max Power in mW')
plt.hist(lowPower, bins=10, histtype='stepfilled', normed=True,
color='r', alpha=0.5, label='Min Power in mW')
I want to plot these two lists against the number of values in the two lists such that I am able to see the variation per reading.
You can produce a superimposed bar chart using plt.bar() with the alpha keyword as shown below.
The alpha controls the transparency of the bar.
N.B. when you have two overlapping bars, one with an alpha < 1, you will get a mixture of colours. As such the bar will appear purple even though the legend shows it as a light red. To alleviate this I have modified the width of one of the bars, this way even if your powers should change you will still be able to see both bars.
plt.xticks can be used to set the location and format of the x-ticks in your graph.
import matplotlib.pyplot as plt
import numpy as np
width = 0.8
highPower = [1184.53,1523.48,1521.05,1517.88,1519.88,1414.98,
1419.34,1415.13,1182.70,1165.17]
lowPower = [1000.95,1233.37, 1198.97,1198.01,1214.29,1130.86,
1138.70,1104.12,1012.95,1000.36]
indices = np.arange(len(highPower))
plt.bar(indices, highPower, width=width,
color='b', label='Max Power in mW')
plt.bar([i+0.25*width for i in indices], lowPower,
width=0.5*width, color='r', alpha=0.5, label='Min Power in mW')
plt.xticks(indices+width/2.,
['T{}'.format(i) for i in range(len(highPower))] )
plt.legend()
plt.show()
Building on #Ffisegydd's answer, if your data is in a Pandas DataFrame, this should work nicely:
def overlapped_bar(df, show=False, width=0.9, alpha=.5,
title='', xlabel='', ylabel='', **plot_kwargs):
"""Like a stacked bar chart except bars on top of each other with transparency"""
xlabel = xlabel or df.index.name
N = len(df)
M = len(df.columns)
indices = np.arange(N)
colors = ['steelblue', 'firebrick', 'darksage', 'goldenrod', 'gray'] * int(M / 5. + 1)
for i, label, color in zip(range(M), df.columns, colors):
kwargs = plot_kwargs
kwargs.update({'color': color, 'label': label})
plt.bar(indices, df[label], width=width, alpha=alpha if i else 1, **kwargs)
plt.xticks(indices + .5 * width,
['{}'.format(idx) for idx in df.index.values])
plt.legend()
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
if show:
plt.show()
return plt.gcf()
And then in a python command line:
low = [1000.95, 1233.37, 1198.97, 1198.01, 1214.29, 1130.86, 1138.70, 1104.12, 1012.95, 1000.36]
high = [1184.53, 1523.48, 1521.05, 1517.88, 1519.88, 1414.98, 1419.34, 1415.13, 1182.70, 1165.17]
df = pd.DataFrame(np.matrix([high, low]).T, columns=['High', 'Low'],
index=pd.Index(['T%s' %i for i in range(len(high))],
name='Index'))
overlapped_bar(df, show=False)
It is actually simpler than the answers all over the internet make it appear.
a = range(1,10)
b = range(4,13)
ind = np.arange(len(a))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.bar(x=ind, height=a, width=0.35,align='center')
ax.bar(x=ind, height=b, width=0.35/3, align='center')
plt.xticks(ind, a)
plt.tight_layout()
plt.show()

Categories