How to add error bars to a grouped bar plot? - python

I would like to add error bars to my plot so that I can show the min and max of each bar. Can anyone help me, please? Thanks in advance.
The min max is as follow:
Delay = (53.46 (min 0, max60) , 36.22 (min 12,max 70), 83 (min 21,max 54), 17 (min 12,max 70))
Latency = (38 (min 2,max 70), 44 (min 12,max 87), 53 (min 9,max 60), 10 (min 11,max 77))
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
from matplotlib.dates import date2num
import datetime
# Bar heights for the two series, one value per time point in `index`.
Delay = (53.46, 36.22, 83, 17)
Latency = (38, 44, 53, 10)
index = ['T=0', 'T=26', 'T=50','T=900']
# One column per series; the index labels become the x tick labels.
df = pd.DataFrame({'Delay': Delay, 'Latency': Latency}, index=index)
# Grouped bar chart: one group per index label, one bar per column.
ax = df.plot.bar(rot=0)
plt.xlabel('Time')
plt.ylabel('(%)')
plt.ylim(0, 101)
# Save before show() so the written file contains the drawn figure.
plt.savefig('TestX.png', dpi=300, bbox_inches='tight')
plt.show()

In order to plot in the correct location on a bar plot, the patch data for each bar must be extracted.
pandas.DataFrame.plot.bar returns a single matplotlib.axes.Axes here; an ndarray with one matplotlib.axes.Axes per column is returned only when subplots=True.
In the case of this figure, ax.patches contains 8 matplotlib.patches.Rectangle objects, one for each segment of each bar.
By using the associated methods for this object, the height, width, and x locations can be extracted, and used to draw a line with plt.vlines.
The height of the bar is used to extract the correct min and max value from dict, z.
Unfortunately, the patch data does not contain the bar label (e.g. Delay & Latency).
import pandas as pd
import matplotlib.pyplot as plt
# create dataframe
Delay = (53.46, 36.22, 83, 17)
Latency = (38, 44, 53, 10)
index = ['T=0', 'T=26', 'T=50','T=900']
df = pd.DataFrame({'Delay': Delay, 'Latency': Latency}, index=index)
# dicts with errors, keyed by the bar height they belong to
Delay_error = {53.46: {'min': 0,'max': 60}, 36.22: {'min': 12,'max': 70}, 83: {'min': 21,'max': 54}, 17: {'min': 12,'max': 70}}
Latency_error = {38: {'min': 2, 'max': 70}, 44: {'min': 12,'max': 87}, 53: {'min': 9,'max': 60}, 10: {'min': 11,'max': 77}}
# combine them; providing all the keys are unique
# (a height shared by both series would silently keep only the Latency entry)
z = {**Delay_error, **Latency_error}
# plot
ax = df.plot.bar(rot=0)
plt.xlabel('Time')
plt.ylabel('(%)')
plt.ylim(0, 101)
# Draw one vertical min-max line through the centre of every bar.
for p in ax.patches:
    x = p.get_x()  # get the bottom left x corner of the bar
    w = p.get_width()  # get width of bar
    h = p.get_height()  # get height of bar
    min_y = z[h]['min']  # use h to get min from dict z
    max_y = z[h]['max']  # use h to get max from dict z
    plt.vlines(x+w/2, min_y, max_y, color='k')  # draw a vertical line
If there are non-unique values in the two dicts, so they can't be combined, we can select the correct dict based on the bar plot order.
All the bars for a single label are plotted first.
In this case, index 0-3 are the Delay bars, and 4-7 are the Latency bars
# Patches are ordered series by series, so the first half of ax.patches
# belongs to Delay and the second half to Latency.
for i, p in enumerate(ax.patches):
    print(i, p)  # show the patch order
    x = p.get_x()
    w = p.get_width()
    h = p.get_height()
    if i < len(ax.patches)/2:  # select which dictionary to use
        d = Delay_error
    else:
        d = Latency_error
    min_y = d[h]['min']
    max_y = d[h]['max']
    # vertical min-max line through the centre of the bar
    plt.vlines(x+w/2, min_y, max_y, color='k')

Some zipping and stacking will suffice—see bar_min_maxs below. Simplifying and slightly generalizing Trenton's code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# create dataframe
Delay = (53.46, 36.22, 83, 17)
Latency = (38, 44, 53, 10)
index = ['T=0', 'T=26', 'T=50','T=900']
df = pd.DataFrame({'Delay': Delay, 'Latency': Latency,
                   'Delay_min': (0, 12, 21, 12),  # supply min and max
                   'Delay_max': (60, 70, 54, 70),
                   'Latency_min': (2, 12, 9, 11),
                   'Latency_max': (70, 87, 60, 77)},
                  index=index)
# plot only the value columns; the *_min / *_max columns feed the error bars
ax = df[['Delay', 'Latency']].plot.bar(rot=0)
plt.xlabel('Time')
plt.ylabel('(%)')
plt.ylim(0, 101)
# bar_min_maxs[i] is bar/patch i's min, max
# (all Delay rows first, then all Latency rows, stacked in that order)
bar_min_maxs = np.vstack((list(zip(df['Delay_min'], df['Delay_max'])),
                          list(zip(df['Latency_min'], df['Latency_max']))))
assert len(bar_min_maxs) == len(ax.patches)
for patch, (min_y, max_y) in zip(ax.patches, bar_min_maxs):
    # vertical min-max line through the centre of the bar
    plt.vlines(patch.get_x() + patch.get_width()/2,
               min_y, max_y, color='k')
And if errorbars are expressed through margins of errors instead of mins and maxs, i.e., the errorbar is centered at the bar's height w/ length 2 x margin of error, then here's code to plot those:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# create dataframe
Delay = (53.46, 36.22, 83, 17)
Latency = (38, 44, 53, 10)
index = ['T=0', 'T=26', 'T=50','T=900']
df = pd.DataFrame({'Delay': Delay, 'Latency': Latency,
                   'Delay_moe': (5, 15, 25, 35),  # supply margin of error
                   'Latency_moe': (10, 20, 30, 40)},
                  index=index)
# plot only the value columns; the *_moe columns feed the error bars
ax = df[['Delay', 'Latency']].plot.bar(rot=0)
plt.xlabel('Time')
plt.ylabel('(%)')
plt.ylim(0, 101)
# bar_moes[i] is bar/patch i's margin of error, i.e., half the length of an
# errorbar centered at the bar's height
# (transpose + ravel lists every Delay_moe value, then every Latency_moe value)
bar_moes = np.ravel(df[['Delay_moe', 'Latency_moe']].values.T)
assert len(bar_moes) == len(ax.patches)
for patch, moe in zip(ax.patches, bar_moes):
    height = patch.get_height()  # of bar
    min_y, max_y = height - moe, height + moe
    plt.vlines(patch.get_x() + patch.get_width()/2,
               min_y, max_y, color='k')
One minor statistical note: if the difference b/t the two groups (Delay and Latency for each T=t) is of interest, then add a plot for the difference with an errorbar for the difference. A plot like the one above is not sufficient for directly analyzing differences; if, e.g., the two errorbars overlap at T=0, this does not imply that the difference b/t Delay and Latency is not statistically significant at whatever level was used. (Though if they don't overlap, then the difference is statistically significant.)

Related

How to draw an image based on data stored on an array in Google Colab

I need to create something like this
in Google Colab. I have all the data on an array. First, I tried using matplotlib to create a horizontal bar chart. I made this based on the examples given on their documentation:
import matplotlib.pyplot as plt
import numpy as np
# Number of horizontal bars.
n = 4
# Segment lengths: bars1 is the first segment of each bar, bars2 the second.
bars1 = (20, 35, 30, 35)
bars2 = (25, 32, 34, 20)
ind = np.arange(n) # the x locations for the groups
fig, ax = plt.subplots()
p1 = ax.barh(ind, bars1)
# Start the second segment where the first one ends.
p2 = ax.barh(ind, bars2, left=bars1)
ax.set_ylabel('Bars')
ax.set_title('Divisions of each bars')
plt.yticks(ind)
# Label with label_type 'center' instead of the default 'edge'
ax.bar_label(p1, label_type='center')
ax.bar_label(p2, label_type='center')
plt.show()
and this is the result of this code
This looks good, but has a major limitation: I need to have the same number of intervals on every bar, which doesn't always happen on the first picture. Which library could I use to recreate something like this? I tried searching but I don't know exactly how to specify my problem.
The trick to getting a section to not appear is to add a value of np.nan, not float("nan"). So, using this data:
# The last bar has no second segment: its bars2 entry is np.nan.
bars1 = (20, 35, 30, 35)
bars2 = (25, 32, 34, np.nan)
results in this graph:
I think you can achieve that with the code you already wrote.
First, consider that each bar needs to have the same
accumulated value, e.g. 100 (it can be any number); let's call it MAX.
Then, for each bar, you calculate the actual value you have, and if
it's less than MAX, add MAX - current value to your bar.
That way all bars will reach the MAX value.
However, you will not have exactly the same number of sections in each bar if one of them is already at the MAX value. To fix that, calculate MAX by finding the maximum total among your bars and adding some value x to it.
code :
import matplotlib.pyplot as plt
import numpy as np
def get_added_bar(bars, added_val=5):
    """Return the filler segment that pads every stacked bar to one length.

    The common length is the largest running total seen while stacking,
    plus ``added_val``.

    :param bars: iterable of segment series; ``bars[k][i]`` is the length of
        segment ``k`` in bar ``i``. Series may differ in length.
    :param added_val: extra length added beyond the longest bar (default 5).
    :return: tuple with one filler length per bar; empty if ``bars`` is empty.
    """
    totals = []
    peak = 0  # largest running total so far; never drops below 0
    for series in bars:
        for pos, value in enumerate(series):
            if pos < len(totals):
                # rebinding, not +=, so a mutable input item is never altered
                totals[pos] = totals[pos] + value
            else:
                totals.append(value)
            if totals[pos] > peak:
                peak = totals[pos]
    limit = peak + added_val
    return tuple(limit - total for total in totals)
def sum_bars(bars):
    """Return the total length of each stacked bar.

    :param bars: iterable of segment series; ``bars[k][i]`` is the length of
        segment ``k`` in bar ``i``. Series may differ in length.
    :return: tuple of per-bar sums, as long as the longest series.
    """
    totals = []
    for series in bars:
        for pos, value in enumerate(series):
            if pos < len(totals):
                # rebinding, not +=, so a mutable input item is never altered
                totals[pos] = totals[pos] + value
            else:
                totals.append(value)
    return tuple(totals)
# Number of horizontal bars.
n = 4
# 0 for missing value
bars1 = (0, 35, 30, 35)
bars2 = (25, 32, 0, 20)
added_bar = get_added_bar((bars1,bars2)) # filler segment length for each bar
ind = np.arange(n) # the x locations for the groups
fig, ax = plt.subplots()
print((bars1,bars2,added_bar))
p1 = ax.barh(ind, bars1)
p2 = ax.barh(ind, bars2, left=bars1)
# White filler drawn after the real segments, starting at each bar's total.
p3 = ax.barh(ind, added_bar, left=sum_bars((bars1,bars2)), color='white')
ax.set_ylabel('Bars')
ax.set_title('Divisions of each bars')
plt.yticks(ind)
print(ind)
# Label with label_type 'center' instead of the default 'edge'
ax.bar_label(p1, label_type='center')
ax.bar_label(p2, label_type='center')
# ax.bar_label(p3, label_type='center')
plt.show()
you can check, I created a working example here:
https://replit.com/join/mgpidbnqfb-amirping
you can change any value in your bars with 0 if it's missing and you will still have it working

Change the width of merged bins in Matplotlib and Seaborn

I have a table of grades and I want all of the bins to be of the same width
i want the bins to be in the range of [0,56,60,65,70,80,85,90,95,100]
when the first bin is from 0-56 then 56-60 ... with the same width
sns.set_style('darkgrid')
# Bin edges with unequal spacing (first bin is 0-56, then 56-60, ...).
newBins = [0,56,60,65,70,80,85,90,95,100]
# `scores` is the asker's grade data; it is not defined in this snippet.
sns.displot(data= scores , bins=newBins)
plt.xlabel('grade')
plt.xlim(0,100)
# Put a tick on every bin edge.
plt.xticks(newBins);
Expected output
how I can balance the width of the bins?
You need to cheat a bit. Define your own bins and name the bins with a linear range. Here is an example:
# Sample data: 100000 random integers in [0, 100).
s = pd.Series(np.random.randint(100, size=100000))
# Real (unevenly spaced) bin edges.
bins = [-0.1, 50, 75, 95, 101]
# Replace every value by the index of its bin (0, 1, 2, ...).
s2 = pd.cut(s, bins=bins, labels=range(len(bins)-1))
# Histogram of the bin indices, one histogram bin per real bin.
ax = s2.astype(int).plot.hist(bins=len(bins)-
1)
# Spread len(bins) ticks over the axis and label them with the real edges.
ax.set_xticks(np.linspace(0, len(bins)-2, len(bins)))
ax.set_xticklabels(bins)
Output:
Old answer:
Why don't you let seaborn pick the bins for you:
# bins='auto' leaves the choice of bin edges to the library.
sns.displot(data=scores, bins='auto')
Or set the number of bins that you want:
# An integer asks for that many bins instead of explicit edges.
sns.displot(data=scores, bins=10)
They will be evenly distributed
You are assigning a list to the bins argument of sns.displot(). This specifies the edges of bins. Since these edges are not spaced evenly, the widths of bins vary.
I think that you may want to use a bar plot (sns.barplot()) and not a histogram. You would need to compute how many data points are in each bin, and then plot bars without the information about what range of values each bar represents. Something like this:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style('darkgrid')
import numpy as np
# sample data
data = np.random.randint(0, 100, 200)
newBins = [0,56,60,65,70,80,85,90,95,100]
# compute bar heights
hist, _ = np.histogram(data, bins=newBins)
# plot a bar diagram
# (x is just the bin number, so every bar gets the same width)
sns.barplot(x = list(range(len(hist))), y = hist)
plt.show()
It gives:
Just change the list of values that you are using as bins:
# Evenly spaced edges 0, 1, ..., 99.
newBins = numpy.arange(0, 100, 1)
You can use the bins parameter of histplot, but to get the exact answer you have to use pd.cut() to create your own bins.
np.random.seed(101)
# Draw the sample first so the same values feed both columns; the original
# referenced `scores` inside the DataFrame constructor before it existed.
scores = pd.Series(np.random.randint(100, size=175))
df = pd.DataFrame({'scores': scores,
                   # include_lowest=True keeps a score of exactly 0 in the
                   # first bin (intervals are right-closed by default)
                   'bins_created': pd.cut(scores,
                                          bins=[0,55,60,65,70,75,80,85,90,95,100],
                                          include_lowest=True)})
# Count per bin; sort_index() lists the bins by grade range, not frequency.
new_data = df['bins_created'].value_counts().sort_index()
plt.figure(figsize=(10,5),dpi=100)
plots = sns.barplot(x=new_data.index,y=new_data.values)
plt.xlabel('grades')
plt.ylabel('counts')
# Write each bar's height just above its top edge.
for bar in plots.patches:
    plots.annotate(format(bar.get_height(), '.2f'),
                   (bar.get_x() + bar.get_width() / 2,
                    bar.get_height()), ha='center', va='center',
                   size=10, xytext=(0,5),
                   textcoords='offset points')
plt.show()

Creating Horizontal Bars with Itertools and For Loops (Python)

I've been playing around with Matplotlib and created a horizontal bar using the following algorithm (Full code and junk data provided at the bottom of this post).
# Version 1
# All three (start, thickness) segments are passed in one call, with one
# face colour listed per segment.
ax.broken_barh([(depth_start[0], thick[0]), (depth_start[1], thick[1]), (depth_start[2], thick[2])], (25, 0.8),
facecolors=('tab:brown', 'tab:blue', 'tab:green'))
which produces the following graphical output:
So I've been trying to make the code more efficient by introducing itertools
I managed to simplify the above code into a version 2:
# Version 2
# One call per segment; depth, thickness and colour each come from their own
# cycling iterator, so the three advance in step.
for i in thick:
    ax.broken_barh([(next(cycle_depth), next(cycle_thick))], (15, 0.8), facecolors=(next(cycle_colour)))
Great, this also produces the above bar in the same order with the same colours.
The Problem
But I'm struggling with my next objective which is to replace facecolors=('tab:brown', 'tab:blue', 'tab:green') with a function that uses a for loop. This function ideally selects the correct colour for each bar based on the thickness. All 3 bars return a brown colour as the function continuously returns the value associated with the else statement (see image below).
I've attempted substituting next(cycle_thick) in place of the variable cycle_thick in the function, but then only one of the colours is correct again.
The colour_checker() function is as follows:
def colour_checker():
    """Return a colour name chosen by comparing ``cycle_thick`` to 10 and 20.

    NOTE: this is the asker's code, kept as posted apart from restored
    indentation. ``cycle_thick`` is the itertools.cycle iterator itself, not
    a thickness value, so neither comparison matches and 'tab:brown' is
    always returned. That is the behaviour the question describes.
    """
    if cycle_thick == 10:
        return 'tab:green'
    elif cycle_thick == 20:
        return 'tab:blue'
    else:
        return 'tab:brown'
# Version 3
# Same loop as Version 2, but the colour comes from colour_checker().
for i in thick:
    ax.broken_barh([(next(cycle_depth), next(cycle_thick))], (10, 0.8), facecolors=colour_checker())
Any hints or suggestions welcomed!
Full Code and Junk Data
import itertools
import matplotlib.pyplot as plt
# Junk data in the form of lists
depth_start = [90, 70, 40] # top of lithology
thick = [30, 20, 10] # thickness for each lithology
colour = ('tab:brown', 'tab:blue', 'tab:green')
# Lists to be cycled through
# (each cycle is an endless iterator; next() returns the following item)
cycle_colour = itertools.cycle(colour)
cycle_depth = itertools.cycle(depth_start)
cycle_thick = itertools.cycle(thick)
#setting up the plot
fig, ax = plt.subplots()
def colour_checker():
    """Return a colour name chosen by comparing ``cycle_thick`` to [0] and [1].

    NOTE: this is the asker's code, kept as posted apart from restored
    indentation. ``cycle_thick`` is an itertools.cycle iterator, which never
    equals a list, so 'tab:brown' is always returned. That is the behaviour
    the question describes.
    """
    if cycle_thick == [0]:
        return 'tab:green'
    elif cycle_thick == [1]:
        return 'tab:blue'
    else:
        return 'tab:brown'
# Version 1
# All three segments in a single call, one face colour per segment.
ax.broken_barh([(depth_start[0], thick[0]), (depth_start[1], thick[1]), (depth_start[2], thick[2])], (25, 0.8),
               facecolors=('tab:brown', 'tab:blue', 'tab:green'))
# Version 2
# One call per segment; the three cycles advance in step.
for i in thick:
    ax.broken_barh([(next(cycle_depth), next(cycle_thick))], (15, 0.8), facecolors=(next(cycle_colour)))
# Version 3
# Same loop, but the colour comes from colour_checker().
for i in thick:
    ax.broken_barh([(next(cycle_depth), next(cycle_thick))], (10, 0.8), facecolors=colour_checker())
ax.set_ylabel('X_UTM Position')
ax.set_xlabel('MAMSL')
plt.show()
Since the intention of the outcome was ambiguous, I have created examples for all three versions I can imagine.
import matplotlib.pyplot as plt
# Junk data in the form of lists
depth_start = [90, 70, 40, 200, 170, 140] # top of lithology
thick = [30, 20, 10, 20, 10, 30] # thickness for each lithology
colour = ('tab:brown', 'tab:blue', 'tab:green')
#setting up the plot
fig, ax = plt.subplots()
#Version 1: using zip to chain all three lists
# (colour + reversed colour gives one explicit colour per segment)
for start, length, color in zip(depth_start, thick, colour+colour[::-1]):
    ax.broken_barh([(start, length)], (-0.4, 0.8), facecolors=color)
#Version 2: color cycler repetitive color assignments
from itertools import cycle
cycle_colour = cycle(colour)
for start, length in zip(depth_start, thick):
    ax.broken_barh([(start, length)], (0.6, 0.8), facecolors=next(cycle_colour))
#Version 3: lookup table to color bars of a specific length with a certain color
# (a thickness missing from color_dic raises KeyError)
color_dic = {30: 'tab:brown', 20: 'tab:blue', 10: 'tab:green'}
for start, length in zip(depth_start, thick):
    ax.broken_barh([(start, length)], (1.6, 0.8), facecolors=color_dic[length])
# One row per version, centred on y = 0, 1, 2.
ax.set_yticks(range(3))
ax.set_yticklabels(["Version 1", "Version 2", "Version 3"])
plt.show()
Sample output:

Bokeh: How to add a legend and custom color boundaries to an image plot?

I have a two-dimensional array that I want to plot using bokeh's bokeh.plotting.figure.Figure.image. It works wonderful.
Now, I want to add a legend using the colors used for the image. I don't find any example for my case. The legend that I'd like to achieve is similar to the picture.
from bokeh.models import LinearColorMapper, ColorBar
from bokeh.plotting import figure, show
plot = figure(x_range=(0,1), y_range=(0,1), toolbar_location="right")
# Maps values in [-1, 1] onto the palette; NaN values are drawn white.
color_mapper = LinearColorMapper(palette="YlGn9", low=-1, high=1, nan_color="white")
# `ndvi` is the asker's 2-D array; it is not defined in this snippet.
plot.image(image=[ndvi], color_mapper=color_mapper,dh=[1.0], dw=[1.0], x=[0], y=[0])
# The colour bar shares the mapper, so it shows the same value-to-colour mapping.
color_bar = ColorBar(color_mapper=color_mapper,label_standoff=12, border_line_color=None, location=(0,0))
plot.add_layout(color_bar, 'right')
Additionally, I'd like to have some custom color boundaries, with non-fixed intervals. Here is an example how it would be done with matplotlib:
# Eight colours for the eight intervals between the nine bounds below.
# `colors` and `data` are not defined in this snippet
# (presumably matplotlib.colors and the asker's array; verify).
cmap = colors.ListedColormap(['#27821f', '#3fa336', '#6ce362','#ffffff','#e063a8' ,'#cc3b8b','#9e008c','#59044f'])
bounds = [-1000, -500, -100, 0, 50, 100, 300, 500, 10000000]
norm = colors.BoundaryNorm(bounds, cmap.N)
fig, ax = plt.subplots()
ax.imshow(data, cmap=cmap, norm=norm)
You can choose the red-yellow-green palette. In bokeh the name is 'RdYlGn5', where the digit at the end tells how many colors are needed. To use it in a legend, you'd need to import RdYlGn5 from bokeh.palettes.
For creating the legend, I only know of employing some dummy glyphs as in the code below.
I updated my example with the new requirements of setting custom bounds with non-fixed intervals. This post offers some guidance. Basically, the idea is to use a larger colormap with repeated colors. Such a format doesn't fit for general types of boundaries, but it fits yours, at least when the lowest and highest bound are interpreted to be infinite.
I also tried to layout the legend with some custom spaces to get all labels aligned. A background color is chosen to contrast with the legend entries.
There is a colorbar to verify how the colormap bounds work internally. After verification, you may leave it out. The example image has values from -1000 to 1000 to show how the values outside the strict colormap limits are handled.
Here is an example with dummy data:
from bokeh.models import LinearColorMapper, Legend, LegendItem, ColorBar, SingleIntervalTicker
from bokeh.plotting import figure, show
import numpy as np
# Dummy image data with values between -1000 and 1000.
x, y = np.meshgrid(np.linspace(0, 10, 1000), np.linspace(0, 10, 1000))
z = 1000*np.sin(x + np.cos(y))
plot = figure(x_range=(0, 1), y_range=(0, 1), toolbar_location="right")
base_colors = ['#27821f', '#3fa336', '#6ce362','#ffffff','#e063a8' ,'#cc3b8b','#9e008c','#59044f']
bounds = [-1000, -500, -100, 0, 50, 100, 300, 500, 10000000]
low = -600
high = 600
# Build an evenly spaced palette (one entry per 50 units) in which each
# base colour is repeated for as many steps as its interval spans.
bound_colors = []
j = 0
for i in range(low, high, 50):
    # `while` rather than `if`, so a step that crosses several bounds still
    # lands on the right colour; the guard keeps j inside base_colors.
    while j < len(base_colors) - 1 and i >= bounds[j+1]:
        j += 1
    bound_colors.append(base_colors[j])
color_mapper = LinearColorMapper(palette=bound_colors, low=low, high=high, nan_color="white")
plot.image(image=[z], color_mapper=color_mapper, dh=[1.0], dw=[1.0], x=[0], y=[0])
# these are a dummy glyphs to help draw the legend
dummy_for_legend = [plot.line(x=[1, 1], y=[1, 1], line_width=15, color=c, name='dummy_for_legend')
                    for c in base_colors]
# One label per base colour: "< first inner bound", the inner intervals,
# "> last inner bound".
# NOTE(review): the three padding strings below are all a single space here;
# they presumably differed in width originally to align the labels - verify.
legend_labels = [f' < {bounds[1]}'] + \
                [('' if l < 0 else ' ' if l < 10 else ' ' if l < 100 else ' ')
                 + f'{l} ‒ {h}' for l, h in zip(bounds[1:], bounds[2:-1])] + \
                [f' > {bounds[-2]}']
legend1 = Legend(title="NDVI", background_fill_color='gold',
                 items=[LegendItem(label=lab, renderers=[gly]) for lab, gly in zip(legend_labels, dummy_for_legend) ])
plot.add_layout(legend1)
# Colour bar ticked every 50 units, to check the palette against the bounds.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, border_line_color=None, location=(0, 0),
                     ticker=SingleIntervalTicker(interval=50))
plot.add_layout(color_bar)
show(plot)

How to plot a grouped bar chart from multiple datasets

I am going through Think Stats and I would like to compare multiple data sets visually. I can see from the book examples that it is possible to generate an interleaved bar graph with a different color for each data set by using a module provided by the book author, how to obtain the same result in pyplot?
Call the bar function multiple times, one for each series. You can control the left position of the bars using the left parameter, and you can use this to prevent overlap.
Entirely untested code:
# Even x positions for data1, odd positions for data2, so the two series
# interleave; each series gets its own colour to tell them apart.
pyplot.bar( numpy.arange(10) * 2, data1, color = 'red' )
pyplot.bar( numpy.arange(10) * 2 + 1, data2, color = 'blue' )
Data2 will be drawn shifted over the right compared to where data one will be drawn.
Matplotlib's example code for interleaved bar charts works nicely for arbitrary real-valued x coordinates (as mentioned by @db42).
However, if your x coordinates are categorical values (like in the case of dictionaries in the linked question), the conversion from categorical x coordinates to real x coordinates is cumbersome and unnecessary.
You can plot two dictionaries side-by-side directly using matplotlib's api. The trick for plotting two bar charts with an offset to each other is to set align=edge and a positive width (+width) for plotting one bar chart, whereas a negative width (-width) for plotting the other one.
The example code modified for plotting two dictionaries looks like the following then:
"""
========
Barchart
========
A bar plot with errorbars and height labels on individual bars
"""
import matplotlib.pyplot as plt
# Uncomment the following line if you use ipython notebook
# %matplotlib inline
width = 0.35 # the width of the bars
# Means and standard deviations per group, keyed by group label.
men_means = {'G1': 20, 'G2': 35, 'G3': 30, 'G4': 35, 'G5': 27}
men_std = {'G1': 2, 'G2': 3, 'G3': 4, 'G4': 1, 'G5': 2}
# Negative width with align='edge': these bars extend to the left of the tick.
rects1 = plt.bar(men_means.keys(), men_means.values(), -width, align='edge',
yerr=men_std.values(), color='r', label='Men')
women_means = {'G1': 25, 'G2': 32, 'G3': 34, 'G4': 20, 'G5': 25}
women_std = {'G1': 3, 'G2': 5, 'G3': 2, 'G4': 3, 'G5': 3}
# Positive width with align='edge': these bars extend to the right of the tick.
rects2 = plt.bar(women_means.keys(), women_means.values(), +width, align='edge',
yerr=women_std.values(), color='y', label='Women')
# add some text for labels, title and axes ticks
plt.xlabel('Groups')
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.legend()
def autolabel(rects):
    """
    Attach a text label above each bar displaying its height

    :param rects: iterable of bar patches, e.g. the return value of plt.bar;
        each item must provide get_height(), get_x() and get_width().
    """
    for rect in rects:
        height = rect.get_height()
        # centred on the bar, 5 % above its top; height shown as an integer
        plt.text(rect.get_x() + rect.get_width()/2., 1.05*height,
                 '%d' % int(height),
                 ha='center', va='bottom')
# Label both bar series, then display the figure.
autolabel(rects1)
autolabel(rects2)
plt.show()
The result:
I came across this problem a while ago and created a wrapper function that takes a 2D array and automatically creates a multi-barchart from it:
The code:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import operator as o
import numpy as np
# One row per measurement: [condition, category, value].
# barplot() converts the value column with astype before using it.
dpoints = np.array([['rosetta', '1mfq', 9.97],
['rosetta', '1gid', 27.31],
['rosetta', '1y26', 5.77],
['rnacomposer', '1mfq', 5.55],
['rnacomposer', '1gid', 37.74],
['rnacomposer', '1y26', 5.77],
['random', '1mfq', 10.32],
['random', '1gid', 31.46],
['random', '1y26', 18.16]])
# Single-axes figure that barplot() draws into.
fig = plt.figure()
ax = fig.add_subplot(111)
def barplot(ax, dpoints):
    '''
    Create a barchart for data across different categories with
    multiple conditions for each category.

    @param ax: The plotting axes from matplotlib.
    @param dpoints: The data set as an (n, 3) numpy array whose columns are
        condition, category and value
    '''
    # Aggregate the conditions and the categories according to their
    # mean values
    conditions = [(c, np.mean(dpoints[dpoints[:,0] == c][:,2].astype(float)))
                  for c in np.unique(dpoints[:,0])]
    categories = [(c, np.mean(dpoints[dpoints[:,1] == c][:,2].astype(float)))
                  for c in np.unique(dpoints[:,1])]
    # sort the conditions, categories and data so that the bars in
    # the plot will be ordered by category and condition
    conditions = [c[0] for c in sorted(conditions, key=o.itemgetter(1))]
    categories = [c[0] for c in sorted(categories, key=o.itemgetter(1))]
    dpoints = np.array(sorted(dpoints, key=lambda x: categories.index(x[1])))
    # the space between each set of bars
    space = 0.3
    n = len(conditions)
    width = (1 - space) / n
    # Tick positions 1..len(categories). Defined before the loop because it
    # is also needed for the tick labels afterwards.
    indeces = range(1, len(categories)+1)
    # Create a set of bars at each position
    for i, cond in enumerate(conditions):
        # builtin float: the np.float alias was removed from NumPy
        vals = dpoints[dpoints[:,0] == cond][:,2].astype(float)
        # shift condition i's bar by i bar-widths inside each group
        pos = [j - (1 - space) / 2. + i * width for j in indeces]
        ax.bar(pos, vals, width=width, label=cond,
               color=cm.Accent(float(i) / n))
    # Set the x-axis tick labels to be equal to the categories
    ax.set_xticks(indeces)
    ax.set_xticklabels(categories)
    plt.setp(plt.xticks()[1], rotation=90)
    # Add the axis labels
    ax.set_ylabel("RMSD")
    ax.set_xlabel("Structure")
    # Add a legend
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='upper left')
# Draw the grouped bars into the prepared axes and display the figure.
barplot(ax, dpoints)
plt.show()
If you're interested in what this function does and the logic behind it, here's a (shamelessly self-promoting) link to the blog post describing it.

Categories