Assign specific colours to data in Matplotlib pie chart - python

I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
return my_autopct

Here is a simpler solution to #tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes, labels=labels)
ax[1].pie(sizes[1:], labels=labels[1:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
colours = {'Frogs': 'C0',
'Hogs': 'C1',
'Dogs': 'C2',
'Logs': 'C3'}
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes,
labels=labels,
colors=[colours[key] for key in labels])
ax[1].pie(sizes[1:],
labels=labels[1:],
colors=[colours[key] for key in labels[1:]])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))

Here's an idea you could try. Make a dictionary from your labels and colors, so each color is mapped to a label. Then, after making the pie chart, go in an assign the facecolor of the wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices,labels,colors):
colordict={}
for l,c in zip(labels,colors):
print l,c
colordict[l]=c
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])
titlestring = 'Issues'
ax.set_title(titlestring)
return fig,ax,pie_wedge_collection
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()

Related

Color pie plot for loop [duplicate]

I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
return my_autopct
Here is a simpler solution to #tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes, labels=labels)
ax[1].pie(sizes[1:], labels=labels[1:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
colours = {'Frogs': 'C0',
'Hogs': 'C1',
'Dogs': 'C2',
'Logs': 'C3'}
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes,
labels=labels,
colors=[colours[key] for key in labels])
ax[1].pie(sizes[1:],
labels=labels[1:],
colors=[colours[key] for key in labels[1:]])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))
Here's an idea you could try. Make a dictionary from your labels and colors, so each color is mapped to a label. Then, after making the pie chart, go in an assign the facecolor of the wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices,labels,colors):
colordict={}
for l,c in zip(labels,colors):
print l,c
colordict[l]=c
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])
titlestring = 'Issues'
ax.set_title(titlestring)
return fig,ax,pie_wedge_collection
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()

Set 'global' colorbar range for multiple matplotlib subplots of different ranges

I would like to plot data in subplots using matplotlib.pyplot in python. Each subplot will contain data of different ranges. I would like to plot them using pyplot.scatter, and use one single colorbar for the entire plot. Thus, the colorbar should encompass the entire range of the values in every subplot. However, when I use a loop to plot the subplots and call a colorbar outside of the loop, it only uses the range of values from the last subplot. A lot of examples available concern the sizing the position of the colorbar, so this answer (how to make one universal colorbar for multiple subplots) is not obvious.
I have the following self-contained example code. Here, two subplots are rendered, one that should be colored with frigid temperatures typical of Russia and the other with tropical temperatures of Brazil. However, the end result shows a colorbar that only ranges the tropical Brazilian temperatures, making the Russia subplot erroneous:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
core_list = ['Russia', 'Brazil']
core_depth = [0, 2, 4, 6, 8, 10]
lo = [-33, 28]
hi = [10, 38]
df = pd.DataFrame([], columns = ['Location', 'Depth', '%TOC', 'Temperature'])
#Fill df
for ii, name in enumerate(core_list):
for jj in core_depth:
df.loc[len(df.index)] = [name, jj, (np.random.randint(1, 20))/10, np.random.randint(lo[ii], hi[ii])]
#Russia data have much colder temperatures than Brazil data due to hi and lo
#Plot data from each location using scatter plots
fig, axs = plt.subplots(nrows = 1, ncols = 2, sharey = True)
for nn, name in enumerate(core_list):
core_mask = df['Location'] == name
data = df.loc[core_mask]
plt.sca(axs[nn])
plt.scatter(data['Depth'], data['%TOC'], c = data['Temperature'], s = 50, edgecolors = 'k')
axs[nn].set_xlabel('%TOC')
plt.text(1.25*min(data['%TOC']), 1.75, name)
if nn == 0:
axs[nn].set_ylabel('Depth')
cbar = plt.colorbar()
cbar.ax.set_ylabel('Temperature, degrees C')
#How did Russia get so warm?!? Temperatures and ranges of colorbar are set to last called location.
#How do I make one colorbar encompass global temperature range of both data sets?
The output of this code shows that the temperatures in Brazil and Russia fall within the same range of colors:
We know intuitively, and from glancing at the data, that this is wrong. So, how do we tell pyplot to plot this correctly?
The answer is straightforward using the vmax and vmin controls of pyplot.scatter. These must be set with a universal range of data, not just the data focused on in any single iteration of a loop. Thus, to change the code above:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
core_list = ['Russia', 'Brazil']
core_depth = [0, 2, 4, 6, 8, 10]
lo = [-33, 28]
hi = [10, 38]
df = pd.DataFrame([], columns = ['Location', 'Depth', '%TOC', 'Temperature'])
#Fill df
for ii, name in enumerate(core_list):
for jj in core_depth:
df.loc[len(df.index)] = [
name,
jj,
(np.random.randint(1, 20))/10,
np.random.randint(lo[ii], hi[ii])
]
#Russia data have much colder temperatures than Brazil data due to hi and lo
#Plot data from each location using scatter plots
fig, axs = plt.subplots(nrows = 1, ncols = 2, sharey = True)
for nn, name in enumerate(core_list):
core_mask = df['Location'] == name
data = df.loc[core_mask]
plt.sca(axs[nn])
plt.scatter(
data['Depth'],
data['%TOC'],
c=data['Temperature'],
s=50,
edgecolors='k',
vmax=max(df['Temperature']),
vmin=min(df['Temperature'])
)
axs[nn].set_xlabel('%TOC')
plt.text(1.25*min(data['%TOC']), 1.75, name)
if nn == 0:
axs[nn].set_ylabel('Depth')
cbar = plt.colorbar()
cbar.ax.set_ylabel('Temperature, degrees C')
Now, the output shows a temperature difference between Russia and Brazil, which one would expect after a cursory glance at the data. The change that fixes this problem occurs within the for loop, however it references all of the data to find a max and min:
plt.scatter(data['Depth'], data['%TOC'], c = data['Temperature'], s = 50, edgecolors = 'k', vmax = max(df['Temperature']), vmin = min(df['Temperature']) )

Matplotlib Xticks Values in Bar Chart

I have this code and im trying to make a graph. All values are in the lists are correct. However, i have problem in x axis. First, there is a gap between first two ticks. I read all matplotlib in their site but couldn find anyting useful for this problem. I'm all confused about xticks function. This is my graph
plt.bar(Valloc1,[diabete[w] for w in sorted(diabete.keys())], width=0.2,)
plt.bar(Valloc2,[not_diabete[w] for w in sorted(not_diabete.keys())], width=0.1, )
plt.xticks(all_years, rotation = '65')
plt.legend(['Diabete','Not-Diabete'])
plt.xlabel('Years')
plt.ylabel('# of patients')
plt.legend()
plt.show()
I have tried this line but everything went worse.
plt.xticks(range(all_years),all_years, rotation = '65')
I also want that two bars to be beside not overlap. Just like this :
Bars side-by-side
My variables are:
Valloc1 = [i for i in range(52)]
diabete = {{1967: 5, 1986: 13, 1985: 9, 1996: 5, 1984: 10, 1987: 6, 1991: 8...}
Here is an example how to solve your problem. As you don't provide the data, I first generate some random data in the example below and then visualise it as a bar plot like you requested:
from matplotlib import pyplot as plt
import numpy as np
##generating some data
years = [1936, 1945]+[i for i in range(1947,1997)]
data1 = np.random.rand(len(years))
data2 = np.random.rand(len(years))
diabete = {key: val for key,val in zip(years, data1)}
not_diabete = {key: val for key,val in zip(years, data2)}
##the actual graph:
fig, ax = plt.subplots(figsize = (10,4))
idx = np.asarray([i for i in range(len(years))])
width = 0.2
ax.bar(idx, [val for key,val in sorted(diabete.items())], width=width)
ax.bar(idx+width, [val for key,val in sorted(not_diabete.items())], width=width)
ax.set_xticks(idx)
ax.set_xticklabels(years, rotation=65)
ax.legend(['Diabete', 'Non-Diabete'])
ax.set_xlabel('years')
ax.set_ylabel('# of patients')
fig.tight_layout()
plt.show()
The result looks like this:

Create a Diverging Stacked Bar Chart in matplotlib

I have lists of data indicating responses to likert questions with a one (very unhappy) to five (very happy) scale. I would like to create a page of plots showing these lists as skewed stacked horizontal bar charts. The lists of responses can be of different sizes (e.g. when someone has opted out of answering a particular question). Here is a minimal example of the data:
likert1 = [1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0]
likert2 = [5.0, 4.0, 5.0, 4.0, 5.0, 3.0]
I would like to be able to plot this with something like:
plot_many_likerts(likert1, likert2)
At the moment I've written a function to iterate over the lists, and plot each one as its own subplot on a shared figure in matplotlib:
def plot_many_likerts(*lsts):
#get the figure and the list of axes for this plot
fig, axlst = plt.subplots(len(lsts), sharex=True)
for i in range(len(lsts)):
likert_horizontal_bar_list(lsts[i], axlst[i], xaxis=[1.0, 2.0, 3.0, 4.0, 5.0])
axlst[i].axis('off')
fig.show()
def likert_horizontal_bar_list(lst, ax, xaxis):
cnt = Counter(lst)
#del (cnt[None])
i = 0
colour_float = 0.00001
previous_right = 0
for key in sorted(xaxis):
ax.barh(bottom=0, width=cnt[key], height=0.4, left=previous_right, color=plt.cm.jet(colour_float),label=str(key))
i += 1
previous_right = previous_right + cnt[key]
colour_float = float(i) / float(len(xaxis))
This works not badly and create stacked bar charts all with the same representative sizes (e.g. the widths share common axis scales). Here is a screen shot:
What is currently Produced http://s7.postimg.org/vh0j816gn/figure_1.jpg
What I would like is to have these two plots centered on midpoints of the mode of the datasets (the datasets will have the same range). For instance:
What I would like to see http://s29.postimg.org/z0qwv4ryr/figure_2.jpg
Suggestions on how I might do this?
I needed to make a divergent bar chart for some likert data. I was using pandas, but the approach would probably be similar without it. The key mechanism is to add in an invisible buffer at the start.
likert_colors = ['white', 'firebrick','lightcoral','gainsboro','cornflowerblue', 'darkblue']
dummy = pd.DataFrame([[1,2,3,4, 5], [5,6,7,8, 5], [10, 4, 2, 10, 5]],
columns=["SD", "D", "N", "A", "SA"],
index=["Key 1", "Key B", "Key III"])
middles = dummy[["SD", "D"]].sum(axis=1)+dummy["N"]*.5
longest = middles.max()
complete_longest = dummy.sum(axis=1).max()
dummy.insert(0, '', (middles - longest).abs())
dummy.plot.barh(stacked=True, color=likert_colors, edgecolor='none', legend=False)
z = plt.axvline(longest, linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
plt.xlim(0, complete_longest)
xvalues = range(0,complete_longest,10)
xlabels = [str(x-longest) for x in xvalues]
plt.xticks(xvalues, xlabels)
plt.show()
There are many limitations to this approach. First, bars no longer get a black outline, and the legend will have an extra blank element. I just hid the legend (I figure there's probably a way to hide just the individual element). I'm not sure of a convenient way to make the bars have an outline without also adding the outline to the buffer element.
First, we establish some colors and dummy data. Then we calculate the width of the left two columns and half of the middle-most column (which i know to be "SD", "D", and "N", respectively). I find the longest column, and use its width to calculate the difference needed for the other columns. Next, I insert this new buffer column into the first column position with a blank title (which felt gross, lemme tell you). For good measure, I also added a vertical line (axvline) behind the middle of the middle bar based on the advice of [2]. Finally, I adjust the x-axis to have the proper scale by offsetting its labels.
You might want more horizontal space on the left - you can easily do so by adding to "longest".
[2] Heiberger, Richard M., and Naomi B. Robbins. "Design of diverging stacked bar charts for Likert scales and other applications." Journal of Statistical Software 57.5 (2014): 1-32.
I too recently needed to make a divergent bar chart for some Likert data. I took a slightly different approach than #austin-cory-bart.
I modified an example from the gallery instead and created this:
import numpy as np
import matplotlib.pyplot as plt
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
results = {
'Question 1': [10, 15, 17, 32, 26],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 38]
}
def survey(results, category_names):
"""
Parameters
----------
results : dict
A mapping from question labels to a list of answers per category.
It is assumed all lists contain the same number of entries and that
it matches the length of *category_names*. The order is assumed
to be from 'Strongly disagree' to 'Strongly aisagree'
category_names : list of str
The category labels.
"""
labels = list(results.keys())
data = np.array(list(results.values()))
data_cum = data.cumsum(axis=1)
middle_index = data.shape[1]//2
offsets = data[:, range(middle_index)].sum(axis=1) + data[:, middle_index]/2
# Color Mapping
category_colors = plt.get_cmap('coolwarm_r')(
np.linspace(0.15, 0.85, data.shape[1]))
fig, ax = plt.subplots(figsize=(10, 5))
# Plot Bars
for i, (colname, color) in enumerate(zip(category_names, category_colors)):
widths = data[:, i]
starts = data_cum[:, i] - widths - offsets
rects = ax.barh(labels, widths, left=starts, height=0.5,
label=colname, color=color)
# Add Zero Reference Line
ax.axvline(0, linestyle='--', color='black', alpha=.25)
# X Axis
ax.set_xlim(-90, 90)
ax.set_xticks(np.arange(-90, 91, 10))
ax.xaxis.set_major_formatter(lambda x, pos: str(abs(int(x))))
# Y Axis
ax.invert_yaxis()
# Remove spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
# Ledgend
ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
loc='lower left', fontsize='small')
# Set Background Color
fig.set_facecolor('#FFFFFF')
return fig, ax
fig, ax = survey(results, category_names)
plt.show()

How to plot a grouped bar chart from multiple datasets

I am going through Think Stats and I would like to compare multiple data sets visually. I can see from the book examples that it is possible to generate an interleaved bar graph with a different color for each data set by using a module provided by the book author, how to obtain the same result in pyplot?
Call the bar function multiple times, one for each series. You can control the left position of the bars using the left parameter, and you can use this to prevent overlap.
Entirely untested code:
pyplot.bar( numpy.arange(10) * 2, data1, color = 'red' )
pyplot.bar( numpy.arange(10) * 2 + 1, data2, color = 'red' )
Data2 will be drawn shifted over the right compared to where data one will be drawn.
Matplotlib's example code for interleaved bar charts works nicely for arbitrary real-valued x coordinates (as mentioned by #db42).
However, if your x coordinates are categorical values (like in the case of dictionaries in the linked question), the conversion from categorical x coordinates to real x coordinates is cumbersome and unnecessary.
You can plot two dictionaries side-by-side directly using matplotlib's api. The trick for plotting two bar charts with an offset to each other is to set align=edge and a positive width (+width) for plotting one bar chart, whereas a negative width (-width) for plotting the other one.
The example code modified for plotting two dictionaries looks like the following then:
"""
========
Barchart
========
A bar plot with errorbars and height labels on individual bars
"""
import matplotlib.pyplot as plt
# Uncomment the following line if you use ipython notebook
# %matplotlib inline
width = 0.35 # the width of the bars
men_means = {'G1': 20, 'G2': 35, 'G3': 30, 'G4': 35, 'G5': 27}
men_std = {'G1': 2, 'G2': 3, 'G3': 4, 'G4': 1, 'G5': 2}
rects1 = plt.bar(men_means.keys(), men_means.values(), -width, align='edge',
yerr=men_std.values(), color='r', label='Men')
women_means = {'G1': 25, 'G2': 32, 'G3': 34, 'G4': 20, 'G5': 25}
women_std = {'G1': 3, 'G2': 5, 'G3': 2, 'G4': 3, 'G5': 3}
rects2 = plt.bar(women_means.keys(), women_means.values(), +width, align='edge',
yerr=women_std.values(), color='y', label='Women')
# add some text for labels, title and axes ticks
plt.xlabel('Groups')
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.legend()
def autolabel(rects):
"""
Attach a text label above each bar displaying its height
"""
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x() + rect.get_width()/2., 1.05*height,
'%d' % int(height),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
plt.show()
The result:
I came across this problem a while ago and created a wrapper function that takes a 2D array and automatically creates a multi-barchart from it:
The code:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import operator as o
import numpy as np
dpoints = np.array([['rosetta', '1mfq', 9.97],
['rosetta', '1gid', 27.31],
['rosetta', '1y26', 5.77],
['rnacomposer', '1mfq', 5.55],
['rnacomposer', '1gid', 37.74],
['rnacomposer', '1y26', 5.77],
['random', '1mfq', 10.32],
['random', '1gid', 31.46],
['random', '1y26', 18.16]])
fig = plt.figure()
ax = fig.add_subplot(111)
def barplot(ax, dpoints):
'''
Create a barchart for data across different categories with
multiple conditions for each category.
#param ax: The plotting axes from matplotlib.
#param dpoints: The data set as an (n, 3) numpy array
'''
# Aggregate the conditions and the categories according to their
# mean values
conditions = [(c, np.mean(dpoints[dpoints[:,0] == c][:,2].astype(float)))
for c in np.unique(dpoints[:,0])]
categories = [(c, np.mean(dpoints[dpoints[:,1] == c][:,2].astype(float)))
for c in np.unique(dpoints[:,1])]
# sort the conditions, categories and data so that the bars in
# the plot will be ordered by category and condition
conditions = [c[0] for c in sorted(conditions, key=o.itemgetter(1))]
categories = [c[0] for c in sorted(categories, key=o.itemgetter(1))]
dpoints = np.array(sorted(dpoints, key=lambda x: categories.index(x[1])))
# the space between each set of bars
space = 0.3
n = len(conditions)
width = (1 - space) / (len(conditions))
# Create a set of bars at each position
for i,cond in enumerate(conditions):
indeces = range(1, len(categories)+1)
vals = dpoints[dpoints[:,0] == cond][:,2].astype(np.float)
pos = [j - (1 - space) / 2. + i * width for j in indeces]
ax.bar(pos, vals, width=width, label=cond,
color=cm.Accent(float(i) / n))
# Set the x-axis tick labels to be equal to the categories
ax.set_xticks(indeces)
ax.set_xticklabels(categories)
plt.setp(plt.xticks()[1], rotation=90)
# Add the axis labels
ax.set_ylabel("RMSD")
ax.set_xlabel("Structure")
# Add a legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='upper left')
barplot(ax, dpoints)
plt.show()
If you're interested in what this function does and the logic behind it, here's a (shamelessly self-promoting) link to the blog post describing it.

Categories