My Python code is:
values = [234, 64, 54,10, 0, 1, 0, 9, 2, 1, 7, 7]
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul','Aug','Sep','Oct', 'Nov','Dec']
colors = ['yellowgreen', 'red', 'gold', 'lightskyblue',
'white','lightcoral','blue','pink', 'darkgreen',
'yellow','grey','violet','magenta','cyan']
plt.pie(values, labels=labels, autopct='%1.1f%%', shadow=True,
colors=colors, startangle=90, radius=1.2)
plt.show()
Is it possible to show the labels "Jan", "Feb", "Mar", etc. and the percentages, either:
without overlapping, or
using an arrow mark?
Alternatively you can put the legends beside the pie graph:
import matplotlib.pyplot as plt
import numpy as np
x = np.char.array(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct', 'Nov','Dec'])
y = np.array([234, 64, 54,10, 0, 1, 0, 9, 2, 1, 7, 7])
colors = ['yellowgreen','red','gold','lightskyblue','white','lightcoral','blue','pink', 'darkgreen','yellow','grey','violet','magenta','cyan']
porcent = 100.*y/y.sum()
patches, texts = plt.pie(y, colors=colors, startangle=90, radius=1.2)
labels = ['{0} - {1:1.2f} %'.format(i,j) for i,j in zip(x, porcent)]
sort_legend = True
if sort_legend:
patches, labels, dummy = zip(*sorted(zip(patches, labels, y),
key=lambda x: x[2],
reverse=True))
plt.legend(patches, labels, loc='left center', bbox_to_anchor=(-0.1, 1.),
fontsize=8)
plt.savefig('piechart.png', bbox_inches='tight')
EDIT: if you want to keep the legend in the original order, as you mentioned in the comments, you can set sort_legend=False in the code above, giving:
If anyone just wants to offset the labels automatically, and not use a legend, I wrote this function that does it (yup I'm a real try-hard). It uses numpy but could easily be re-written in pure python.
import numpy as np
def fix_labels(mylabels, tooclose=0.1, sepfactor=2):
vecs = np.zeros((len(mylabels), len(mylabels), 2))
dists = np.zeros((len(mylabels), len(mylabels)))
for i in range(0, len(mylabels)-1):
for j in range(i+1, len(mylabels)):
a = np.array(mylabels[i].get_position())
b = np.array(mylabels[j].get_position())
dists[i,j] = np.linalg.norm(a-b)
vecs[i,j,:] = a-b
if dists[i,j] < tooclose:
mylabels[i].set_x(a[0] + sepfactor*vecs[i,j,0])
mylabels[i].set_y(a[1] + sepfactor*vecs[i,j,1])
mylabels[j].set_x(b[0] - sepfactor*vecs[i,j,0])
mylabels[j].set_y(b[1] - sepfactor*vecs[i,j,1])
So use it like:
wedges, labels, autopct = ax1.pie(sizes, labels=groups, autopct='%1.1f%%',
shadow=False, startangle=90)
fix_labels(autopct, sepfactor=3)
fix_labels(labels, sepfactor=2)
This works well as-written if you only have a few labels overlapping. If you have a whole bunch like OP, you might want to add a random direction vector to the vecs[i,j,:] = a-b line. That would probably work well.
Try tightlayout.
plt.tight_layout()
at the end of your code. It may prevent the overlap a little bit.
First of all; avoid pie charts whenever you can!
Secondly, have a think about how objects work in python. I believe this example should be self-explaining, however, you obviously don't need to move labels manually.
from matplotlib import pyplot as plt
fig, ax = plt.subplots()
ax.axis('equal')
patches, texts, autotexts = ax.pie([12,6,2,3],
labels=['A', 'B', 'C', 'no data'],
autopct='%1.1f%%',
pctdistance=0.5,
labeldistance=1.1)
# Move a label
texts[1]._x =-0.5
texts[1]._y =+0.5
# E.g. change some formatting
texts[-1]._color = 'blue'
There are some options to modify the labels:
# Check all options
print(texts[0].__dict__)
returns
{'_stale': False,
'stale_callback': <function matplotlib.artist._stale_axes_callback(self, val)>,
'_axes': <AxesSubplot:>,
'figure': <Figure size 432x288 with 1 Axes>,
'_transform': <matplotlib.transforms.CompositeGenericTransform at 0x7fe09bedf210>,
'_transformSet': True,
'_visible': True,
'_animated': False,
'_alpha': None,
'clipbox': <matplotlib.transforms.TransformedBbox at 0x7fe065d3dd50>,
'_clippath': None,
'_clipon': False,
'_label': '',
'_picker': None,
'_contains': None,
'_rasterized': None,
'_agg_filter': None,
'_mouseover': False,
'eventson': False,
'_oid': 0,
'_propobservers': {},
'_remove_method': <function list.remove(value, /)>,
'_url': None,
'_gid': None,
'_snap': None,
'_sketch': None,
'_path_effects': [],
'_sticky_edges': _XYPair(x=[], y=[]),
'_in_layout': True,
'_x': -0.07506663683168735,
'_y': 1.097435647331897,
'_text': 'A',
'_color': 'black',
'_fontproperties': <matplotlib.font_manager.FontProperties at 0x7fe065d3db90>,
'_usetex': False,
'_wrap': False,
'_verticalalignment': 'center',
'_horizontalalignment': 'right',
'_multialignment': None,
'_rotation': 'horizontal',
'_bbox_patch': None,
'_renderer': <matplotlib.backends.backend_agg.RendererAgg at 0x7fe08b01fd90>,
'_linespacing': 1.2,
'_rotation_mode': None}
Related
I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
return my_autopct
Here is a simpler solution to #tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes, labels=labels)
ax[1].pie(sizes[1:], labels=labels[1:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
colours = {'Frogs': 'C0',
'Hogs': 'C1',
'Dogs': 'C2',
'Logs': 'C3'}
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes,
labels=labels,
colors=[colours[key] for key in labels])
ax[1].pie(sizes[1:],
labels=labels[1:],
colors=[colours[key] for key in labels[1:]])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))
Here's an idea you could try. Make a dictionary from your labels and colors, so each color is mapped to a label. Then, after making the pie chart, go in an assign the facecolor of the wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices,labels,colors):
colordict={}
for l,c in zip(labels,colors):
print l,c
colordict[l]=c
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])
titlestring = 'Issues'
ax.set_title(titlestring)
return fig,ax,pie_wedge_collection
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()
I have a script where I am plotting several variables in a grid of 5x1. I have noticed that when I have data that makes my legend shorter, the subplots themselves have an acceptable height and horizontal padding. When I have data that makes my legend bigger (vertically) the subplots are squashed leaving extra horizontal padding between the plots.
Is there a way to prevent this? To separate the legend assignment from the axes object and draw each plot independent of legend spacing?
Below is a minimal reproducible example to show what I mean:
#!/usr/bin/env python3
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def plotter(df, var_cols):
dfa = df.query("accepted == 'accepted'")
dfr = df.query("accepted != 'accepted'")
colors = {v: c for v, c in zip(['accepted', 'rejected', 'rerun'],
['darkgreen', 'firebrick', 'steelblue'])}
fig, axes = plt.subplots(nrows=len(var_cols), sharex=True)
for var, ax in zip(var_cols, axes):
for k, d in dfr.groupby('accepted'):
ax.scatter(d.iteration, d[var], label=k, alpha=0.8, c=d.accepted.map(colors))
ax.plot(dfa.iteration, dfa[var], '-o', label='accepted', color=colors['accepted'])
# Grab 3rd axes because I want the legend to be towards the center
handles, labels = axes[2].get_legend_handles_labels()
# Sort legend labels to put 'accepted' on top
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
axes[2].legend(handles, labels, markerscale=1.2, bbox_to_anchor=(1, 0.5))
fig.tight_layout()
def main():
states = {1: 'accepted', 2: 'rejected', 3: 'rerun'}
np.random.seed(666)
dat1 = pd.DataFrame({
'iteration': [0, 1, 2],
'accepted': ['accepted']*3,
'h_cap': [10.1, 6.5, 12.2],
'h_stor': [500, 410, 0],
'h_mark': [10, 6, 1],
'bid': [500, 100, 50],
'npv': [2.278, 2.6, 2.85]
})
dat2 = pd.DataFrame({
'iteration': range(10),
'accepted': [states[num] for num in np.random.randint(1, 4, size=10)],
'h_cap': np.random.rand(10),
'h_stor': np.random.rand(10),
'h_mark': np.random.rand(10),
'bid': np.random.rand(10),
'npv': np.random.rand(10)
})
var_cols = ['h_cap', 'h_stor', 'h_mark', 'bid', 'npv']
plotter(dat1, var_cols)
plt.savefig(Path('~/Desktop/nonsmushed.png').expanduser())
plotter(dat2, var_cols)
plt.savefig(Path('~/Desktop/smushed.png').expanduser())
if __name__ == '__main__':
main()
nonsmushed.png
smushed.png
Because your legend "belongs" to axes[2], tight_layout() adjusts the spacing so that the adjacent axes don't cover the legend.
I think the simplest solution would be to create a "figure-level" legend (fig.legend()), but the problem with that is that tight_layout() doesn't account for that legend, and you will have to adjust the right margin by hand (there might be a way to calculate it automatically if needed, but that might get messy)
(...)
labels, handles = zip(*sorted(zip(labels, handles), key=lambda t: t[0]))
fig.legend(handles, labels, markerscale=1.2, bbox_to_anchor=(1, 0.5))
fig.tight_layout()
fig.subplots_adjust(right=0.75) # adjust value as needed
Something like this:
There is a very good package to do it in R. In python, the best that I could figure out is this, using the squarify package (inspired by a post on how to do treemaps):
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns # just to have better line color and width
import squarify
# for those using jupyter notebooks
%matplotlib inline
df = pd.DataFrame({
'v1': np.ones(100),
'v2': np.random.randint(1, 4, 100)})
df.sort_values(by='v2', inplace=True)
# color scale
cmap = mpl.cm.Accent
mini, maxi = df['v2'].min(), df['v2'].max()
norm = mpl.colors.Normalize(vmin=mini, vmax=maxi)
colors = [cmap(norm(value)) for value in df['v2']]
# figure
fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal")
ax = squarify.plot(df['v1'], color=colors, ax=ax)
ax.set_xticks([])
ax.set_yticks([]);
But when I create not 100 but 200 elements (or other non-square numbers), the squares become misaligned.
Another problem is that if I change v2 to some categorical variable (e.g., a hundred As, Bs, Cs and Ds), I get this error:
could not convert string to float: 'a'
So, could anyone help me with these two questions:
how can I solve the alignment problem with non-square numbers of observations?
how can use categorical variables in v2?
Beyond this, I am really open if there are any other python packages that can create waffle plots more efficiently.
I spent a few days to build a more general solution, PyWaffle.
You can install it through
pip install pywaffle
The source code: https://github.com/gyli/PyWaffle
PyWaffle does not use matshow() method, but builds those squares one by one. That makes it easier for customization. Besides, what it provides is a custom Figure class, which returns a figure object. By updating attributes of the figure, you can basically control everything in the chart.
Some examples:
Colored or transparent background:
import matplotlib.pyplot as plt
from pywaffle import Waffle
data = {'Democratic': 48, 'Republican': 46, 'Libertarian': 3}
fig = plt.figure(
FigureClass=Waffle,
rows=5,
values=data,
colors=("#983D3D", "#232066", "#DCB732"),
title={'label': 'Vote Percentage in 2016 US Presidential Election', 'loc': 'left'},
labels=["{0} ({1}%)".format(k, v) for k, v in data.items()],
legend={'loc': 'lower left', 'bbox_to_anchor': (0, -0.4), 'ncol': len(data), 'framealpha': 0}
)
fig.gca().set_facecolor('#EEEEEE')
fig.set_facecolor('#EEEEEE')
plt.show()
Use icons replacing squares:
data = {'Democratic': 48, 'Republican': 46, 'Libertarian': 3}
fig = plt.figure(
FigureClass=Waffle,
rows=5,
values=data,
colors=("#232066", "#983D3D", "#DCB732"),
legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
icons='child', icon_size=18,
icon_legend=True
)
Multiple subplots in one chart:
import pandas as pd
data = pd.DataFrame(
{
'labels': ['Hillary Clinton', 'Donald Trump', 'Others'],
'Virginia': [1981473, 1769443, 233715],
'Maryland': [1677928, 943169, 160349],
'West Virginia': [188794, 489371, 36258],
},
).set_index('labels')
fig = plt.figure(
FigureClass=Waffle,
plots={
'311': {
'values': data['Virginia'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['Virginia'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.05, 1), 'fontsize': 8},
'title': {'label': '2016 Virginia Presidential Election Results', 'loc': 'left'}
},
'312': {
'values': data['Maryland'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['Maryland'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.2, 1), 'fontsize': 8},
'title': {'label': '2016 Maryland Presidential Election Results', 'loc': 'left'}
},
'313': {
'values': data['West Virginia'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['West Virginia'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.3, 1), 'fontsize': 8},
'title': {'label': '2016 West Virginia Presidential Election Results', 'loc': 'left'}
},
},
rows=5,
colors=("#2196f3", "#ff5252", "#999999"), # Default argument values for subplots
figsize=(9, 5) # figsize is a parameter of plt.figure
)
I've put together a working example, below, which I think meets your needs. Some work is needed to fully generalize the approach, but I think you'll find that it's a good start. The trick was to use matshow() to solve your non-square problem, and to build a custom legend to easily account for categorical values.
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Let's make a default data frame with catagories and values.
df = pd.DataFrame({ 'catagories': ['cat1', 'cat2', 'cat3', 'cat4'],
'values': [84911, 14414, 10062, 8565] })
# Now, we define a desired height and width.
waffle_plot_width = 20
waffle_plot_height = 7
classes = df['catagories']
values = df['values']
def waffle_plot(classes, values, height, width, colormap):
# Compute the portion of the total assigned to each class.
class_portion = [float(v)/sum(values) for v in values]
# Compute the number of tiles for each catagories.
total_tiles = width * height
tiles_per_class = [round(p*total_tiles) for p in class_portion]
# Make a dummy matrix for use in plotting.
plot_matrix = np.zeros((height, width))
# Popoulate the dummy matrix with integer values.
class_index = 0
tile_index = 0
# Iterate over each tile.
for col in range(waffle_plot_width):
for row in range(height):
tile_index += 1
# If the number of tiles populated is sufficient for this class...
if tile_index > sum(tiles_per_class[0:class_index]):
# ...increment to the next class.
class_index += 1
# Set the class value to an integer, which increases with class.
plot_matrix[row, col] = class_index
# Create a new figure.
fig = plt.figure()
# Using matshow solves your "non-square" problem.
plt.matshow(plot_matrix, cmap=colormap)
plt.colorbar()
# Get the axis.
ax = plt.gca()
# Minor ticks
ax.set_xticks(np.arange(-.5, (width), 1), minor=True);
ax.set_yticks(np.arange(-.5, (height), 1), minor=True);
# Gridlines based on minor ticks
ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
# Manually constructing a legend solves your "catagorical" problem.
legend_handles = []
for i, c in enumerate(classes):
lable_str = c + " (" + str(values[i]) + ")"
color_val = colormap(float(i+1)/len(classes))
legend_handles.append(mpatches.Patch(color=color_val, label=lable_str))
# Add the legend. Still a bit of work to do here, to perfect centering.
plt.legend(handles=legend_handles, loc=1, ncol=len(classes),
bbox_to_anchor=(0., -0.1, 0.95, .10))
plt.xticks([])
plt.yticks([])
# Call the plotting function.
waffle_plot(classes, values, waffle_plot_height, waffle_plot_width,
plt.cm.coolwarm)
Below is an example of the output this script produced. As you can see, it works fairly well for me, and meets all of your stated needs. Just let me know if it gives you any trouble. Enjoy!
You can use this function for automatic creation of a waffle with simple parameters:
def create_waffle_chart(categories, values, height, width, colormap, value_sign=''):
# compute the proportion of each category with respect to the total
total_values = sum(values)
category_proportions = [(float(value) / total_values) for value in values]
# compute the total number of tiles
total_num_tiles = width * height # total number of tiles
print ('Total number of tiles is', total_num_tiles)
# compute the number of tiles for each catagory
tiles_per_category = [round(proportion * total_num_tiles) for proportion in category_proportions]
# print out number of tiles per category
for i, tiles in enumerate(tiles_per_category):
print (df_dsn.index.values[i] + ': ' + str(tiles))
# initialize the waffle chart as an empty matrix
waffle_chart = np.zeros((height, width))
# define indices to loop through waffle chart
category_index = 0
tile_index = 0
# populate the waffle chart
for col in range(width):
for row in range(height):
tile_index += 1
# if the number of tiles populated for the current category
# is equal to its corresponding allocated tiles...
if tile_index > sum(tiles_per_category[0:category_index]):
# ...proceed to the next category
category_index += 1
# set the class value to an integer, which increases with class
waffle_chart[row, col] = category_index
# instantiate a new figure object
fig = plt.figure()
# use matshow to display the waffle chart
colormap = plt.cm.coolwarm
plt.matshow(waffle_chart, cmap=colormap)
plt.colorbar()
# get the axis
ax = plt.gca()
# set minor ticks
ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
ax.set_yticks(np.arange(-.5, (height), 1), minor=True)
# add dridlines based on minor ticks
ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
plt.xticks([])
plt.yticks([])
# compute cumulative sum of individual categories to match color schemes between chart and legend
values_cumsum = np.cumsum(values)
total_values = values_cumsum[len(values_cumsum) - 1]
# create legend
legend_handles = []
for i, category in enumerate(categories):
if value_sign == '%':
label_str = category + ' (' + str(values[i]) + value_sign + ')'
else:
label_str = category + ' (' + value_sign + str(values[i]) + ')'
color_val = colormap(float(values_cumsum[i])/total_values)
legend_handles.append(mpatches.Patch(color=color_val, label=label_str))
# add legend to chart
plt.legend(
handles=legend_handles,
loc='lower center',
ncol=len(categories),
bbox_to_anchor=(0., -0.2, 0.95, .1)
)
I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
def my_autopct(pct):
total = sum(values)
val = int(round(pct*total/100.0))
return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
return my_autopct
Here is a simpler solution to #tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes, labels=labels)
ax[1].pie(sizes[1:], labels=labels[1:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]
colours = {'Frogs': 'C0',
'Hogs': 'C1',
'Dogs': 'C2',
'Logs': 'C3'}
fig, ax = plt.subplots(1, 2)
ax[0].pie(sizes,
labels=labels,
colors=[colours[key] for key in labels])
ax[1].pie(sizes[1:],
labels=labels[1:],
colors=[colours[key] for key in labels[1:]])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))
Here's an idea you could try. Make a dictionary from your labels and colors, so each color is mapped to a label. Then, after making the pie chart, go in an assign the facecolor of the wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices,labels,colors):
colordict={}
for l,c in zip(labels,colors):
print l,c
colordict[l]=c
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])
titlestring = 'Issues'
ax.set_title(titlestring)
return fig,ax,pie_wedge_collection
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()
I have lists of data indicating responses to likert questions with a one (very unhappy) to five (very happy) scale. I would like to create a page of plots showing these lists as skewed stacked horizontal bar charts. The lists of responses can be of different sizes (e.g. when someone has opted out of answering a particular question). Here is a minimal example of the data:
likert1 = [1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0]
likert2 = [5.0, 4.0, 5.0, 4.0, 5.0, 3.0]
I would like to be able to plot this with something like:
plot_many_likerts(likert1, likert2)
At the moment I've written a function to iterate over the lists, and plot each one as its own subplot on a shared figure in matplotlib:
def plot_many_likerts(*lsts):
#get the figure and the list of axes for this plot
fig, axlst = plt.subplots(len(lsts), sharex=True)
for i in range(len(lsts)):
likert_horizontal_bar_list(lsts[i], axlst[i], xaxis=[1.0, 2.0, 3.0, 4.0, 5.0])
axlst[i].axis('off')
fig.show()
def likert_horizontal_bar_list(lst, ax, xaxis):
cnt = Counter(lst)
#del (cnt[None])
i = 0
colour_float = 0.00001
previous_right = 0
for key in sorted(xaxis):
ax.barh(bottom=0, width=cnt[key], height=0.4, left=previous_right, color=plt.cm.jet(colour_float),label=str(key))
i += 1
previous_right = previous_right + cnt[key]
colour_float = float(i) / float(len(xaxis))
This works not badly and create stacked bar charts all with the same representative sizes (e.g. the widths share common axis scales). Here is a screen shot:
What is currently Produced http://s7.postimg.org/vh0j816gn/figure_1.jpg
What I would like is to have these two plots centered on midpoints of the mode of the datasets (the datasets will have the same range). For instance:
What I would like to see http://s29.postimg.org/z0qwv4ryr/figure_2.jpg
Suggestions on how I might do this?
I needed to make a divergent bar chart for some likert data. I was using pandas, but the approach would probably be similar without it. The key mechanism is to add in an invisible buffer at the start.
likert_colors = ['white', 'firebrick','lightcoral','gainsboro','cornflowerblue', 'darkblue']
dummy = pd.DataFrame([[1,2,3,4, 5], [5,6,7,8, 5], [10, 4, 2, 10, 5]],
columns=["SD", "D", "N", "A", "SA"],
index=["Key 1", "Key B", "Key III"])
middles = dummy[["SD", "D"]].sum(axis=1)+dummy["N"]*.5
longest = middles.max()
complete_longest = dummy.sum(axis=1).max()
dummy.insert(0, '', (middles - longest).abs())
dummy.plot.barh(stacked=True, color=likert_colors, edgecolor='none', legend=False)
z = plt.axvline(longest, linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)
plt.xlim(0, complete_longest)
xvalues = range(0,complete_longest,10)
xlabels = [str(x-longest) for x in xvalues]
plt.xticks(xvalues, xlabels)
plt.show()
There are many limitations to this approach. First, bars no longer get a black outline, and the legend will have an extra blank element. I just hid the legend (I figure there's probably a way to hide just the individual element). I'm not sure of a convenient way to make the bars have an outline without also adding the outline to the buffer element.
First, we establish some colors and dummy data. Then we calculate the width of the left two columns and half of the middle-most column (which i know to be "SD", "D", and "N", respectively). I find the longest column, and use its width to calculate the difference needed for the other columns. Next, I insert this new buffer column into the first column position with a blank title (which felt gross, lemme tell you). For good measure, I also added a vertical line (axvline) behind the middle of the middle bar based on the advice of [2]. Finally, I adjust the x-axis to have the proper scale by offsetting its labels.
You might want more horizontal space on the left - you can easily do so by adding to "longest".
[2] Heiberger, Richard M., and Naomi B. Robbins. "Design of diverging stacked bar charts for Likert scales and other applications." Journal of Statistical Software 57.5 (2014): 1-32.
I too recently needed to make a divergent bar chart for some Likert data. I took a slightly different approach than #austin-cory-bart.
I modified an example from the gallery instead and created this:
import numpy as np
import matplotlib.pyplot as plt
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
results = {
'Question 1': [10, 15, 17, 32, 26],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 38]
}
def survey(results, category_names):
"""
Parameters
----------
results : dict
A mapping from question labels to a list of answers per category.
It is assumed all lists contain the same number of entries and that
it matches the length of *category_names*. The order is assumed
to be from 'Strongly disagree' to 'Strongly aisagree'
category_names : list of str
The category labels.
"""
labels = list(results.keys())
data = np.array(list(results.values()))
data_cum = data.cumsum(axis=1)
middle_index = data.shape[1]//2
offsets = data[:, range(middle_index)].sum(axis=1) + data[:, middle_index]/2
# Color Mapping
category_colors = plt.get_cmap('coolwarm_r')(
np.linspace(0.15, 0.85, data.shape[1]))
fig, ax = plt.subplots(figsize=(10, 5))
# Plot Bars
for i, (colname, color) in enumerate(zip(category_names, category_colors)):
widths = data[:, i]
starts = data_cum[:, i] - widths - offsets
rects = ax.barh(labels, widths, left=starts, height=0.5,
label=colname, color=color)
# Add Zero Reference Line
ax.axvline(0, linestyle='--', color='black', alpha=.25)
# X Axis
ax.set_xlim(-90, 90)
ax.set_xticks(np.arange(-90, 91, 10))
ax.xaxis.set_major_formatter(lambda x, pos: str(abs(int(x))))
# Y Axis
ax.invert_yaxis()
# Remove spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
# Ledgend
ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
loc='lower left', fontsize='small')
# Set Background Color
fig.set_facecolor('#FFFFFF')
return fig, ax
fig, ax = survey(results, category_names)
plt.show()