I'm trying to make my subplots share the same axis, as they're currently different (it appears the circles in the plot itself are not perfectly aligned). When I try to pass sharex=True into ax = plt.subplot(1, 5, row+1, polar=True, sharex=True) I return an error suggesting TypeError: cannot create weak reference to 'bool' object.
Here is what my plot currently looks like, as you should be able to see, the axis (circles) inside the plot are not perfectly aligned, and I cannot work out how to align them using plt.subplot.
Does anybody have any recommendations?
Code to reproduce example:
import matplotlib.pyplot as plt
import pandas as pd
def make_spider(row, title, color):
import math
categories = list(df)
N = len(categories)
angles = [n / float(N) * 2 * math.pi for n in range(N)]
angles += angles[:1]
ax = plt.subplot(1, 5, row+1, polar=True)
plt.xticks(angles[:-1], categories, color='grey', size=8)
values = df.iloc[row].values.flatten().tolist()
values += values[:1]
ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
ax.fill(angles, values, color=color, alpha = .4)
plt.gca().set_rmax(.2)
my_dpi = 40
plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=96)
my_palette = plt.cm.get_cmap('Set2', len(df.index)+1)
for row in range(0, len(df.index)):
make_spider( row = row, title='Cluster: ' + str(row), color=my_palette(row) )
Dataframe:
df = pd.DataFrame.from_dict({"no_rooms":{"0":-0.3470532925,"1":-0.082144001,"2":-0.082144001,"3":-0.3470532925,"4":-0.3470532925},"total_area":{"0":-0.1858487321,"1":-0.1685491141,"2":-0.1632483955,"3":-0.1769700284,"4":-0.0389887094},"car_park_spaces":{"0":-0.073703681,"1":-0.073703681,"2":-0.073703681,"3":-0.073703681,"4":-0.073703681},"house_price":{"0":-0.2416123064,"1":-0.2841806825,"2":-0.259622004,"3":-0.3529449824,"4":-0.3414842657},"pop_density":{"0":-0.1271390651,"1":-0.3105853643,"2":-0.2316607937,"3":-0.3297832328,"4":-0.4599021194},"business_rate":{"0":-0.1662745006,"1":-0.1426329043,"2":-0.1577528867,"3":-0.163560133,"4":-0.1099718326},"noqual_pc":{"0":-0.0251535462,"1":-0.1540641646,"2":-0.0204666924,"3":-0.0515740013,"4":-0.0445135996},"level4qual_pc":{"0":-0.0826103951,"1":-0.1777759951,"2":-0.114263357,"3":-0.1787044751,"4":-0.2709496389},"badhealth_pc":{"0":-0.105481688,"1":-0.1760349683,"2":-0.128215043,"3":-0.1560577648,"4":-0.1760349683}})
Best create the sharing a priori to plotting. The plot to the already shared axes.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dic = {"no_rooms":{"0":-0.347,"1":-0.082,"2":-0.082, "3":-0.347,"4":-0.347},
"total_area":{"0":-0.185,"1":-0.168,"2":-0.163, "3":-0.176,"4":-0.038},
"car_park_spaces":{"0":-0.073,"1":-0.073,"2":-0.073, "3":-0.073,"4":-0.073},
"house_price":{"0":-0.241,"1":-0.284,"2":-0.259,"3":-0.352,"4":-0.341},
"pop_density":{"0":-0.127,"1":-0.310,"2":-0.231,"3":-0.329,"4":-0.459},
"business_rate":{"0":-0.166,"1":-0.142,"2":-0.157,"3":-0.163,"4":-0.109},
"noqual_pc":{"0":-0.025,"1":-0.15,"2":-0.020,"3":-0.051,"4":-0.044},
"level4qual_pc":{"0":-0.082,"1":-0.17,"2":-0.114,"3":-0.178,"4":-0.270},
"badhealth_pc":{"0":-0.105,"1":-0.176,"2":-0.128,"3":-0.156,"4":-0.176}}
df = pd.DataFrame.from_dict(dic)
def make_spider(row, title, color, ax=None):
categories = list(df)
N = len(categories)
angles = np.arange(N+1)/N*2*np.pi
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, color='grey', size=8)
ax.tick_params(labelleft=True)
values = df.iloc[row].values.flatten().tolist()
values += values[:1]
ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
ax.fill(angles, values, color=color, alpha = .4)
fig, axes = plt.subplots(ncols=len(df.index), subplot_kw=dict(polar=True), sharey=True,
figsize=(15,8))
my_palette = plt.cm.get_cmap('Set2', len(df.index)+1)
for row, ax in enumerate(axes):
make_spider( row = row, title='Cluster: ' + str(row), color=my_palette(row), ax=ax )
plt.show()
You have to set the same y_lim/r_lim and y_ticks/r_ticks to all axes. This can for example be done by passing the last ax reference to plt.subplot to set sharey for all axes:
def make_spider(row, title, color, last_ax=None):
import math
categories = list(df)
N = len(categories)
angles = [n / float(N) * 2 * math.pi for n in range(N)]
angles += angles[:1]
# add last ax as sharey here:
ax = plt.subplot(1, 5, row+1, polar=True, sharey=last_ax)
plt.xticks(angles[:-1], categories, color='grey', size=8)
values = df.iloc[row].values.flatten().tolist()
values += values[:1]
ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
ax.fill(angles, values, color=color, alpha = .4)
plt.gca().set_rmax(.2)
# return axes to store them
return plt.gca()
my_dpi = 40
plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=96)
my_palette = plt.cm.get_cmap('Set2', len(df.index)+1)
axs = [] # store axes
for row in range(0, len(df.index)):
if row != 0: # if not the first subplot, pass last ax as argument
axs.append(
make_spider(row=row, title='Cluster: ' + str(row), color=my_palette(row),
last_ax=axs[row - 1]))
else:
axs.append(
make_spider(row=row, title='Cluster: ' + str(row), color=my_palette(row)))
OR by passing the limits/ticks to the plots directly:
def make_spider(row, title, color, rlim, rticks):
import math
categories = list(df)
N = len(categories)
angles = [n / float(N) * 2 * math.pi for n in range(N)]
angles += angles[:1]
ax = plt.subplot(1, 5, row+1, polar=True)
plt.xticks(angles[:-1], categories, color='grey', size=8)
values = df.iloc[row].values.flatten().tolist()
values += values[:1]
ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
ax.fill(angles, values, color=color, alpha = .4)
ax.set_rlim(rlim)
ax.set_rticks(rticks)
# return axes to store them (not needed but may help later)
return ax
my_dpi = 40
plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=96)
my_palette = plt.cm.get_cmap('Set2', len(df.index)+1)
axs = []
for row in range(0, len(df.index)):
axs.append(
make_spider(
row=row, title='Cluster: ' + str(row), color=my_palette(row),
rlim=(-.5, 0), rticks=[-.3, -.2, -.1, 0.]))
Related
So I have this plot here:
What I want to do is to have every second element of yaxis to be coloured for example in blue and the rest in red.
Here is the result I want to get:
and here is the code I got:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
mpl.rcParams['toolbar'] = 'None'
plt.style.use('fivethirtyeight')
result_7_s = amount * s_7_days
result_14_s = amount * s_14_days
result_21_s = amount * s_21_days
result_7_fc = amount * fc_7_days
result_14_fc = amount * fc_14_days
result_21_fc = amount * fc_21_days
final_y = np.array([int(result_7_s), int(result_14_s),
int(result_21_s), int(result_7_fc),
int(result_14_fc), int(result_21_fc)])
fig, ax = plt.subplots(num = 'Test')
x = np.array([7, 14, 21])
plt.xticks(ticks = x, labels = x)
plt.yticks(ticks = final_y, labels = final_y)
plt.title(f'Prices for {amount} people')
plt.xlabel('Days')
plt.ylabel('Price')
plt.tight_layout()
ax.bar(x - 0.5, final_y[:3], width=1, color='#444444', label='Standard')
ax.bar(x + 0.5, final_y[3:], width=1, color='#e5ae38', label='First Class')
ax.tick_params(axis='y', colors = 'blue') # <-------
ax.yaxis.set_major_formatter('{x}$')
plt.legend()
plt.savefig('result.png')
plt.show()
Iterate over the tick labels to apply the desired color to each one of them:
for n, tick_label in enumerate(ax.yaxis.get_ticklabels()):
tick_label.set_color("red" if n%2 else "blue")
Here is the solution I came with:
for i in range(0, 3):
plt.gca().get_yticklabels()[i].set_color('blue')
for i in range(3, 6):
plt.gca().get_yticklabels()[i].set_color('red')
I have two numeric arrays of equal length, with one array always having the element value >= to the corresponding (same index) element in the second array.
I am trying to visualize in a single graph:
i) difference between the corresponding elements,
ii) values of the corresponding elements in the two arrays.
I have tried plotting the CDF as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
arr1 = np.random.uniform(1,20,[25,1])
arr2 = arr1 + np.random.uniform(1,10,[25,1])
df1 = pd.DataFrame(arr1)
df2 = pd.DataFrame(arr2)
fix, ax = plt.subplots()
sns.kdeplot(df1[0], cumulative=True, color='orange', label='arr1')
sns.kdeplot(df2[0], cumulative=True, color='b', label='arr2')
sns.kdeplot(df2[0]-df1[0], cumulative=True, color='r', label='difference')
plt.show()
which gives the following output:
However, it does not capture the difference, and values of the corresponding elements together. For example, suppose the difference between two elements is 3. The two numbers can be 2 and 5, but they can also be 15 and 18, and this can not be determined from the CDF.
Which kind of plotting can visualize both the difference between the elements and the values of the elements?
I do not wish to line plot as below because not much statistical insights can be derived from the visualization.
ax.plot(df1[0])
ax.plot(df2[0])
ax.plot(df2[0]-df1[0])
There are lots of ways to show difference between two values. It really depends on your goal for the chart, how quantitative or qualitative you want to be, or if you want to show the raw data somehow. Here are a few ideas that come to mind that do not involve simple line plots or density functions. I strongly recommend the book Better Data Visualization by Johnathan Schwabish. He discusses interesting considerations regarding data presentation.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
arr1 = np.random.uniform(1,20, size=25)
arr2 = arr1 + np.random.uniform(1,10, size=25)
df = pd.DataFrame({
'col1' : arr1,
'col2' : arr2
})
df['diff'] = df.col2 - df.col1
df['sum'] = df.col1 + df.col2
fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(15,15))
axes = axes.flatten()
# Pyramid chart
df_sorted = df.sort_values(by='sum', ascending=True)
axes[0].barh(
y = np.arange(1,26),
width = -df_sorted.col1
)
axes[0].barh(
y = np.arange(1,26),
width = df_sorted.col2
)
# Style axes[0]
style_func(axes[0], 'Pyramid Chart')
# Dot Plot
axes[1].scatter(df.col1, np.arange(1, 26), label='col1')
axes[1].scatter(df.col2, np.arange(1, 26), label='col2')
axes[1].hlines(
y = np.arange(1, 26),
xmin = df.col1, xmax = df.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[1]
legend = axes[1].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[1], 'Dot Plot')
set_xlim = axes[1].set_xlim(0,25)
# Dot Plot 2
df_sorted = df.sort_values(by=['col1', 'diff'], ascending=False)
axes[2].scatter(df_sorted.col1, np.arange(1, 26), label='col1')
axes[2].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[2].hlines(
y = np.arange(1, 26),
xmin = df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[2]
legend = axes[2].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[2], 'Dot Plot')
set_xlim = axes[2].set_xlim(0,25)
# Dot Plot 3
df_sorted = df.sort_values(by='sum', ascending=True)
axes[3].scatter(-df_sorted.col1, np.arange(1, 26), label='col1')
axes[3].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[3].vlines(x=0, ymin=-1, ymax=27, linewidth=2.5, color='k')
axes[3].hlines(
y = np.arange(1, 26),
xmin = -df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=2
)
# Style axes[3]
legend = axes[3].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[3], 'Dot Plot')
# Strip plot
axes[4].scatter(df.col1, [4] * 25)
axes[4].scatter(df.col2, [6] * 25)
axes[4].set_ylim(0, 10)
axes[4].vlines(
x = [df.col1.mean(), df.col2.mean()],
ymin = [3.5, 5.5], ymax=[4.5,6.5],
color='black', linewidth =2
)
# Style axes[4]
axes[4].yaxis.set_major_locator(ticker.FixedLocator([4,6]))
axes[4].yaxis.set_major_formatter(ticker.FixedFormatter(['col1','col2']))
hide_spines = [axes[4].spines[x].set_visible(False) for x in ['left','top','right']]
set_title = axes[4].set_title('Strip Plot', fontweight='bold')
tick_params = axes[4].tick_params(axis='y', left=False)
grid = axes[4].grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
# Slope chart
for i in range(25):
axes[5].plot([0,1], [df.col1[i], df.col2[i]], color='k')
align = ['left', 'right']
for i in range(1,3):
axes[5].text(x = i - 1, y = 0, s = 'col' + str(i),
fontsize=14, fontweight='bold', ha=align[i-1])
set_title = axes[5].set_title('Slope chart', fontweight='bold')
axes[5].axis('off')
def style_func(ax, title):
hide_spines = [ax.spines[x].set_visible(False) for x in ['left','top','right']]
set_title = ax.set_title(title, fontweight='bold')
set_xlim = ax.set_xlim(-25,25)
x_locator = ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
y_locator = ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(1,26, 2)))
spine_width = ax.spines['bottom'].set_linewidth(1.5)
x_tick_params = ax.tick_params(axis='x', length=8, width=1.5)
x_tick_params = ax.tick_params(axis='y', left=False)
What about a parallel coordinates plot with plotly? This will allow to see the distinct values of each original array but then also if they converge on the same diffrence?
https://plot.ly/python/parallel-coordinates-plot/
I want to make a clustermap/heatmap of gene presence-absence data from patients where the genes will be grouped into categories (e.g chemotaxis, endotoxin etc) and labelled appropriately. I haven't found any such option in seaborn documentation. I know how to generate the heatmap, I just don't know how to label yticks as categories. Here is a sample (unrelated to my work) of what I want to achieve:
Here , yticklabels January, February and March are given group label winter and other yticklabels are also similarly labelled.
I've reproduced the example you gave in seaborn, adapting #Stein's answer from here.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from itertools import groupby
import datetime
import seaborn as sns
def test_table():
months = [datetime.date(2008, i+1, 1).strftime('%B') for i in range(12)]
seasons = ['Winter',]*3 + ['Spring',]*2 + ['Summer']*3 + ['Pre-Winter',]*4
tuples = list(zip(months, seasons))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
d = {i: [np.random.randint(0,50) for _ in range(12)] for i in range(1950, 1960)}
df = pd.DataFrame(d, index=index)
return df
def add_line(ax, xpos, ypos):
line = plt.Line2D([ypos, ypos+ .2], [xpos, xpos], color='black', transform=ax.transAxes)
line.set_clip_on(False)
ax.add_line(line)
def label_len(my_index,level):
labels = my_index.get_level_values(level)
return [(k, sum(1 for i in g)) for k,g in groupby(labels)]
def label_group_bar_table(ax, df):
xpos = -.2
scale = 1./df.index.size
for level in range(df.index.nlevels):
pos = df.index.size
for label, rpos in label_len(df.index,level):
add_line(ax, pos*scale, xpos)
pos -= rpos
lypos = (pos + .5 * rpos)*scale
ax.text(xpos+.1, lypos, label, ha='center', transform=ax.transAxes)
add_line(ax, pos*scale , xpos)
xpos -= .2
df = test_table()
fig = plt.figure(figsize = (10, 10))
ax = fig.add_subplot(111)
sns.heatmap(df)
#Below 3 lines remove default labels
labels = ['' for item in ax.get_yticklabels()]
ax.set_yticklabels(labels)
ax.set_ylabel('')
label_group_bar_table(ax, df)
fig.subplots_adjust(bottom=.1*df.index.nlevels)
plt.show()
Gives:
Hope that helps.
I haven't tested this with seaborn yet, but the following works with vanilla matplotlib.
#!/usr/bin/env python
"""
Annotate a group of y-tick labels as such.
"""
import matplotlib.pyplot as plt
from matplotlib.transforms import TransformedBbox
def annotate_yranges(groups, ax=None):
"""
Annotate a group of consecutive yticklabels with a group name.
Arguments:
----------
groups : dict
Mapping from group label to an ordered list of group members.
ax : matplotlib.axes object (default None)
The axis instance to annotate.
"""
if ax is None:
ax = plt.gca()
label2obj = {ticklabel.get_text() : ticklabel for ticklabel in ax.get_yticklabels()}
for ii, (group, members) in enumerate(groups.items()):
first = members[0]
last = members[-1]
bbox0 = _get_text_object_bbox(label2obj[first], ax)
bbox1 = _get_text_object_bbox(label2obj[last], ax)
set_yrange_label(group, bbox0.y0 + bbox0.height/2,
bbox1.y0 + bbox1.height/2,
min(bbox0.x0, bbox1.x0),
-2,
ax=ax)
def set_yrange_label(label, ymin, ymax, x, dx=-0.5, ax=None, *args, **kwargs):
"""
Annotate a y-range.
Arguments:
----------
label : string
The label.
ymin, ymax : float, float
The y-range in data coordinates.
x : float
The x position of the annotation arrow endpoints in data coordinates.
dx : float (default -0.5)
The offset from x at which the label is placed.
ax : matplotlib.axes object (default None)
The axis instance to annotate.
"""
if not ax:
ax = plt.gca()
dy = ymax - ymin
props = dict(connectionstyle='angle, angleA=90, angleB=180, rad=0',
arrowstyle='-',
shrinkA=10,
shrinkB=10,
lw=1)
ax.annotate(label,
xy=(x, ymin),
xytext=(x + dx, ymin + dy/2),
annotation_clip=False,
arrowprops=props,
*args, **kwargs,
)
ax.annotate(label,
xy=(x, ymax),
xytext=(x + dx, ymin + dy/2),
annotation_clip=False,
arrowprops=props,
*args, **kwargs,
)
def _get_text_object_bbox(text_obj, ax):
# https://stackoverflow.com/a/35419796/2912349
transform = ax.transData.inverted()
# the figure needs to have been drawn once, otherwise there is no renderer?
plt.ion(); plt.show(); plt.pause(0.001)
bb = text_obj.get_window_extent(renderer = ax.get_figure().canvas.renderer)
# handle canvas resizing
return TransformedBbox(bb, transform)
if __name__ == '__main__':
import numpy as np
fig, ax = plt.subplots(1,1)
# so we have some extra space for the annotations
fig.subplots_adjust(left=0.3)
data = np.random.rand(10,10)
ax.imshow(data)
ticklabels = 'abcdefghij'
ax.set_yticks(np.arange(len(ticklabels)))
ax.set_yticklabels(ticklabels)
groups = {
'abc' : ('a', 'b', 'c'),
'def' : ('d', 'e', 'f'),
'ghij' : ('g', 'h', 'i', 'j')
}
annotate_yranges(groups)
plt.show()
I want to separate or increase the distance of my table and my graph so they don't layover. I thought of increasing the size to right and put the table there but I can't seem to make it work, and I can't find a way to offset the table by 1 line.
Graph
global dataread
global top4
global iV
top4mod = [] #holder for table, combines amplitude and frequency (bin*3.90Hz)
plt.plot(x1, fy1, '-') #plot x-y
plt.axis([0, 500, 0, 1.2]) #range for x-y plot
plt.xlabel('Hz')
columns = ('Frequency','Hz')
rows = ['# %d' % p for p in (1,2,3,4)] #top4
colors = 'C0'
print(len(rows))
print(len(str(top4)))
print(top4)
iV=[d*bins for d in iV] # convert bins into frequency
i=0;
FirstCol = [4, 3, 2, 1]
while i < 4:
Table.append([iV[i]] + [top4[i]])#[FirstCol[i]]
i = i+1
cell_text = []
n_rows = len(Table)
index = np.arange(len(columns)) + 1 #0.3 orginal
bar_width = 0.4
y_offset = np.array([0.0] * len(columns))
for row in range(n_rows):
#plt.bar(index, Table[row], bar_width, bottom=y_offset, color='C0') #dont use this
y_offset = y_offset + Table[row]
cell_text.append(['%1.1f' % p for p in y_offset])
the_table = plt.table(cellText=Table,rowLabels=rows, colLabels=columns,loc='bottom')
#plt.figure(figsize=(7,8))
# Adjust layout to make room for the table:
plt.subplots_adjust(bottom=0.2) #left=0.2, bottom=0.2
plt.show() #display plot
Using bbox
You can set the position of the table using the bbox argument. It expects either a bbox instance or a 4-tuple of values (left, bottom, width, height), which are in axes coordinates. E.g.
plt.table(..., bbox=[0.0,-0.5,1,0.3])
produces a table that is as wide as the axes (left=0, width=1) but positionned below the axes (bottom=-0.5, height=0.3).
import numpy as np
import matplotlib.pyplot as plt
data = np.random.rand(4,2)
columns = ('Frequency','Hz')
rows = ['# %d' % p for p in (1,2,3,4)]
plt.plot(data[:,0], data[:,1], '-') #plot x-y
plt.axis([0, 1, 0, 1.2]) #range for x-y plot
plt.xlabel('Hz')
the_table = plt.table(cellText=data,rowLabels=rows, colLabels=columns,
loc='bottom', bbox=[0.0,-0.45,1,.28])
plt.subplots_adjust(bottom=0.3)
plt.show()
Create dedicated axes
You can also create an axes (tabax) to put the table into. You would then set the loc to "center", turn the axis spines off and only use a very small subplots_adjust bottom parameter.
import numpy as np
import matplotlib.pyplot as plt
data = np.random.rand(4,2)
columns = ('Frequency','Hz')
rows = ['# %d' % p for p in (1,2,3,4)]
fig, (ax, tabax) = plt.subplots(nrows=2)
ax.plot(data[:,0], data[:,1], '-') #plot x-y
ax.axis([0, 1, 0, 1.2]) #range for x-y plot
ax.set_xlabel('Hz')
tabax.axis("off")
the_table = tabax.table(cellText=data,rowLabels=rows, colLabels=columns,
loc='center')
plt.subplots_adjust(bottom=0.05)
plt.show()
I have the following code:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(123456)
import pandas as pd
df = pd.DataFrame(3 * np.random.rand(4, 4), index=['a', 'b', 'c', 'd'],
columns=['x', 'y','z','w'])
plt.style.use('ggplot')
colors = plt.rcParams['axes.color_cycle']
fig, axes = plt.subplots(nrows=2, ncols=3)
for ax in axes.flat:
ax.axis('off')
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct='%.2f', colors=colors)
ax.set(ylabel='', title=col, aspect='equal')
axes[0, 0].legend(bbox_to_anchor=(0, 0.5))
fig.savefig('your_file.png') # Or whichever format you'd like
plt.show()
Which produce the following:
My question is, how can I remove the label based on a condition. For example I'd only want to display labels with percent > 20%. Such that the labels and value of a,c,d won't be displayed in X, etc.
The autopct argument from pie can be a callable, which will receive the current percentage. So you only would need to provide a function that returns an empty string for the values you want to omit the percentage.
Function
def my_autopct(pct):
return ('%.2f' % pct) if pct > 20 else ''
Plot with matplotlib.axes.Axes.pie
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct=my_autopct)
ax.set(ylabel='', title=col, aspect='equal')
fig.tight_layout()
Plot directly with the dataframe
axes = df.plot(kind='pie', autopct=my_autopct, figsize=(8, 6), subplots=True, layout=(2, 2), legend=False)
for ax in axes.flat:
yl = ax.get_ylabel()
ax.set(ylabel='', title=yl)
fig = axes[0, 0].get_figure()
fig.tight_layout()
If you need to parametrize the value on the autopct argument, you'll need a function that returns a function, like:
def autopct_generator(limit):
def inner_autopct(pct):
return ('%.2f' % pct) if pct > limit else ''
return inner_autopct
ax.pie(df[col], labels=df.index, autopct=autopct_generator(20), colors=colors)
For the labels, the best thing I can come up with is using list comprehension:
for ax, col in zip(axes.flat, df.columns):
data = df[col]
labels = [n if v > data.sum() * 0.2 else ''
for n, v in zip(df.index, data)]
ax.pie(data, autopct=my_autopct, colors=colors, labels=labels)
Note, however, that the legend by default is being generated from the first passed labels, so you'll need to pass all values explicitly to keep it intact.
axes[0, 0].legend(df.index, bbox_to_anchor=(0, 0.5))
For labels I have used:
def my_level_list(data):
list = []
for i in range(len(data)):
if (data[i]*100/np.sum(data)) > 2 : #2%
list.append('Label '+str(i+1))
else:
list.append('')
return list
patches, texts, autotexts = plt.pie(data, radius = 1, labels=my_level_list(data), autopct=my_autopct, shadow=True)
You can make the labels function a little shorter using list comprehension:
def my_autopct(pct):
return ('%1.1f' % pct) if pct > 1 else ''
def get_new_labels(sizes, labels):
new_labels = [label if size > 1 else '' for size, label in zip(sizes, labels)]
return new_labels
fig, ax = plt.subplots()
_,_,_ = ax.pie(sizes, labels=get_new_labels(sizes, labels), colors=colors, autopct=my_autopct, startangle=90, rotatelabels=False)