Legend only for one of two pies in pandas - python

I made two pies: one is inside the other. I also want to make a legend but only for the inner circle.
One more significant thing: the inner circle has only two labels, which are repeated 5 times, so when I make a legend for both pies, I get something like "paid, free, paid, free, etc."
...
# Inner-ring labels: the Free/Paid pair repeated five times.
titles = ['Free', 'Paid']
subgroup_names = titles * 5
subgroup_size = final.num.tolist()
a, b, c = plt.cm.Blues, plt.cm.Reds, plt.cm.Greens

fig, ax = plt.subplots()
ax.axis('equal')

# Outer ring, shaded from darker to lighter blue.
outer_colors = [a(shade) for shade in (0.7, 0.6, 0.5, 0.4, 0.3)]
mypie, _ = ax.pie(
    group_size,
    radius=2.5,
    labels=group_names,
    colors=outer_colors,
)
plt.setp(mypie, width=1, edgecolor='white')

# Inner ring, labelled with the repeated Free/Paid names.
inner_colors = [b(0.5), c(0.5)]
mypie2, _ = ax.pie(
    subgroup_size,
    radius=1.6,
    labels=subgroup_names,
    labeldistance=0.7,
    colors=inner_colors,
)
plt.setp(mypie2, width=0.8, edgecolor='white')

# Default legend: no handles or labels are passed explicitly.
plt.legend()
plt.show()

plt.legend accepts a list of handles and labels as parameters. get_legend_handles_labels() conveniently gets a list of handles and of labels that would normally be used. Via list indexing you can grab the interesting part.
To center the labels inside the plot, the textprops= parameter of plt.pie accepts a horizontal and vertical alignment.
import matplotlib.pyplot as plt
import numpy as np
# Demo data: five groups, each split into a Free and a Paid part.
titles = ['Free', 'Paid']
subgroup_names = 5 * titles
subgroup_size = np.random.uniform(10, 30, len(subgroup_names))
# Each outer wedge is the sum of one consecutive run of len(titles) subgroups.
group_size = subgroup_size.reshape(-1, len(titles)).sum(axis=1)
group_names = [f'Group {l}' for l in 'abcde']
a, b, c = [plt.cm.Blues, plt.cm.Reds, plt.cm.Greens]

# Outer ring
fig, ax = plt.subplots()
ax.axis('equal')
mypie, _ = ax.pie(group_size, radius=2.5, labels=group_names,
                  colors=[a(0.7), a(0.6), a(0.5), a(0.4), a(0.3)])
plt.setp(mypie, width=1, edgecolor='white')

# Inner ring; the text properties centre each label on its anchor point.
mypie2, _ = ax.pie(subgroup_size, radius=1.6, labels=subgroup_names,
                   labeldistance=0.7, colors=[b(0.5), c(0.5)],
                   textprops={'va': 'center', 'ha': 'center'})
plt.setp(mypie2, width=0.8, edgecolor='white')

# The handle list starts with the outer-ring wedges, so skip those and keep
# exactly one inner-ring entry per title (instead of a hard-coded count).
handles, labels = ax.get_legend_handles_labels()
labels_to_skip = len(group_names)
inner_entries = slice(labels_to_skip, labels_to_skip + len(titles))
ax.legend(handles[inner_entries], labels[inner_entries])
plt.show()
PS: To leave the labels out of the pie chart and show them only in the legend, call plt.pie() without the labels= parameter, and create the legend from the patches returned by plt.pie() (limited to the first two in this case):
# Inner ring, drawn without wedge labels
inner_colors = [b(0.5), c(0.5)]
mypie2, _ = ax.pie(
    subgroup_size,
    radius=1.6,
    labeldistance=0.7,
    colors=inner_colors,
)
plt.setp(mypie2, width=0.8, edgecolor='white')
# Legend uses the first len(titles) wedges as handles, one per title.
legend_handles = mypie2[:len(titles)]
plt.legend(legend_handles, titles)

Related

Prevent labels from overlapping in matplotlib vertical timeline plot

The methods I normally use to prevent data labels from overlapping don't seem to apply to this matplotlib-based vertical timeline. Normally I would use something like autofmt_xdate.
I don't actually care how "tall" (long) the chart needs to be or if the vertical distance between the items perfectly preserves the ratio of the date gap between items. I just don't want the text labels to overlap.
I am not very experienced with matplotlib, but thus far I've determined that the code that controls the text labels is
_ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif', fontweight='bold', color='royalblue',fontsize=12)
where d is a timestamp being used as a y-axis coordinate. I tried creating a loop counter and a list to contain the value of d from each loop iteration, so that I could make a comparison between the current and prior date, then add a fixed value (e.g. 15 days) to the current value of d if the prior value of d was too close. That didn't seem to do the trick.
The plot code:
# Load the events and keep only the rows with Year after 1939.
chartdata = pd.read_csv("data/Events.csv").query('Year > 1939')
dates = pd.to_datetime(chartdata['Date_Clean_Approx'])

# Extend the y-range two years beyond the earliest and latest event.
earliest, latest = np.min(dates), np.max(dates)
min_date = date(earliest.year - 2, earliest.month, earliest.day)
max_date = date(latest.year + 2, latest.month, latest.day)

# labels with associated dates
labels = ['{0:%d %b %Y}:\n{1}'.format(d, l)
          for l, d in zip(chartdata['Name'], dates)]

fig, ax = plt.subplots(figsize=(6, 32), constrained_layout=True)
_ = ax.set_xlim(-20, 20)
_ = ax.set_ylim(min_date, max_date)

# Central line with a two-layer marker at every event date.
_ = ax.axvline(0, ymin=0.05, ymax=0.95, c='deeppink', zorder=1)
marker_x = np.zeros(len(dates))
_ = ax.scatter(marker_x, dates, s=120, c='palevioletred', zorder=2)
_ = ax.scatter(marker_x, dates, s=30, c='darkmagenta', zorder=3)

# Labels alternate sides: even indices on the right (+2), odd on the left (-2).
label_offsets = np.full(len(dates), 2.0)
label_offsets[1::2] = -2.0
for i, (l, d) in enumerate(zip(labels, dates)):
    d = d - timedelta(days=90)
    align = 'left' if i % 2 == 0 else 'right'
    _ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif',
                fontweight='bold', color='royalblue', fontsize=12)

# Short horizontal stems from the central line towards each label.
stems = np.full(len(dates), 2.0)
stems[1::2] *= -1.0
x = ax.hlines(dates, 0, stems, color='darkmagenta')

# hide lines around chart
for spine in ("left", "top", "right", "bottom"):
    _ = ax.spines[spine].set_visible(False)

# hide tick labels
_ = ax.set_xticks([])
_ = ax.set_yticks([])

_ = ax.set_title(
    'UAP (UFO) Milestones, 1940 - Present',
    fontweight="bold",
    fontfamily='serif',
    fontsize=16,
    color='royalblue',
)
pyplot(fig)
As you can see in the image, I increased the height of the plot to reduce overlap in the labels. The plot got very long but still has overlap in text. Once a solution is determined I think that embedding the logic to "adjust relative gap size and chart height as needed to avoid text label overlap" into a convenient plot function would be a large contribution to the matplotlib library.
I'm definitely open to better, more programmatic, solutions but for now I replaced the dates with a same-length vector of consecutive integers and used it in place of the actual dates everywhere except within the text of the labels.
###### Timeline#
chartdata = pd.read_csv(
    "/home/kodachi/Documents/ET/aliendb/www/app/data/Events.csv"
)
chartdata = chartdata.query('Year > 1939')
dates = pd.to_datetime(chartdata['Date_Clean_Approx'])

# Evenly spaced stand-in positions (1..N), one per event, used on the y-axis
# instead of the real dates so that labels of close-together events cannot
# overlap. np.arange(1, N + 1) yields exactly N positions; the previous
# np.c_[1:N] produced only N - 1, which silently dropped the last event.
fake_d = np.arange(1, len(dates) + 1)

labels = chartdata['Name']
# labels with associated dates (the real dates appear only in the text)
labels = ['{0:%d %b %Y}:\n{1}'.format(d, l) for l, d in zip(labels, dates)]

fig, ax = plt.subplots(figsize=(8, 28))
_ = ax.set_xlim(-20, 20)
# Upper limit follows the number of events instead of a hard-coded value.
_ = ax.set_ylim(1, len(fake_d) + 1)

_ = ax.axvline(0, ymin=0.05, ymax=.985, c='deeppink', zorder=1)
_ = ax.scatter(np.zeros(len(fake_d)), fake_d, s=120, c='palevioletred', zorder=2)
_ = ax.scatter(np.zeros(len(fake_d)), fake_d, s=30, c='darkmagenta', zorder=3)

# Labels alternate sides: even indices at x=+2, odd indices at x=-2.
label_offsets = np.repeat(2.0, len(fake_d))
label_offsets[1::2] = -2.0
for i, (l, d) in enumerate(zip(labels, fake_d)):
    align = 'left' if i % 2 == 0 else 'right'
    _ = ax.text(label_offsets[i], d, l, ha=align, fontfamily='serif',
                fontweight='bold', color='royalblue', fontsize=12)

# Short horizontal stems from the central line towards each label.
stems = np.repeat(2.0, len(fake_d))
stems[1::2] *= -1.0
x = ax.hlines(fake_d, 0, stems, color='darkmagenta')

# hide lines around chart
for spine in ["left", "top", "right", "bottom"]:
    _ = ax.spines[spine].set_visible(False)

# hide tick labels
_ = ax.set_xticks([])
_ = ax.set_yticks([])

_ = ax.set_title('UAP (UFO) Milestones, 1940 - Present',
                 fontweight="bold",
                 fontfamily='serif',
                 fontsize=16,
                 color='darkgreen')

Interactive overlay of multiple histograms in matplotlib

I have multiple plots to show that overlap each other.
import matplotlib.pyplot as plt
a = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]
b = [5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1]
c = [4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10, 10]

# One common value range so every histogram is binned identically.
combined = a + b + c
x_min, x_max = min(combined), max(combined)

# Draw the three semi-transparent histograms on the same axes.
for values, name in ((a, "a"), (b, "b"), (c, "c")):
    plt.hist(values, range=(x_min, x_max), bins=10, alpha=0.5, label=name)

plt.legend()
plt.show()
Is it possible for me to generate all the individual plots in one step, then allow the user to interactively choose which to overlay in a second step?
In this example, a correct solution would have three interactive check boxes (one for each plot). Because there are 3 check boxes, there are 2^3=8 possible ways the user could specify the plot.
Obviously, you have to write your own function. Matplotlib hist() returns a BarContainer - a fancy name for a list of rectangle objects, i.e, the bars of the histogram. We can set the visibility of each rectangle like we can set the visibility of each line in a line plot. An implementation therefore could look like this:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import CheckButtons
def select_plots(list_of_inputs, labels=list("abcdefghijklmn"), bins=10):
    """Show overlaid histograms with one check box per data set.

    Every data set in ``list_of_inputs`` is drawn as a semi-transparent
    histogram over a shared value range. A check box per histogram toggles
    the visibility of its bars. The call blocks in ``plt.show()``.

    Parameters:
        list_of_inputs: sequence of non-empty numeric sequences, one per
            histogram.
        labels: label per data set; only the first ``len(list_of_inputs)``
            entries are used.
        bins: forwarded unchanged to ``Axes.hist``.

    Returns:
        Indices of the data sets whose check box was still ticked after
        ``plt.show()`` returned.
    """
    nr_of_hists = len(list_of_inputs)
    labels = labels[:nr_of_hists]
    # A common range keeps the bins of all histograms aligned.
    x_min = min(min(values) for values in list_of_inputs)
    x_max = max(max(values) for values in list_of_inputs)

    # Collect the bar containers generated by the hist plots.
    barcontainers = []
    fig, ax = plt.subplots(figsize=(10, 8))
    # "values" rather than "list": the loop must not shadow the builtin.
    for label, values in zip(labels, list_of_inputs):
        _, _, curr_barcontainer = ax.hist(
            values, range=(x_min, x_max), bins=bins, alpha=0.5, label=label
        )
        barcontainers.append(curr_barcontainer)

    # Create check boxes and colour them to correspond with the plots.
    ax_chkbox = plt.axes([0.8, 0.9 - 0.05 * nr_of_hists, 0.1, 0.05 * nr_of_hists])
    check_boxes = CheckButtons(ax_chkbox, labels, np.ones(nr_of_hists, dtype=bool))
    handles, _ = ax.get_legend_handles_labels()
    # NOTE(review): CheckButtons.rectangles is believed to be deprecated in
    # newer Matplotlib releases -- confirm against the installed version.
    for rect, handle in zip(check_boxes.rectangles, handles):
        rect.set_facecolor(handle.get_facecolor())

    def chkboxcall(label):
        """Sync the visibility of every bar with its check box, then redraw."""
        for barcontainer, status in zip(barcontainers, check_boxes.get_status()):
            for rect in barcontainer:
                rect.set_visible(status)
        fig.canvas.draw_idle()

    # Connect the widget callback.
    check_boxes.on_clicked(chkboxcall)
    plt.show()

    # Report the last status of the check boxes once plt.show() has returned.
    return [i for i, status in enumerate(check_boxes.get_status()) if status]
# Example data sets and the label shown for each of them.
a = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6]
b = [5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1]
c = [4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10, 10]
all_lists = [a, b, c]
all_labels = ["a", "b", "c"]

# bins can be an integer or a list of ranges such as [0, 2, 6, 7, 10]
bins = 10

# Let the user pick, then report the labels of the data sets left ticked.
idx = select_plots(all_lists, all_labels, bins)
chosen_labels = (all_labels[i] for i in idx)
print("The following data are valid:", *chosen_labels)
Sample output:
>>>The following data are valid: a c
The problem with referring to line examples for histograms is that histograms are actually the composition of multiple lines. This means, when the button is called, you need to loop over the lines to get the expected behavior.
In the following example we see how we loop over each part p of the histogram. We change the transparency depending on its current value. Note: %matplotlib qt is required for jupyter notebook.
import matplotlib.pyplot as plt
from matplotlib.widgets import CheckButtons
%matplotlib qt
### Specify Data And Range
a = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
b = [5,4,4,3,3,3,2,2,2,2,1,1,1,1,1]
c = [4,5,5,6,6,7,7,7,8,8,8,9,9,10,10]
x_min, x_max = min(a + b + c), max(a + b + c)

### Create Plots
fig, ax = plt.subplots()
# Index 2 of the hist() result holds the bar patches of that histogram.
p1 = ax.hist(a, range=(x_min,x_max), bins = 10, alpha=0.3, label="a")[2]
p2 = ax.hist(b, range=(x_min,x_max), bins = 10, alpha=0.3, label="b")[2]
p3 = ax.hist(c, range=(x_min,x_max), bins = 10, alpha=0.3, label="c")[2]
plt.subplots_adjust(left=0.2)
plots = [p1, p2, p3]
# Attach the legend to the histogram axes explicitly. plt.legend() acts on
# the current axes, which becomes the check-button axes once plt.axes() is
# called below, so the histogram legend would otherwise be lost.
ax.legend()

# Make Check Buttons
rax = plt.axes([0.05, 0.4, 0.1, 0.15])
labels = ["a", "b", "c"]
# All histograms start visible, so all boxes start ticked.
check = CheckButtons(rax, labels, [True] * len(labels))


def action(label, default_transparency = 0.3):
    """Toggle the histogram named ``label`` between visible and transparent.

    Bars currently at ``default_transparency`` are made fully transparent;
    any other alpha is reset to ``default_transparency``.
    """
    index = labels.index(label)
    for p in plots[index]:
        if p.get_alpha() == default_transparency:
            p.set_alpha(0.0)
        else:
            p.set_alpha(default_transparency)
    plt.draw()


### Run Widget
check.on_clicked(action)
plt.show()

Scatter plot with different groups and marginal histograms for each group

I already have a scatter plot with different groups of elements and histograms on the margins, but they are linked to the whole data, not to the individual groups:
I'd like to have 2 histograms, one for each group of elements. How do I do that?
Here's my code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
from matplotlib.colors import LinearSegmentedColormap
data = pd.read_csv("data.csv")
x = data['Fe']
y = data['V']
z = data['Discovery']

# Fixing random state for reproducibility
np.random.seed(19680801)

# definitions for the axes (figure-fraction coordinates)
left, width = 0.1, 0.7
bottom, height = 0.1, 0.7
spacing = 0.05
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom + height + spacing, width, 0.2]
rect_histy = [left + width + spacing, bottom, 0.2, height]

# start with a rectangular Figure
fig = plt.figure(figsize=(7, 6))
ax_scatter = plt.axes(rect_scatter)
ax_scatter.tick_params(direction='in', top=True, right=True)
ax_histx = plt.axes(rect_histx)
ax_histx.tick_params(direction='in', labelbottom=True)
ax_histy = plt.axes(rect_histy)
ax_histy.tick_params(direction='in', labelleft=False)

# the function that separates the dots in different classes:
# 1 = Transit, 2 = Radial Velocity, 0 = anything else
classes = np.zeros(len(x))
classes[(z == 'Transit')] = 1
classes[(z == 'Radial Velocity')] = 2

# create color map:
colors = ['purple', 'orange']
cm = LinearSegmentedColormap.from_list('custom', colors, N=len(colors))

# the scatter plot:
scatter = ax_scatter.scatter(x, y, c=classes, s=10, cmap=cm, alpha=0.6)
lines, labels = scatter.legend_elements()

# legend with custom labels
labels = [r'Transit', r'Radial Velocity']
legend = ax_scatter.legend(lines, labels,
                           loc="upper left", title="Planetary Discovery Method")
ax_scatter.add_artist(legend)

# now determine nice limits by hand:
binwidth = 0.1
ax_scatter.set_xlim((-1, 0.7))
ax_scatter.set_ylim((-0.9, 0.9))

# Bin edges shared by both marginal histograms: symmetric about zero, spaced
# by binwidth and wide enough for the largest |x| or |y|. "bins" was
# previously used without ever being defined, which raised a NameError.
lim = np.ceil(max(x.abs().max(), y.abs().max()) / binwidth) * binwidth
bins = np.arange(-lim, lim + binwidth, binwidth)

# histogram: weights of 1/len make the bar heights of each histogram sum to 1
weights = np.ones_like(x) / len(x)
weights2 = np.ones_like(y) / len(y)
ax_histx.hist(x, bins=bins, weights=weights, color='chartreuse')
# The y histogram uses the weights built from y.
ax_histy.hist(y, bins=bins, weights=weights2, orientation='horizontal', color='darkmagenta')
ax_histx.set_xlim(ax_scatter.get_xlim())
ax_histy.set_ylim(ax_scatter.get_ylim())

# labeling
ax_scatter.set_xlabel('[Fe/H]')
ax_scatter.set_ylabel('[V/H]')
ax_histy.set_xlabel('Relative Dist.')
ax_histx.set_ylabel('Relative Dist.')
plt.show()
I'll add an example of a plot I'm trying to reach:

Set size of matplotlib subplots

I created two subplots on a MPL figure, but I'm having a hard time setting their size. I want the space to be split between the two charts, so each chart needs to have 50% of the total width of the figure, and I want them to have the same height as the figure. Here is how I initialized the subplots:
fig = plt.figure(facecolor='#131722', dpi=155, figsize=(10, 3))
# NOTE(review): the two axes are placed on different grids (3x3 vs 5x3), and
# colspan=5 exceeds the 3 columns declared for the second grid -- confirm the
# intended layout.
ax1 = plt.subplot2grid((3,3), (2,0), facecolor='#131722')
ax2 = plt.subplot2grid((5,3), (2,2), colspan=5, rowspan=4, facecolor='#131722')

# One [buy colour, sell colour] pair per entry of List.
Colors = [['#0400ff', '#FF0000'], ['#09ff00', '#ff8c00']]

# enumerate() gives each entry its own position. List.index(x) rescanned the
# list on every iteration and returned the first match for equal entries.
for Index, x in enumerate(List):
    # Each ask/bid entry is indexed as (rate, total).
    rate_sell = [y[0] for y in x['data']['asks']]
    total_sell = [y[1] for y in x['data']['asks']]
    rate_buy = [y[0] for y in x['data']['bids']]
    total_buy = [y[1] for y in x['data']['bids']]

    # Bids go on the first axes, asks on the second; each curve is also
    # filled down to zero.
    ax1.plot(rate_buy, total_buy, color=Colors[Index][0], linewidth=0.5, alpha=1, label='test')
    ax2.plot(rate_sell, total_sell, color=Colors[Index][1], alpha=0.5, linewidth=1, label=x['exchange'])
    ax1.fill_between(rate_buy, 0, total_buy, facecolor=Colors[Index][0], alpha=0.4)
    ax2.fill_between(rate_sell, 0, total_sell, facecolor=Colors[Index][1], alpha=0.4)
And this is what i'm getting:
use plt.tight_layout() before calling plt.show().

Python Matplotlib Box Plot Two Data Sets Side by Side

I would like to make a boxplot using two data sets. Each set is a list of floats. A and B are examples of the two data sets
# Two sample data sets: 10 groups of 100 uniform random floats each.
A = []
B = []
# range() replaces the Python 2-only xrange(), which is a NameError on
# Python 3. Values are drawn in the same order as before (A's group first,
# then B's, for every iteration).
for _ in range(10):
    A.append([random.random() for _ in range(100)])
    B.append([random.random() for _ in range(100)])
I would like the boxplots for A and B to appear next to each other, not on each other. Also, I would like more gap between the different x-values and perhaps thinner boxes. My code is below and so is the plot it produces (the present code puts A on top of B). Thanks for helping.
def draw_plot(data, edge_color, fill_color):
    """Draw one box per entry of ``data`` on the module-level ``ax``.

    Parameters:
        data: the data sets passed straight to ``ax.boxplot``.
        edge_color: colour applied to boxes, whiskers, fliers, medians
            and caps.
        fill_color: face colour of the boxes.
    """
    bp = ax.boxplot(data, patch_artist=True)
    for element in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
        plt.setp(bp[element], color=edge_color)
    # range() replaces the Python 2-only xrange().
    plt.xticks(range(11))
    for patch in bp['boxes']:
        patch.set(facecolor=fill_color)


fig, ax = plt.subplots()
# Both calls use the default box positions, so B is drawn on top of A.
draw_plot(A, "tomato", "white")
draw_plot(B, "skyblue", "white")
plt.savefig('sample_box.png', bbox_inches='tight')
plt.close()
Looking at the documentation of boxplot we find that it has a positions argument, which can be used to set the positions of the boxplots. You would need to supply a list or array with as many elements as you want to draw boxplots.
import numpy as np; np.random.seed(1)
import matplotlib.pyplot as plt
# Two sample data sets with 10 columns of 100 values; one box per column.
A = np.random.rand(100,10)
B = np.random.rand(100,10)


def draw_plot(data, offset, edge_color, fill_color):
    """Draw one box per column of ``data``, shifted sideways by ``offset``.

    Parameters:
        data: 2-D array; each column becomes one box.
        offset: horizontal shift added to the integer positions 0..ncols-1,
            so several data sets can sit side by side.
        edge_color: colour applied to boxes, whiskers, fliers, medians
            and caps.
        fill_color: face colour of the boxes.
    """
    pos = np.arange(data.shape[1]) + offset
    # manage_ticks=False stops boxplot from resetting the x ticks/limits.
    # NOTE(review): this keyword was called manage_xticks in older Matplotlib
    # releases -- confirm against the installed version.
    bp = ax.boxplot(data, positions=pos, widths=0.3, patch_artist=True,
                    manage_ticks=False)
    for element in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
        plt.setp(bp[element], color=edge_color)
    for patch in bp['boxes']:
        patch.set(facecolor=fill_color)


fig, ax = plt.subplots()
draw_plot(A, -0.2, "tomato", "white")
draw_plot(B, +0.2, "skyblue", "white")
# range() replaces the Python 2-only xrange().
plt.xticks(range(10))
plt.savefig(__file__+'.png', bbox_inches='tight')
plt.show()
plt.close()

Categories