Matplotlib pie charts as scatter plot

Matplotlib pie charts as scatter plot - python

I have an interesting problem where I am trying to use multiple matplotlib pie charts as a scatter plot. I have read this post regarding this matplotlib tutorial and was able to get those working. However, I found that I was able to achieve the same results using the built-in pie function and plotting many pie charts on the same axis.
When using this alternative method, I found that after plotting the pie charts the axes lose their labels, and whenever you pan, the original scatter data stays inside the axes bounds as it should, but the pie charts are only contained inside the figure canvas.
The following code replicates the issue that I'm having.
import matplotlib.pyplot as plt
import pandas as pd
import random
def rand(n=10):
    """Simulate some random data.

    Return a list of ``n`` random integers between 0 and 100 (both ends
    included, as ``random.randint`` is inclusive). ``n`` defaults to 10,
    which is the column length used for the simulated dataframe.
    """
    return [random.randint(0, 100) for _ in range(n)]
def plot_pie(x, ax):
    """Draw one pie chart on ``ax`` for a single dataframe row.

    x  -- row holding the wedge sizes in columns 'a', 'b' and 'c', and the
          pie centre in 'lat' (used as the x coordinate) and 'lon' (used as
          the y coordinate).
    ax -- matplotlib axes the pie is drawn on.
    """
    ax.pie(x[['a', 'b', 'c']], center=(x['lat'], x['lon']), radius=1,
           colors=['r', 'b', 'g'])
# my data is stored in a similar styled dataframe that I read from a csv and the data is static
sim_data = pd.DataFrame({'a':rand(),'b':rand(),'c':rand(), 'lat':rand(),'lon':rand()})
fig, ax = plt.subplots()
# Hollow red circles mark the positions where the pie charts will be centred.
plt.scatter(x=sim_data['lat'], y=sim_data['lon'], s=1000, facecolor='none',edgecolors='r')
# Remember the limits chosen for the scatter plot so they can be restored below.
y_init = ax.get_ylim()
x_init = ax.get_xlim()
# Draw one pie chart per dataframe row.
sim_data.apply(lambda x : plot_pie(x,ax), axis=1)
# Restore the limits captured before the pie charts were drawn.
ax.set_ylim(y_init)
ax.set_xlim(x_init)
plt.show()
The reason that I reset the x and y limits of the axis is that I assume the pie function automatically sets the bounds of the axes to the last pie chart and this was my work around.
UPDATE
After reading the docs again I found that matplotlib pie chart objects as a default are set to not clip to the extents of any axes. To solve it, just updating that parameter seemed to work for me. The following code is the solution to my problem. I also found that by plotting each pie chart I would lose my axes ticks, to solve that I had to pass the frame parameter to the pie charts.
def plot_pie(x, ax):
    """Draw one pie chart for row ``x`` on ``ax``, clipped to the axes.

    x  -- row holding the wedge sizes in columns 'a', 'b' and 'c', and the
          pie centre in 'lat' (x coordinate) and 'lon' (y coordinate).
    ax -- matplotlib axes the pie is drawn on.
    """
    ax.pie(x[['a', 'b', 'c']], center=(x['lat'], x['lon']), radius=1,
           colors=['r', 'b', 'g'],
           # Pie wedges are not clipped to the axes by default; turn
           # clipping on so they stay inside the axes when panning.
           wedgeprops={'clip_on': True},
           # Without frame=True the axes ticks are lost after plotting.
           frame=True)

Data generated as in original post. I added a frame for each plot for clarity.
def plot_pie(x, ax, r=1):
    """Draw one pie chart of radius ``r`` on ``ax`` for a single dataframe row.

    x  -- row holding the wedge sizes in columns 'a', 'b' and 'c', and the
          pie centre in 'lat' (x coordinate) and 'lon' (y coordinate).
    ax -- matplotlib axes the pie is drawn on.
    r  -- radius for pieplot size on a scatterplot, in data units.
    """
    ax.pie(x[['a', 'b', 'c']], center=(x['lat'], x['lon']), radius=r,
           colors=['r', 'b', 'g'])
# One row of three axes to compare the approaches side by side.
fig, axs = plt.subplots(1, 3, figsize=(15, 5))
fig.patch.set_facecolor('white')
# original plot: limits are captured after the scatter and restored after the pies
ax = axs[0]
ax.scatter(x=sim_data['lat'], y=sim_data['lon'], s=1000, facecolor='none', edgecolors='r')
y_init = ax.get_ylim()
x_init = ax.get_xlim()
sim_data.apply(lambda x : plot_pie(x,ax), axis=1)
ax.set_ylim(y_init)
ax.set_xlim(x_init)
ax.set_title('Original')
ax.set_frame_on(True)
# r-beginners' solution: limits run from 0 to 110% of the scatter's upper limit
ax = axs[1]
ax.scatter(x=sim_data['lat'], y=sim_data['lon'], s=1000, facecolor='none', edgecolors='r')
y_init = ax.get_ylim()
x_init = ax.get_xlim()
sim_data.apply(lambda x : plot_pie(x,ax), axis=1)
ax.set_ylim([0, y_init[1]*1.1])
ax.set_xlim([0, x_init[1]*1.1])
ax.set_title('r-beginners')
ax.set_frame_on(True)
# my solution
ax = axs[2]
# do not use `s=` for size, it will not work properly when you are scattering pieplots
# because pieplots will be plotted above them
ax.scatter(x=sim_data['lat'], y=sim_data['lon'], s=0)
# get min/max values for the axes
y_init = ax.get_ylim()
x_init = ax.get_xlim()
# the pie size is controlled through the radius (in data units) instead
sim_data.apply(lambda x : plot_pie(x, ax, r=7), axis=1)
# from zero to xlim/ylim with step 10
_ = ax.yaxis.set_ticks(range(0, round(y_init[1])+10, 10))
_ = ax.xaxis.set_ticks(range(0, round(x_init[1])+10, 10))
_ = ax.set_title('My')
ax.set_frame_on(True)

Related

python bar chart total label on bar

plt.figure(figsize = (8,5))
sns.countplot(data = HRdfMerged, x = 'Gender', hue='Attrition').set_title('Gender vs Attrition')
I'm having a hard time adding a label to the top of my bar that states the total number. I have tried many different ways but can't get it right. Im using matplotlib. Picture of bar chart added.

Once you have called sns.countplot, we will explore the list ax.patches to get information from the bars and place the texts you want:
# Imports.
import matplotlib.pyplot as plt
import seaborn as sns
# Load a dataset to replicate what you have in the question.
data = sns.load_dataset("titanic")
fig, ax = plt.subplots()  # Use the object-oriented approach with Matplotlib when you can.
sns.countplot(data=data, x="class", hue="who", ax=ax)
ax.set_title("title goes here")
# For each bar, grab its coordinates and colors, find a suitable location
# for a text and place it there.
for patch in ax.patches:
    height = patch.get_height()
    # Skip patches that are not real bars (NaN or zero height); they would
    # otherwise get a stray "nan"/"0" label.
    if not height > 0:
        continue
    x0, y0 = patch.get_xy()  # Bottom-left corner.
    x0 += patch.get_width() / 2  # Middle of the width.
    y0 += height  # Top of the bar.
    color = patch.get_facecolor()
    # The bar height is the count; print it as an integer ("216") rather
    # than the float repr ("216.0").
    ax.text(x0, y0, f"{height:.0f}", ha="center", va="bottom", color="white",
            clip_on=True, bbox=dict(ec="black", fc=color))
# Show the figure only once the labels have been added.
fig.show()
Play around with the kwargs of ax.text to get the result you prefer. An alternative:
ax.text(x0, y0, str(y0), ha="center", va="bottom", color=color, clip_on=True)

You can also use the convenient Axes.bar_label method here to do this in just a couple lines.
Since seaborn does not return the BarContainer objects to us, we will need to access them from the Axes object via the Axes.containers attribute.
import matplotlib.pyplot as plt
import seaborn as sns
data = sns.load_dataset("titanic")
fig, ax = plt.subplots()
sns.countplot(data=data, x="class", hue="who", ax=ax)
# Label the bars of every bar container found on the axes.
for bar_contain in ax.containers:
    ax.bar_label(bar_contain)

Rotating boxplot x-axis labels in subplots [duplicate]

Im trying to plot a scatter matrix. I'm building on the example given in this thread Is there a function to make scatterplot matrices in matplotlib?. Here I have just modified the code slightly to make the axis visible for all the subplots. The modified code is given below
import itertools
import numpy as np
import matplotlib.pyplot as plt
def main():
    """Build a random 4-variable data set and show its scatterplot matrix."""
    np.random.seed(1977)  # fixed seed so the figure is reproducible
    numvars, numdata = 4, 10
    # One row per variable, values drawn from [0, 10).
    data = 10 * np.random.random((numvars, numdata))
    fig = scatterplot_matrix(data, ['mpg', 'disp', 'drat', 'wt'],
                             linestyle='none', marker='o', color='black',
                             mfc='none')
    fig.suptitle('Simple Scatterplot Matrix')
    plt.show()
def scatterplot_matrix(data, names, **kwargs):
    """Plots a scatterplot matrix of subplots. Each row of "data" is plotted
    against other rows, resulting in a nrows by nrows grid of subplots with the
    diagonal subplots labeled with "names". Additional keyword arguments are
    passed on to matplotlib's "plot" command. Returns the matplotlib figure
    object containing the subplot grid."""
    numvars, numdata = data.shape
    fig, axes = plt.subplots(nrows=numvars, ncols=numvars, figsize=(8,8))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    for ax in axes.flat:
        # Keep the ticks and labels visible on every subplot
        ax.xaxis.set_visible(True)
        ax.yaxis.set_visible(True)

        # # Set up ticks only on one side for the "edge" subplots...
        # if ax.is_first_col():
        #     ax.yaxis.set_ticks_position('left')
        # if ax.is_last_col():
        #     ax.yaxis.set_ticks_position('right')
        # if ax.is_first_row():
        #     ax.xaxis.set_ticks_position('top')
        # if ax.is_last_row():
        #     ax.xaxis.set_ticks_position('bottom')

    # Plot the data.
    for i, j in zip(*np.triu_indices_from(axes, k=1)):
        # Fill both the upper-triangle cell and its mirrored counterpart.
        for x, y in [(i,j), (j,i)]:
            axes[x,y].plot(data[x], data[y], **kwargs)

    # Label the diagonal subplots...
    for i, label in enumerate(names):
        axes[i,i].annotate(label, (0.5, 0.5), xycoords='axes fraction',
                           ha='center', va='center')

    # Turn on the proper x or y axes ticks.
    for i, j in zip(range(numvars), itertools.cycle((-1, 0))):
        axes[j,i].xaxis.set_visible(True)
        axes[i,j].yaxis.set_visible(True)

    fig.tight_layout()
    # NOTE: plt.xticks only acts on the current axes, so this rotates the
    # tick labels of the last subplot alone.
    plt.xticks(rotation=45)
    fig.show()
    return fig
main()
I can't seem to rotate the x-axis text of all the subplots. As can be seen, I have tried the plt.xticks(rotation=45) trick, but this seems to perform the rotation for the last subplot alone.

Just iterate through the axes tied to the figure, set the active axes to the iterated object, and modify:
for ax in fig.axes:
    # plt.xticks acts on the current axes only, so make each axes current
    # in turn before rotating its tick labels.
    plt.sca(ax)
    plt.xticks(rotation=90)

plt only acts on the current active axes. You should bring it inside your last loop where you set some of the labels visibility to True:
# Turn on the proper x or y axes ticks.
for i, j in zip(range(numvars), itertools.cycle((-1, 0))):
    axes[j,i].xaxis.set_visible(True)
    axes[i,j].yaxis.set_visible(True)
    # Rotate the labels on the axes objects themselves instead of going
    # through plt, which would only touch the current axes.
    for tick in axes[i,j].get_xticklabels():
        tick.set_rotation(45)
    for tick in axes[j,i].get_xticklabels():
        tick.set_rotation(45)

for ax in fig.axes:
    # Restrict the rotation to the x axis; without `axis='x'` tick_params
    # applies the rotation to the y tick labels as well.
    ax.tick_params(axis='x', labelrotation=90)

scatter plot color bar does not look right

I have written my code to create a scatter plot with a color bar on the right. But the color bar does not look right, in the sense that the color is too light to be mapped to the actual color used in the plot. I am not sure what is missing or wrong here. But I am hoping to get something similar to what's shown here: https://medium.com/@juliansteam/what-bert-topic-modelling-reveal-about-the-2021-unrest-in-south-africa-d0d15629a9b4 (about in the middle of the page)
df = ...  # data loading (placeholder)
df["topic"] = topics
# Plot parameters
top_n = topn
fontsize = 15
# some data preparation
to_plot = df.copy()
# Flag rows outside the top N topics as outliers by resetting only their
# topic. Assigning -1 to the whole row would also overwrite the x and y
# coordinates, collapsing every outlier onto the point (-1, -1).
to_plot.loc[to_plot.topic >= top_n, "topic"] = -1
outliers = to_plot.loc[to_plot.topic == -1]
non_outliers = to_plot.loc[to_plot.topic != -1]
# the actual plot
fig, ax = plt.subplots(figsize=(15, 15))
# Outliers are drawn first, in light grey, so the topics sit on top of them.
scatter_outliers = ax.scatter(outliers['x'], outliers['y'], color="#E0E0E0", s=1, alpha=.3)
scatter = ax.scatter(non_outliers['x'], non_outliers['y'], c=non_outliers['topic'], s=1, alpha=.3, cmap='hsv_r')
ax.text(0.99, 0.01, f"BERTopic - Top {top_n} topics", transform=ax.transAxes, horizontalalignment="right", color="black")
# Hide the tick marks and labels on both axes.
plt.xticks([], [])
plt.yticks([], [])
plt.colorbar(scatter)
plt.savefig(outfile+"_1.png", format='png', dpi=300)
# Release the figure once it has been written to disk.
plt.clf()
plt.close()
As you can see, an example plot looks like this. The color bar is created, but compared to that shown in the link above, the color is very light and does not seem to map to those on the scatter plot. Any suggestions?

The colorbar uses the given alpha=.3. In the scatterplot, many dots with the same color are superimposed, causing them to look brighter than a single dot.
One way to tackle this, is to create a ScalarMappable object to be used by the colorbar, taking the colormap and the norm of the scatter plot (but not its alpha). Note that simply changing the alpha of the scatter object (scatter.set_alpha(1)) would also change the plot itself.
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import numpy as np
# Ten clusters of 1000 points each; `c` holds the cluster index of every point.
x = np.random.normal(np.repeat(np.random.uniform(0, 20, 10), 1000))
y = np.random.normal(np.repeat(np.random.uniform(0, 10, 10), 1000))
c = np.repeat(np.arange(10), 1000)
scatter = plt.scatter(x, y, c=c, cmap='hsv_r', alpha=.3, s=3)
# Build the colorbar from a fresh ScalarMappable that shares the scatter's
# colormap and norm but not its alpha. A standalone ScalarMappable is not
# attached to any axes, so pass `ax` explicitly to tell colorbar where to
# take its space from.
plt.colorbar(ScalarMappable(cmap=scatter.get_cmap(), norm=scatter.norm),
             ax=plt.gca())
plt.tight_layout()
plt.show()

Pandas dataframe plotting: yscale log and xy label and legend issues

Intro
I am new to python, matplotlib and pandas. I spent a lot of time reviewing material to come up with the following. And I am stuck.
Question:
I am trying to plot using pandas. I have three Y axis and of which one is log scale.
I cannot figure out why the log function(1) and label function(2) doesn't work for my secondary axis ax2 in the code. It works everywhere else.
All the legends are separated (3). Is there a simpler way to handle this other than do manually.
When I plot the secondary axis part, separately it comes out fine. I ran the plot removing third axis, still problem persists. I put here the code with all axis as I need the solution proposed to work together in this manner.
Here methods are given for solving (3) alone but I am particularly looking for dataframe based plotting. Also other manual techniques are given in the same site, which I do not want to use!
Code and explanation
# Importing the basic libraries
import matplotlib.pyplot as plt
from pandas import DataFrame
# test3 = Dataframe with 5 columns
# (`.loc` replaces the `.ix` indexer, which no longer exists in current pandas)
test3 = df.loc[:,['tau','E_tilde','Max_error_red','time_snnls','z_t_gandb']]
# Setting up plot with 3 'y' axis
fig, ax = plt.subplots()
ax2, ax3 = ax.twinx(), ax.twinx()
# Move the third axis' spine outwards so it does not overlap the second one.
rspine = ax3.spines['right']
rspine.set_position(('axes', 1.25))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.75)
# Setting the color and labels
ax.set_xlabel('tau(nounit)')
ax.set_ylabel('Time(s)', color = 'b')
ax2.set_ylabel('Max_error_red', color = 'r')
ax3.set_ylabel('E_tilde', color = 'g')
# Setting the logscaling
ax.set_xscale('log') # Works
ax2.set_yscale('log')# Doesnt work
# Plotting the dataframe
test3.plot(x = 'tau', y = 'time_snnls', ax=ax, style='b-')
test3.plot(x = 'tau', y = 'Max_error_red', ax=ax2, style='r-', secondary_y=True)
test3.plot(x = 'tau', y = 'z_t_gandb', ax=ax, style='b-.')
test3.plot(x = 'tau', y = 'E_tilde', ax=ax3, style='g-')

The issue is the secondary_y=True option. Remove that, and it works fine. I think the problem is that you have already set up your twin axes, and having secondary_y=True is interfering with that.
As for the legend: set legend=False in each of your test3.plot commands, and then gather the legend handles and labels from the axes after you have made the plot using ax.get_legend_handles_labels(). Then you can plot them all on one legend.
Finally, to make sure the axes labels are set correctly, you must set them after you have plotted your data, as the pandas DataFrame plotting methods will overwrite whatever you have tried to set. By doing this afterwards, you make sure that it is your label that is set.
Here's a working script (with dummy data):
import matplotlib.pyplot as plt
from pandas import DataFrame
import numpy as np
# Fake up some data
test3 = DataFrame({
'tau':np.logspace(-3,0,100),
'E_tilde':np.linspace(100,0,100),
'Max_error_red':np.logspace(-2,1,100),
'time_snnls':np.linspace(5,0,100),
'z_t_gandb':np.linspace(16,15,100)
})
# Setting up plot with 3 'y' axis
fig, ax = plt.subplots()
ax2, ax3 = ax.twinx(), ax.twinx()
# Move the third axis' spine outwards so it does not overlap the second one.
rspine = ax3.spines['right']
rspine.set_position(('axes', 1.25))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.75)
# Setting the logscaling
ax.set_xscale('log') # Works
ax2.set_yscale('log') # Works too, now that secondary_y=True is no longer passed below
# Plotting the dataframe (legend=False: a single combined legend is built at the end)
test3.plot(x = 'tau', y = 'time_snnls', ax=ax, style='b-',legend=False)
test3.plot(x = 'tau', y = 'Max_error_red', ax=ax2, style='r-',legend=False)
test3.plot(x = 'tau', y = 'z_t_gandb', ax=ax, style='b-.',legend=False)
test3.plot(x = 'tau', y = 'E_tilde', ax=ax3, style='g-',legend=False)
# Setting the color and labels (after plotting, because the pandas plotting
# methods overwrite labels that were set earlier)
ax.set_xlabel('tau(nounit)')
ax.set_ylabel('Time(s)', color = 'b')
ax2.set_ylabel('Max_error_red', color = 'r')
ax3.set_ylabel('E_tilde', color = 'g')
# Gather all the legend handles and labels to plot in one legend
l1 = ax.get_legend_handles_labels()
l2 = ax2.get_legend_handles_labels()
l3 = ax3.get_legend_handles_labels()
# Each l* is a (handles, labels) pair; concatenate them across the three axes.
handles = l1[0]+l2[0]+l3[0]
labels = l1[1]+l2[1]+l3[1]
ax.legend(handles,labels,loc=5)
plt.show()

Add second axis to polar plot

I try to plot two polar plots in one figure. See code below:
fig = super(PlotWindPowerDensity, self).get_figure()
rect = [0.1, 0.1, 0.8, 0.8]
ax = WindSpeedDirectionAxes(fig, rect)
self.values_dict = collections.OrderedDict(sorted(self.values_dict.items()))
values = self.values_dict.items()
di, wpd = zip(*values)
wpd = np.array(wpd).astype(np.double)
wpdmask = np.isfinite(wpd)
theta = self.radar_factory(int(len(wpd)))
# spider plot
ax.plot(theta[wpdmask], wpd[wpdmask], color = 'b', alpha = 0.5)
ax.fill(theta[wpdmask], wpd[wpdmask], facecolor = 'b', alpha = 0.5)
# bar plot
ax.plot_bar(table=self.table, sectors=self.sectors, speedbins=self.wpdbins, option='wind_power_density', colorfn=get_sequential_colors)
fig.add_axes(ax)
return fig
The length of the bar is the data base (how many sampling points for this sector). The colors of the bars show the frequency of certain value bins (eg. 2.5-5 m/s) in the correspondent sector (blue: low, red: high). The blue spider plot shows the mean value for each sector.
In the shown figure, the values of each plot are similar, but this is rare. I need to assign the second plot to another axis and show this axis in another direction.
EDIT:
After the nice answer of Joe, i get the result of the figure.
That's almost everything i wanted to achieve. But there are some points i wasn't able to figure out.
The plot is made for dynamicly changing data bases. Therefore i need a dynamic way to get the same location of the circles. Till now I solve it with:
start, end = ax2.get_ylim()
ax2.yaxis.set_ticks(np.arange(0, end, end / len(ax.yaxis.get_ticklocs())))
means: for second axis i alter the ticks in order to fit the ticklocs to the one's of first axis.
In most cases i get some decimal places, but i don't want that, because it corrupts the clearness of the plot. Is there a way to solve this problem more smartly?
The yticks (the radial ones) range from 0 to the next-to-last circle. How can I make the values range from the first circle to the very last one (the border), the same as for the first axis?

So, as I understand it, you want to display data with very different magnitudes on the same polar plot. Basically you're asking how to do something similar to twinx for polar axes.
As an example to illustrate the problem, it would be nice to display the green series on the plot below at a different scale than the blue series, while keeping them on the same polar axes for easy comparison.:
import numpy as np
import matplotlib.pyplot as plt
numpoints = 30
theta = np.linspace(0, 2*np.pi, numpoints)
# Two random series whose magnitudes differ by a factor of five.
r1 = np.random.random(numpoints)
r2 = 5 * np.random.random(numpoints)
# Polar axes settings passed to the subplot via subplot_kw.
params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
fig, ax = plt.subplots(subplot_kw=params)
# Both series share one radial scale here, so the smaller green one is hard to read.
ax.fill_between(theta, r2, color='blue', alpha=0.5)
ax.fill_between(theta, r1, color='green', alpha=0.5)
plt.show()
However, ax.twinx() doesn't work for polar plots.
It is possible to work around this, but it's not very straight-forward. Here's an example:
import numpy as np
import matplotlib.pyplot as plt
def main():
    """Plot two random series of different magnitude on twinned polar axes."""
    numpoints = 30
    theta = np.linspace(0, 2*np.pi, numpoints)
    r1 = np.random.random(numpoints)
    r2 = 5 * np.random.random(numpoints)

    params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
    fig, ax = plt.subplots(subplot_kw=params)
    ax2 = polar_twin(ax)

    # The larger series goes on the original axes, the smaller one on the
    # twin, so each gets its own radial scale.
    ax.fill_between(theta, r2, color='blue', alpha=0.5)
    ax2.fill_between(theta, r1, color='green', alpha=0.5)
    plt.show()
def polar_twin(ax):
    """Return a new frameless polar axes overlaid on ``ax``.

    The twin is added to the same figure at the position of ``ax`` and
    copies its theta direction and theta offset. Its theta axis is hidden,
    so it only contributes its own radial tick labels.
    """
    ax2 = ax.figure.add_axes(ax.get_position(), projection='polar',
                             label='twin', frameon=False,
                             theta_direction=ax.get_theta_direction(),
                             theta_offset=ax.get_theta_offset())
    ax2.xaxis.set_visible(False)

    # Draw the twin's radial tick labels at 22.5 + 180 degrees, i.e. on the
    # opposite side from 22.5 degrees (presumably where the labels of `ax`
    # sit by default). The public setter replaces direct writes to the
    # private `_r_label_position` transform.
    ax2.set_rlabel_position(22.5 + 180)

    # Ensure that original axes tick labels are on top of plots in twinned axes
    for label in ax.get_yticklabels():
        ax.figure.texts.append(label)

    return ax2
main()
That does what we want, but it looks fairly bad at first. One improvement would be to color the tick labels to correspond to what we're plotting:
plt.setp(ax2.get_yticklabels(), color='darkgreen')
plt.setp(ax.get_yticklabels(), color='darkblue')
However, we still have the double-grids, which are rather confusing. One easy way around this is to manually set the r-limits (and/or r-ticks) such that the grids will fall on top of each other. Alternately, you could write a custom locator to do this automatically. Let's stick with the simple approach here:
ax.set_rlim([0, 5])
ax2.set_rlim([0, 1])
Caveat: Because shared axes don't work for polar plots, the implementation I have above will have problems with anything that changes the position of the original axes. For example, adding a colorbar to the figure will cause all sorts of problems. It's possible to work around this, but I've left that part out. If you need it, let me know, and I'll add an example.
At any rate, here's the full, stand-alone code to generate the final figure:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(1977)
def main():
    """Plot two random series on twinned polar axes with aligned grids."""
    numpoints = 30
    theta = np.linspace(0, 2*np.pi, numpoints)
    r1 = np.random.random(numpoints)
    r2 = 5 * np.random.random(numpoints)

    params = dict(projection='polar', theta_direction=-1, theta_offset=np.pi/2)
    fig, ax = plt.subplots(subplot_kw=params)
    ax2 = polar_twin(ax)

    ax.fill_between(theta, r2, color='blue', alpha=0.5)
    ax2.fill_between(theta, r1, color='green', alpha=0.5)

    # Color each set of radial tick labels like the series it belongs to.
    plt.setp(ax2.get_yticklabels(), color='darkgreen')
    plt.setp(ax.get_yticklabels(), color='darkblue')
    # Fix the radial limits by hand so that the two grids fall on top of
    # each other.
    ax.set_ylim([0, 5])
    ax2.set_ylim([0, 1])

    plt.show()
def polar_twin(ax):
    """Return a new frameless polar axes overlaid on ``ax``.

    The twin is added to the same figure at the position of ``ax`` and
    copies its theta direction and theta offset. Its theta axis is hidden,
    so it only contributes its own radial tick labels.
    """
    ax2 = ax.figure.add_axes(ax.get_position(), projection='polar',
                             label='twin', frameon=False,
                             theta_direction=ax.get_theta_direction(),
                             theta_offset=ax.get_theta_offset())
    ax2.xaxis.set_visible(False)

    # Draw the twin's radial tick labels at 22.5 + 180 degrees, i.e. on the
    # opposite side from 22.5 degrees (presumably where the labels of `ax`
    # sit by default). The public setter replaces direct writes to the
    # private `_r_label_position` transform.
    ax2.set_rlabel_position(22.5 + 180)

    # Bit of a hack to ensure that the original axes tick labels are on top of
    # whatever is plotted in the twinned axes. Tick labels will be drawn twice.
    for label in ax.get_yticklabels():
        ax.figure.texts.append(label)

    return ax2
# Only build the figure when the file is run as a script, not on import.
if __name__ == '__main__':
    main()

Just to add onto @JoeKington's (great) answer, I found that the "hack to ensure that the original axes tick labels are on top of whatever is plotted in the twinned axes" didn't work for me, so as an alternative I've used:
from matplotlib.ticker import MaxNLocator
# Match the tick point locations by setting the same number of ticks in the
# 2nd axis as the first
# NOTE(review): `ax1` is not defined in this snippet; presumably it is the
# original (first) polar axes that `ax2` was twinned from — confirm.
ax2.yaxis.set_major_locator(MaxNLocator(nbins=len(ax1.get_yticks())))
# Set the last tick as the plot limit
ax2.set_ylim(0, ax2.get_yticks()[-1])
# Remove the tick label at zero
ax2.yaxis.get_major_ticks()[0].label1.set_visible(False)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Matplotlib pie charts as scatter plot - python

Related

python bar chart total label on bar

Rotating boxplot x-axis labels in subplots [duplicate]

scatter plot color bar does not look right

Pandas dataframe plotting: yscale log and xy label and legend issues

Add second axis to polar plot

Categories

Resources