scatterplot and combined polar histogram in matplotlib - python

I am attempting to produce a plot like this which combines a cartesian scatter plot and a polar histogram. (Radial lines optional)
A similar solution (by Nicolas Legrand) exists for looking at differences in x and y (code here), but we need to look at ratios (i.e. x/y).
More specifically, this is useful when we want to look at the relative risk measure which is the ratio of two probabilities.
The scatter plot on its own is obviously not a problem, but the polar histogram is more advanced.
The most promising lead I have found is this central example from the matplotlib gallery here
I have attempted to do this, but have run up against the limits of my matplotlib skills. Any efforts moving towards this goal would be great.

I'm sure that others will have better suggestions, but one method that gets something like you want (without the need for extra axes artists) is to use a polar projection with a scatter and bar chart together. Something like
import matplotlib.pyplot as plt
import numpy as np
# Draw 100 random points from the default uniform range of np.random.uniform
x = np.random.uniform(size=100)
y = np.random.uniform(size=100)
# Express every point in polar coordinates
radius = np.sqrt(x**2 + y**2)
angle = np.arctan2(y, x)
# Density histogram of the angles: 21 edges, i.e. 20 equal bins over 0..pi/2
counts, edges = np.histogram(angle, bins=np.linspace(0, np.pi/2, 21), density=True)
bar_colors = plt.cm.Spectral(counts / counts.max())
ax = plt.subplot(111, projection='polar')
ax.scatter(angle, radius, marker='.')
# One bar per bin, starting 0.2 beyond the outermost scatter point
ax.bar(edges[:-1], counts, width=np.diff(edges),
       align='edge', bottom=radius.max() + 0.2, color=bar_colors)
# Cut off at 90 degrees
ax.set_thetamax(90)
# Set the r grid to cover the scatter plot
ax.set_rgrids([0, 0.5, 1])
# Let's put a line at 1 assuming we want a ratio of some sort
ax.set_thetagrids([45], [1])
which will give
It is missing axes labels and some beautification, but it might be a place to start. I hope it is helpful.

You can use two axes on top of each other:
import matplotlib.pyplot as plt
# Both axes use the same rectangle so the polar axes sit on top of the cartesian ones
rect = [0.1, 0.1, .8, .8]
fig = plt.figure(figsize=(6, 6))
ax1 = fig.add_axes(rect, label="cartesian")
ax2 = fig.add_axes(rect, projection="polar", label="polar")
# Move the radial origin to r = -1 and keep only the first 90 degrees
ax2.set_rorigin(-1)
ax2.set_thetamax(90)
plt.show()

Ok. Thanks to the answer from Nicolas, and the answer from tomjn I have a working solution :)
import numpy as np
import matplotlib.pyplot as plt
# Scatter data
n = 50
x = 0.3 + np.random.randn(n)*0.1
y = 0.4 + np.random.randn(n)*0.02


def radial_corner_plot(x, y, n_hist_bins=51):
    """Scatter plot with radial histogram of x/y ratios.

    The points are drawn on a cartesian axes; a polar axes sharing the same
    lower-left corner carries a density histogram of the angles
    ``arctan2(y, x)``, with angular ticks labelled by the x/y ratio they
    correspond to.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points.
    n_hist_bins : int
        Number of histogram bin *edges* spread evenly over 0..pi/2
        (i.e. ``n_hist_bins - 1`` bins).

    Returns
    -------
    (fig, [ax1, ax2])
        The figure, the cartesian scatter axes and the polar histogram axes.
    """
    # define useful constant
    offset_in_radians = np.pi/4

    def rotate_hist_axis(ax):
        """rotate so that 0 degrees is pointing up and right"""
        ax.set_theta_offset(offset_in_radians)
        ax.set_thetamin(-45)
        ax.set_thetamax(45)
        return ax

    def update_hist_ticks(ax, desired_ratios):
        """Update tick positions and corresponding tick labels"""
        desired_ratios = np.asarray(desired_ratios, dtype=float)
        # a ratio x/y = rho lies on the ray through (x, y) = (1, 1/rho);
        # subtract the offset because the axis itself has been rotated
        phi = np.arctan2(1/desired_ratios, 1.0) - offset_in_radians
        # define ticklabels
        xticklabels = [str(round(float(label), 2)) for label in desired_ratios]
        # apply updates to the axes that was passed in (not a closed-over one)
        ax.set(xticks=phi, xticklabels=xticklabels)
        return ax

    # Axis setup
    fig = plt.figure(figsize=(6, 6))
    ax1 = fig.add_axes([0.1, 0.1, .6, .6], label="cartesian")
    ax2 = fig.add_axes([0.1, 0.1, .8, .8], projection="polar", label="polar")
    ax2.set_rorigin(-20)
    ax2.set_thetamax(90)

    # Convert scatter data to histogram data
    phi = np.arctan2(y, x)
    h, b = np.histogram(phi,
                        bins=np.linspace(0, np.pi/2, n_hist_bins),
                        density=True)

    # SCATTER PLOT -------------------------------------------------------
    ax1.scatter(x, y)
    ax1.set(xlim=[0, 1], ylim=[0, 1], xlabel="x", ylabel="y")
    ax1.spines['right'].set_visible(False)
    ax1.spines['top'].set_visible(False)

    # HISTOGRAM ----------------------------------------------------------
    ax2 = rotate_hist_axis(ax2)
    # rotation of axis requires rotation in bin positions
    b = b - offset_in_radians
    # plot the histogram
    ax2.bar(b[:-1], h, width=b[1:] - b[:-1], align='edge')
    ax2 = update_hist_ticks(ax2, np.array([1/8, 1/4, 1/2, 1, 2, 4, 8]))
    # just have radial grid lines
    ax2.grid(which="major", axis="y")
    # remove bin count labels
    ax2.set_yticks([])
    return (fig, [ax1, ax2])


fig, ax = radial_corner_plot(x, y)
Thanks for the pointers!

Related

How to draw the normal distribution of a barplot with log x axis?

I'd like to draw a lognormal distribution of a given bar plot.
Here's the code
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np; np.random.seed(1)
import scipy.stats as stats
import math
inter = 33
x = np.logspace(-2, 1, num=3*inter+1)
yaxis = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0]
# Draw the bars, then style everything through the Axes/Figure objects directly
fig, ax = plt.subplots()
ax.bar(x[:-1], yaxis, width=np.diff(x), align="center", ec='k', color='w')
ax.set_xscale('log')
ax.set_xlabel('Diameter (mm)', fontsize='12')
ax.set_ylabel('Percentage of Total Particles (%)', fontsize='12')
ax.set_ylim(0, 8)
ax.set_xlim(0.01, 10)
fig.set_size_inches(12, 12)
fig.savefig("Test.png", dpi=300, bbox_inches='tight')
Resulting plot:
What I'm trying to do is to draw the Probability Density Function exactly like the one shown in red in the graph below:
An idea is to convert everything to logspace, with u = log10(x). Then draw the density histogram in there. And also calculate a kde in the same space. Everything gets drawn as y versus u. When we have u at a top twin axes, x can stay at the bottom. Both axes get aligned by setting the same xlims, but converted to logspace on the top axis. The top axis can be hidden to get the desired result.
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
inter = 33
u = np.linspace(-2, 1, num=3*inter+1)
x = 10**u
us = np.linspace(u[0], u[-1], 500)
yaxis = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.01,0.03,0.3,0.75,1.24,1.72,2.2,3.1,3.9,
4.3,4.9,5.3,5.6,5.87,5.96,6.01,5.83,5.42,4.97,4.60,4.15,3.66,3.07,2.58,2.19,1.90,1.54,1.24,1.08,0.85,0.73,
0.84,0.59,0.55,0.53,0.48,0.35,0.29,0.15,0.15,0.14,0.12,0.14,0.15,0.05,0.05,0.05,0.04,0.03,0.03,0.03, 0.02,
0.02,0.03,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0,0]
yaxis = np.array(yaxis)
u_mid = (u[:-1] + u[1:]) / 2  # bin centres in log10 space
# reconstruct data from the given frequencies
# - the np.int alias was removed in NumPy 1.24; the builtin int replaces it
# - round before casting: products such as 0.29*100 evaluate to 28.999... and
#   would otherwise be truncated to 28 repetitions
u_data = np.repeat(u_mid, np.rint(yaxis * 100).astype(int))
kde = stats.gaussian_kde(u_mid, weights=yaxis, bw_method=0.2)
# total area of all bars; the unit-area curves below are scaled by it to match the bars
total_area = (np.diff(u)*yaxis).sum()
fig, ax = plt.subplots()
# twin axes sharing y: `ax2` is linear in u = log10(x), `ax` shows x on a log scale
ax2 = ax.twiny()
ax2.bar(u[:-1], yaxis, width=np.diff(u), align="edge", ec='k', color='w', label='frequencies')
ax2.plot(us, total_area*kde(us), color='crimson', label='kde')
ax2.plot(us, total_area * stats.norm.pdf(us, u_data.mean(), u_data.std()), color='dodgerblue', label='lognormal')
ax2.legend()
ax.set_xscale('log')
ax.set_xlabel('Diameter (mm)', fontsize='12')
ax.set_ylabel('Percentage of Total Particles (%)', fontsize='12')
ax.set_ylim(0,8)
# same limits on both axes, expressed in x on the bottom and in log10(x) on the top
xlim = np.array([0.01,10])
ax.set_xlim(xlim)
ax2.set_xlim(np.log10(xlim))
ax2.set_xticks([]) # hide the ticks at the top
plt.tight_layout()
plt.show()
PS: Apparently this also can be achieved directly without explicitly using u (at the cost of being slightly more cryptic):
x = np.logspace(-2, 1, num=3*inter+1)
xs = np.logspace(-2, 1, 500)
log_x = np.log10(x)  # bin edges in log10 space, computed once
total_area = (np.diff(log_x)*yaxis).sum() # total area of all bars (in log10 space)
# gaussian_kde is only reachable as stats.gaussian_kde with `from scipy import stats`
kde = stats.gaussian_kde((log_x[:-1]+log_x[1:])/2, weights=yaxis, bw_method=0.2)
ax.bar(x[:-1], yaxis, width=np.diff(x), align="edge", ec='k', color='w')
ax.plot(xs, total_area*kde(np.log10(xs)), color='crimson')
ax.set_xscale('log')
Note that the bandwidth set for gaussian_kde is a somewhat arbitrary value. Larger values give a more equalized (smoother) curve, smaller values keep closer to the data. Some experimentation can help.

Custom Spider chart --> Display curves instead of lines between point on a polar plot in matplotlib

I have measured the positions of different products in different angles positions (6 values in steps of 60 deg. over a complete rotation). Instead of representing my values on a Cartesian graph where 0 and 360 are the same point, I want to use a polar graph.
With matplotlib, I got a spider chart type graph, but I want to avoid straight lines between points and display and extrapolated values between those. I have a solution that is kind of OK, but I was hoping there is a nice "one liner" I could use to have a more realistic representation or a better tangent handling for some points.
Does anyone have an idea to improve my code below ?
# Libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Some data to play with
# (nine measurements, one every 45 degrees; the 360-degree row repeats the value at 0)
df = pd.DataFrame({'measure':[10, -5, 15,20,20, 20,15,5,10], 'angle':[0,45,90,135,180, 225, 270, 315,360]})
# The few lines I would like to avoid...
# Angles in radians: for every segment start, 9 samples in 5-degree steps
# (start, start+5, ..., start+40)
angles = [y/180*np.pi for x in [np.arange(x, x+45,5) for x in df.angle[:-1]] for y in x]
# Matching radii: 10 linearly spaced samples between consecutive measures, with the
# last one dropped so the endpoint shared with the next segment is not duplicated
values = [y for x in [np.linspace(x, df.measure[i+1], 10)[:-1] for i, x in enumerate(df.measure[:-1])] for y in x]
# Close the curve: add the 360-degree angle and repeat the first radius
angles.append(360/180*np.pi)
values.append(values[0])
# Initialise the spider plot
ax = plt.subplot(polar=True)
# Plot data
# (first the raw measurements, then the densified piecewise-linear curve)
ax.plot(df.angle/180*np.pi, df['measure'], linewidth=1, linestyle='solid', label="Spider chart")
ax.plot(angles, values, linewidth=1, linestyle='solid', label='what I want')
ax.legend()
# Fill area
ax.fill(angles, values, 'b', alpha=0.1)
plt.show()
the result is below, I want something similar to the orange line with some kind of spline to avoid sharp corners I currently get
I have a solution that is a patchwork of other solutions. It needs to be cleaned and optimized, but it does the job !
Comments and improvements are always welcome, see below
# https://stackoverflow.com/questions/33962717/interpolating-a-closed-curve-using-scipy
from scipy import interpolate
# Polar -> cartesian for every row except the last one (the closing 360-degree row)
theta = df.angle[:-1]/180*np.pi
x = df.measure[:-1] * np.cos(theta)
y = df.measure[:-1] * np.sin(theta)
# Close the curve explicitly by repeating the first point at the end
x = np.r_[x, x[0]]
y = np.r_[y, y[0]]
# Fit periodic splines x=f(u) and y=g(u); s=0 is needed to force the fit
# to pass through all the input points.
tck, u = interpolate.splprep([x, y], s=0, per=True)
# Evaluate the fitted curve at 1000 evenly spaced parameter values in [0, 1]
xi, yi = interpolate.splev(np.linspace(0, 1, 1000), tck)
def cart2pol(x, y):
    """Convert cartesian coordinates to polar ones.

    Returns a ``(rho, phi)`` tuple: ``rho`` is the distance from the origin
    and ``phi`` the angle in radians as given by ``np.arctan2(y, x)``.
    """
    return np.sqrt(x**2 + y**2), np.arctan2(y, x)
# Initialise the spider plot
plt.figure(figsize=(12,8))
ax = plt.subplot(polar=True)
# Convert the interpolated curve back to polar coordinates once and reuse both parts
rho_i, phi_i = cart2pol(xi, yi)
# Plot data: every curve shares the same line style
line_style = dict(linewidth=1, linestyle='solid')
ax.plot(df.angle/180*np.pi, df['measure'], label="Spider chart", **line_style)
ax.plot(angles, values, label='Interval linearisation', **line_style)
ax.plot(phi_i, rho_i, label='Smooth interpolation', **line_style)
ax.legend()
# Fill area
ax.fill(angles, values, 'b', alpha=0.1)
plt.show()

Polar plot - Put one grid line in bold

I am trying to make use of the polar plot projection to make a radar chart. I would like to know how to put only one grid line in bold (while the others should remain standard).
For my specific case, I would like to highlight the gridline associated to the ytick "0".
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
#Variables
sespi = pd.read_csv("country_progress.csv")
labels = sespi.country
progress = sespi.progress
# one angle per country, evenly spread over the circle (endpoint excluded)
angles=np.linspace(0, 2*np.pi, len(labels), endpoint=False)
#Concatenation to close the plots
progress=np.concatenate((progress,[progress[0]]))
angles=np.concatenate((angles,[angles[0]]))
#Polar plot
fig=plt.figure()
ax = fig.add_subplot(111, polar=True)
ax.plot(angles, progress, '.--', linewidth=1, c="g")
#ax.fill(angles, progress, alpha=0.25)
# `angles` now carries one extra closing entry; drop it so that the number of
# tick positions matches the number of labels (a mismatch raises in recent matplotlib)
ax.set_thetagrids(angles[:-1] * 180/np.pi, labels)
# NOTE(review): this only relabels whichever radial ticks matplotlib picked;
# confirm the tick positions really correspond to these values
ax.set_yticklabels([-200,-150,-100,-50,0,50,100,150,200])
#ax.set_title()
ax.grid(True)
plt.show()
The gridlines of a plot are Line2D objects. Therefore you can't make it bold. What you can do (as shown, in part, in the other answer) is to increase the linewidth and change the colour but rather than plot a new line you can do this to the specified gridline.
You first need to find the index of the y tick labels which you want to change:
y_tick_labels = [-100,-10,0,10]
ind = y_tick_labels.index(0) # find index of value 0
You can then get a list of the gridlines using gridlines = ax.yaxis.get_gridlines(). Then use the index you found previously on this list to change the properties of the correct gridline.
Using the example from the gallery as a basis, a full example is shown below:
# r and theta are not used further in this snippet
r = np.arange(0, 2, 0.01)
theta = 2 * np.pi * r
ax = plt.subplot(111, projection='polar')
ax.set_rmax(2)
ax.set_rticks([0.5, 1, 1.5, 2]) # less radial ticks
ax.set_rlabel_position(-22.5) # get radial labels away from plotted line
ax.grid(True)
# One label per radial tick set above; the values are display text only
y_tick_labels = [-100, -10, 0, 10]
ax.set_yticklabels(y_tick_labels)
ind = y_tick_labels.index(0) # find index of value 0
# NOTE(review): relies on get_gridlines() returning the gridlines in the same
# order as the ticks, so that `ind` selects the line labelled 0
gridlines = ax.yaxis.get_gridlines()
# Restyle only that gridline: black and thicker
gridlines[ind].set_color("k")
gridlines[ind].set_linewidth(2.5)
plt.show()
Which gives:
It is just a trick, but I guess you could just plot a circle and change its linewidth and color to whatever could be bold for you.
For example:
import matplotlib.pyplot as plt
import numpy as np
# Radial value at which the emphasised circle is drawn
Yline = 0
# Number of samples along the circle
Npoints = 300
# Npoints angles covering 0..360 degrees, converted to radians
angles = np.linspace(0,360,Npoints)*np.pi/180
# Constant radius Yline for every angle
line = 0*angles + Yline
ax = plt.subplot(111, projection='polar')
# Thick black curve standing in for a bold gridline
plt.plot(angles, line, color = 'k', linewidth = 3)
plt.ylim([-1,1])
plt.grid(True)
plt.show()
In this piece of code, I plot a line using plt.plot between any point of the two vectors angles and line. The former is actually all the angles between 0 and 2*np.pi. The latter is constant, and equal to the 'height' you want to plot that line Yline.
I suggest you try decreasing and increasing Npoints while having a look at the documentation of np.linspace() in order to understand your problem with the roundness of the circle.

How to draw a line through a scatter graph with no overflow

So I am currently plotting a scatter graph with many x and ys in matplotlib:
plt.scatter(x, y)
I want to draw a line on this scatter graph that crosses through the whole graph (i.e hits two 'borders') I know the gradient and the intercept - m and the c in the equation y = mx +c.
I have thought about acquiring the 4 points of the plot (calculating the min and max scatter x and ys) and from that calculating the min and max coords for the line and then plotting but that seems very convoluted. Is there any better way to do this bearing in mind the line may not even be 'within' the 'plot'?
Example of scatter graph:
As identified visually in the plot, the four bordering coordinates are roughly:
bottom left: -1,-2
top left: -1,2
bottom right: 6,-2
top right 6,2
I now have a line that I need to plot that must not exceed these boundaries but if it enters the plot must touch two of the boundary points.
So I could check what y equals when x = -1 and then check if that value is between -2 and 2, and if it is, the line must cross the left border, so plot it, and so on and so forth.
Ideally though I would create a line from -infinity to infinity and then crop it to fit the plot.
The idea here is to draw a line of some equation y=m*x+y0 into the plot. This can be achieved by transforming a horizontal line, originally given in axes coordinates, into data coordinates, applying the Affine2D transform according to the line equation and transforming back to screen coordinates.
The advantage here is that you do not need to know the axes limits at all. You may also freely zoom or pan your plot; the line will always stay within the axes boundaries. It hence effectively implements a line ranging from -infinity to +infinity.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
def axaline(m, y0, ax=None, **kwargs):
    """Draw the line ``y = m*x + y0`` across the whole width of an axes.

    Parameters
    ----------
    m, y0 : float
        Slope and intercept of the line.
    ax : matplotlib axes, optional
        Target axes; defaults to the current axes (``plt.gca()``).
    **kwargs
        Forwarded to ``plt.Line2D`` (color, linewidth, ...).

    Returns
    -------
    The ``Line2D`` that was added, so callers can restyle or remove it.
    """
    if ax is None:
        ax = plt.gca()
    # maps the unit square onto the current view limits, in data coordinates
    tr = mtransforms.BboxTransformTo(
        mtransforms.TransformedBbox(ax.viewLim, ax.transScale)) + \
        ax.transScale.inverted()
    # affine map (x, y) -> (x, m*x + y0)
    aff = mtransforms.Affine2D.from_values(1, m, 0, 0, 0, y0)
    trinv = ax.transData
    line = plt.Line2D([0, 1], [0, 0], transform=tr+aff+trinv, **kwargs)
    ax.add_line(line)
    return line


x = np.random.rand(20)*6-0.7
y = (np.random.rand(20)-.5)*4
c = (x > 3).astype(int)
fig, ax = plt.subplots()
ax.scatter(x,y, c=c, cmap="bwr")
# draw y=m*x+y0 into the plot
m = 0.4; y0 = -1
axaline(m,y0, ax=ax, color="limegreen", linewidth=5)
plt.show()
While this solution looks a bit complicated on first sight, one does not need to fully understand it. Just copy the axaline function to your code and use it as it is.
In order to get the automatic updating working without the transforms doing this, one may add callbacks which would reset the transform every time something changes in the plot.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import transforms
class axaline():
    """Line ``y = m*x + y0`` spanning an axes, refreshed on every limit change.

    The line's transform is rebuilt by ``update`` whenever the axes emits
    ``xlim_changed`` or ``ylim_changed``.
    """

    def __init__(self, m, y0, ax=None, **kwargs):
        """Create the line on ``ax`` (default: current axes); kwargs go to ``plt.Line2D``."""
        if ax is None:
            ax = plt.gca()
        self.ax = ax
        # affine map (x, y) -> (x, m*x + y0)
        self.aff = transforms.Affine2D.from_values(1, m, 0, 0, 0, y0)
        self.line = plt.Line2D([0, 1], [0, 0], **kwargs)
        # set the transform before the line is added to the axes
        self.update()
        self.ax.add_line(self.line)
        self.ax.callbacks.connect('xlim_changed', self.update)
        self.ax.callbacks.connect('ylim_changed', self.update)

    def update(self, evt=None):
        """Rebuild the line's transform from the current state of ``self.ax``."""
        # use the axes stored on the instance, not a module-level `ax`
        tr = self.ax.transAxes - self.ax.transData
        trinv = self.ax.transData
        self.line.set_transform(tr+self.aff+trinv)


x = np.random.rand(20)*6-0.7
y = (np.random.rand(20)-.5)*4
c = (x > 3).astype(int)
fig, ax = plt.subplots()
ax.scatter(x,y, c=c, cmap="bwr")
# draw y=m*x+y0 into the plot
m = 0.4; y0 = -1
al = axaline(m,y0, ax=ax, color="limegreen", linewidth=5)
plt.show()
You may try:
import matplotlib.pyplot as plt
import numpy as np
# Line parameters: y = m*x + c
m = 3
c = -2
# Two random point clouds
x1Data = np.random.normal(scale=2, loc=.4, size=25)
y1Data = np.random.normal(scale=3, loc=1.2, size=25)
x2Data = np.random.normal(scale=1, loc=3.4, size=25)
y2Data = np.random.normal(scale=.65, loc=-.2, size=25)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x1Data, y1Data)
ax.scatter(x2Data, y2Data)
# Remember the limits as they are after plotting the scatter data
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# Evaluate the line at the two x limits only
line_y = [m * x_edge + c for x_edge in xlim]
ax.plot(xlim, line_y, 'r:')
# Put the remembered limits back after adding the line
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.show()
which gives:

Half or quarter polar plots in Matplotlib?

I am trying to make a polar plot that goes 180 degrees instead of 360 in Matplotlib similar to http://www.mathworks.com/matlabcentral/fileexchange/27230-half-polar-coordinates-figure-plot-function-halfpolar in MATLAB. Any ideas?
The following works in matplotlib 2.1 or higher. There is also an example on the matplotlib page.
You may use a usual polar plot, ax = fig.add_subplot(111, polar=True) and confine the theta range. For a half polar plot
ax.set_thetamin(0)
ax.set_thetamax(180)
or for a quarter polar plot
ax.set_thetamin(0)
ax.set_thetamax(90)
Complete example:
import matplotlib.pyplot as plt
import numpy as np
# Angles from 0 to pi (np.linspace's default number of samples) and r = sin(theta)
theta = np.linspace(0,np.pi)
r = np.sin(theta)
fig = plt.figure()
ax = fig.add_subplot(111, polar=True)
# Points are coloured by their radius
c = ax.scatter(theta, r, c=r, s=10, cmap='hsv', alpha=0.75)
# Confine the theta range to 0..180 degrees (half polar plot)
ax.set_thetamin(0)
ax.set_thetamax(180)
plt.show()
The example code in the official matplotlib documentation may obscure things a little bit if someone just needs a simple quarter or half plot.
I wrote a code snippet that may help someone who is not that familiar with AxisArtists here.
"""
Reference:
1. https://gist.github.com/ycopin/3342888
2. http://matplotlib.org/mpl_toolkits/axes_grid/users/overview.html#axisartist
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.projections import PolarAxes
from mpl_toolkits.axisartist.floating_axes import GridHelperCurveLinear, FloatingSubplot
import mpl_toolkits.axisartist.grid_finder as gf
def generate_polar_axes():
    """Build a quarter-circle (0..pi/2) polar plot with axisartist floating axes.

    Returns the figure and the auxiliary axes obtained from
    ``get_aux_axes(polar_trans)``, on which data is plotted as (theta, radius).
    """
    polar_trans = PolarAxes.PolarTransform()

    # Angular ticks: positions are given in radians, labels show the same angles in degrees
    tick_degrees = np.arange(0, 90, 10)
    tick_radians = tick_degrees * np.pi / 180
    angle_locator = gf.FixedLocator(tick_radians)
    angle_formatter = gf.DictFormatter(
        dict(zip(tick_radians, map(str, tick_degrees))))

    # Radial axis extent
    radius_min, radius_max = 0, 1

    # "extremes" holds the axes range: (theta_min, theta_max, r_min, r_max)
    grid_helper = GridHelperCurveLinear(
        polar_trans,
        extremes=(0, np.pi / 2,  # 1st quadrant
                  radius_min, radius_max),
        grid_locator1=angle_locator,
        tick_formatter1=angle_formatter,
    )

    figure = plt.figure()
    floating_ax = FloatingSubplot(figure, 111, grid_helper=grid_helper)
    figure.add_subplot(floating_ax)

    # Adjust axes
    angle_axis = floating_ax.axis["top"]  # "Angle axis"
    angle_axis.set_axis_direction("bottom")
    angle_axis.toggle(ticklabels=True, label=True)
    angle_axis.major_ticklabels.set_axis_direction("top")
    angle_axis.label.set_axis_direction("top")
    angle_axis.label.set_text("angle (deg)")

    x_axis = floating_ax.axis["left"]  # "X axis"
    x_axis.set_axis_direction("bottom")
    x_axis.label.set_text("radius")

    y_axis = floating_ax.axis["right"]  # "Y axis"
    y_axis.set_axis_direction("top")
    y_axis.toggle(ticklabels=True)
    y_axis.major_ticklabels.set_axis_direction("left")

    floating_ax.axis["bottom"].set_visible(False)  # Useless

    # Grid lines and title
    floating_ax.grid(True)
    floating_ax.set_title("Quarter polar plot")

    # the auxiliary axes are the ones that can be plotted on
    data_ax = floating_ax.get_aux_axes(polar_trans)
    return figure, data_ax


if __name__ == "__main__":
    # Plot data onto the defined polar axes
    fig, ax = generate_polar_axes()
    theta = np.random.rand(10) * np.pi / 2
    radius = np.random.rand(10)
    ax.scatter(theta, radius)
    fig.savefig("test.png")

Categories