Apply distribution from all columns in a pandas df - python

I am trying to plot a multivariate distribution that is produced from multiple xy coordinates.
The code below aims to take each coordinate and apply a radius to it ([_Radius]). The COV matrix is then adjusted by a scaling factor ([_Scaling]) to expand the radius in the x-direction and contract it in the y-direction. The direction of this adjustment is measured by the rotation angle ([_Rotation]).
The output is expressed as a probability density function, which represents the influence of each group's coordinates over a certain space.
However, at present I can only get the code to apply this to the last set of coordinates in the df. So with the input below, only A3_X, A3_Y and B3_X, B3_Y are working; A1_X, A1_Y, A2_X, A2_Y and B1_X, B1_Y, B2_X, B2_Y are not. Please see the attached figure for a visual representation.
Note: Apologies for the long df. It was the only way to replicate my dataset.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])
    r = rot(theta)
    return r @ cov @ r.T

def datalimits(*data, pad=.15):
    dmin,dmax = min(d.values.min() for d in data), max(d.values.max() for d in data)
    spad = pad*(dmax - dmin)
    return dmin - spad, dmax + spad
d = ({
'Time' : [1],
'A1_Y' : [5883.102906],
'A1_X' : [3321.527705],
'A2_Y' : [5898.467202],
'A2_X' : [3328.331657],
'A3_Y' : [5886.270552],
'A3_X' : [3366.777169],
'B1_Y' : [5897.925245],
'B1_X' : [3297.143092],
'B2_Y' : [5905.137781],
'B2_X' : [3321.167842],
'B3_Y' : [5888.291025],
'B3_X' : [3347.263205],
'A1_Radius' : [10.3375199],
'A2_Radius' : [10.0171423],
'A3_Radius' : [11.42129333],
'B1_Radius' : [18.69514267],
'B2_Radius' : [10.65877044],
'B3_Radius' : [9.947025444],
'A1_Scaling' : [0.0716513620],
'A2_Scaling' : [0.0056262380],
'A3_Scaling' : [0.0677243260,],
'B1_Scaling' : [0.0364290850],
'B2_Scaling' : [0.0585827450],
'B3_Scaling' : [0.0432806750],
'A1_Rotation' : [20.58078926],
'A2_Rotation' : [173.5056346],
'A3_Rotation' : [36.23648405],
'B1_Rotation' : [79.81849817],
'B2_Rotation' : [132.2437404],
'B3_Rotation' : [44.28198078],
})
df = pd.DataFrame(data=d)
A_Y = df[df.columns[1::2][:3]]
A_X = df[df.columns[2::2][:3]]
B_Y = df[df.columns[7::2][:3]]
B_X = df[df.columns[8::2][:3]]
A_Radius = df[df.columns[13:16]]
B_Radius = df[df.columns[16:19]]
A_Scaling = df[df.columns[19:22]]
B_Scaling = df[df.columns[22:25]]
A_Rotation = df[df.columns[25:28]]
B_Rotation = df[df.columns[28:31]]
limitpad = .5
clevels = 5
cflevels = 50
xmin,xmax = datalimits(A_X, B_X, pad=limitpad)
ymin,ymax = datalimits(A_Y, B_Y, pad=limitpad)
X,Y = np.meshgrid(np.linspace(xmin, xmax), np.linspace(ymin, ymax))
fig = plt.figure(figsize=(10,6))
ax = plt.gca()
Zs = []
for l,color in zip('AB', ('red', 'blue')):
    ax.plot(A_X.iloc[0], A_Y.iloc[0], '.', c='red', ms=10, label=l, alpha=0.6)
    ax.plot(B_X.iloc[0], B_Y.iloc[0], '.', c='blue', ms=10, label=l, alpha=0.6)
    Zrows = []
    for _,row in df.iterrows():
        for i in [1,2,3]:
            x,y = row['{}{}_X'.format(l,i)], row['{}{}_Y'.format(l,i)]
        cov = getcov(radius=row['{}{}_Radius'.format(l,i)], scale=row['{}{}_Scaling'.format(l,i)], theta=row['{}{}_Rotation'.format(l,i)])
        mnorm = sts.multivariate_normal([x, y], cov)
        Z = mnorm.pdf(np.stack([X, Y], 2))
        Zrows.append(Z)
    Zs.append(np.sum(Zrows, axis=0))
Z = Zs[0] - Zs[1]
normZ = Z - Z.min()
normZ = normZ/normZ.max()
cs = ax.contour(X, Y, normZ, levels=clevels, colors='w', alpha=.5)
ax.clabel(cs, fmt='%2.1f', colors='w')#, fontsize=14)
cfs = ax.contourf(X, Y, normZ, levels=cflevels, cmap='viridis', vmin=0, vmax=1)
cbar = fig.colorbar(cfs, ax=ax)
cbar.set_ticks([0, .2, .4, .6, .8, 1])
As you can see below, the code is only being applied to A3_X, A3_Y and B3_X, B3_Y.
It's not being applied to coordinates A1_X, A1_Y, A2_X, A2_Y or B1_X, B1_Y, B2_X, B2_Y.

There's an error in the way that you're iterating over the point data. The way your dataframe is organized makes it hard to figure out the appropriate way to iterate over the data, and makes it easy to run into errors of the kind you're getting. It would be better if your df were organized such that you could easily iterate over the subsets of your data representing each group A and B at each time. If you split the times out of your data dictionary d, here's how you can construct an easier-to-work-with df:
import pandas as pd
time = [1]
d = ({
'A1_Y' : [5883.102906],
'A1_X' : [3321.527705],
'A2_Y' : [5898.467202],
'A2_X' : [3328.331657],
'A3_Y' : [5886.270552],
'A3_X' : [3366.777169],
'B1_Y' : [5897.925245],
'B1_X' : [3297.143092],
'B2_Y' : [5905.137781],
'B2_X' : [3321.167842],
'B3_Y' : [5888.291025],
'B3_X' : [3347.263205],
'A1_Radius' : [10.3375199],
'A2_Radius' : [10.0171423],
'A3_Radius' : [11.42129333],
'B1_Radius' : [18.69514267],
'B2_Radius' : [10.65877044],
'B3_Radius' : [9.947025444],
'A1_Scaling' : [0.0716513620],
'A2_Scaling' : [0.0056262380],
'A3_Scaling' : [0.0677243260,],
'B1_Scaling' : [0.0364290850],
'B2_Scaling' : [0.0585827450],
'B3_Scaling' : [0.0432806750],
'A1_Rotation' : [20.58078926],
'A2_Rotation' : [173.5056346],
'A3_Rotation' : [36.23648405],
'B1_Rotation' : [79.81849817],
'B2_Rotation' : [132.2437404],
'B3_Rotation' : [44.28198078],
})
# a list of tuples of the form ((time, group_id, point_id, value_label), value)
tuples = [((t, k.split('_')[0][0], int(k.split('_')[0][1]), k.split('_')[1]), v[i]) for k,v in d.items() for i,t in enumerate(time)]
df = pd.Series(dict(tuples)).unstack(-1)
df.index.names = ['time', 'group', 'id']
print(df)
Output:
                  Radius    Rotation   Scaling            X            Y
time group id
1    A     1   10.337520   20.580789  0.071651  3321.527705  5883.102906
           2   10.017142  173.505635  0.005626  3328.331657  5898.467202
           3   11.421293   36.236484  0.067724  3366.777169  5886.270552
     B     1   18.695143   79.818498  0.036429  3297.143092  5897.925245
           2   10.658770  132.243740  0.058583  3321.167842  5905.137781
           3    9.947025   44.281981  0.043281  3347.263205  5888.291025
This will make it much easier to iterate over the subsets in your data. Here's how you would iterate over the sub-dataframes for each group at each timepoint:
for time, tdf in df.groupby('time'):
    for group, gdf in tdf.groupby('group'):
        ...
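For illustration, the inner loop could pull the per-point values out of each sub-dataframe along these lines (just a sketch; the column names are the ones in the reshaped df above):
for time, tdf in df.groupby('time'):
    for group, gdf in tdf.groupby('group'):
        for (_, _, point_id), row in gdf.iterrows():
            x, y = row['X'], row['Y']
            radius, scale, theta = row['Radius'], row['Scaling'], row['Rotation']
            # ...build the covariance/PDF for this point here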
Here's an updated version of my code from your previous question that uses this better-organized dataframe to create the plot you want at every time point:
for time,subdf in df.groupby('time'):
    plotmvs(subdf)
Output:
Here's the complete code of the above plotmvs function:
import numpy as np
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt
import scipy.stats as sts
def datalimits(*data, pad=.15):
    dmin,dmax = min(d.min() for d in data), max(d.max() for d in data)
    spad = pad*(dmax - dmin)
    return dmin - spad, dmax + spad

def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])
    r = rot(theta)
    return r @ cov @ r.T
def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Creates a grid of data that represents the PDF of a multivariate gaussian.

    x, y: The center of the returned PDF
    (xy)lim: The extent of the returned PDF
    radius: The PDF will be dilated by this factor
    scale: The PDF will be stretched by a factor of (scale + 1) in the x direction, and squashed by a factor of 1/(scale + 1) in the y direction
    theta: The PDF will be rotated by this many degrees

    returns: X, Y, PDF. X and Y hold the coordinates of the PDF.
    """
    # create the coordinate grids
    X,Y = np.meshgrid(np.linspace(*xlim), np.linspace(*ylim))

    # stack them into the format expected by the multivariate pdf
    XY = np.stack([X, Y], 2)

    # displace xy by half the velocity
    x,y = rot(theta) @ (velocity/2, 0) + (x, y)

    # get the covariance matrix with the appropriate transforms
    cov = getcov(radius=radius, scale=scale, theta=theta)

    # generate the data grid that represents the PDF
    PDF = sts.multivariate_normal([x, y], cov).pdf(XY)

    return X, Y, PDF
def mvpdfs(xs, ys, xlim, ylim, radius=None, velocity=None, scale=None, theta=None):
    PDFs = []
    for i,(x,y) in enumerate(zip(xs,ys)):
        kwargs = {
            'radius': radius[i] if radius is not None else 1,
            'velocity': velocity[i] if velocity is not None else 0,
            'scale': scale[i] if scale is not None else 0,
            'theta': theta[i] if theta is not None else 0,
            'xlim': xlim,
            'ylim': ylim
        }
        X, Y, PDF = mvpdf(x, y, **kwargs)
        PDFs.append(PDF)
    return X, Y, np.sum(PDFs, axis=0)
def plotmvs(df, xlim=None, ylim=None, fig=None, ax=None):
    """Plot an xy point with an appropriately transformed 2D gaussian around it.
    Also plots other related data like the reference point.
    """
    if xlim is None: xlim = datalimits(df['X'])
    if ylim is None: ylim = datalimits(df['Y'])
    if fig is None:
        fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
    elif ax is None:
        ax = fig.gca()

    PDFs = []
    for (group,gdf),color in zip(df.groupby('group'), ('red', 'blue')):
        # plot the xy points of each group
        ax.plot(*gdf[['X','Y']].values.T, '.', c=color)

        # fetch the PDFs of the 2D gaussian for each group
        kwargs = {
            'radius': gdf['Radius'].values if 'Radius' in gdf else None,
            'velocity': gdf['Velocity'].values if 'Velocity' in gdf else None,
            'scale': gdf['Scaling'].values if 'Scaling' in gdf else None,
            'theta': gdf['Rotation'].values if 'Rotation' in gdf else None,
            'xlim': xlim,
            'ylim': ylim
        }
        X, Y, PDF = mvpdfs(gdf['X'].values, gdf['Y'].values, **kwargs)
        PDFs.append(PDF)

    # create the PDF for all points from the difference of the sums of the 2D Gaussians from group A and group B
    PDF = PDFs[0] - PDFs[1]

    # normalize PDF by shifting and scaling, so that the smallest value is 0 and the largest is 1
    normPDF = PDF - PDF.min()
    normPDF = normPDF/normPDF.max()

    # plot and label the contour lines of the 2D gaussian
    cs = ax.contour(X, Y, normPDF, levels=6, colors='w', alpha=.5)
    ax.clabel(cs, fmt='%.3f', fontsize=12)

    # plot the filled contours of the 2D gaussian. Set levels high for smooth contours
    cfs = ax.contourf(X, Y, normPDF, levels=50, cmap='viridis')

    # create the colorbar and ensure that it goes from 0 -> 1
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    cbar = fig.colorbar(cfs, ax=ax, cax=cax)
    cbar.set_ticks([0, .2, .4, .6, .8, 1])

    # ensure that x vs y scaling doesn't disrupt the transforms applied to the 2D gaussian
    ax.set_aspect('equal', 'box')

    return fig, ax

Simply adjust the indentation, especially in the middle/inner nested for loop, and reset the Zrows list when iterating across the dataframe rows. See the comments in the code for the specific changes:
...
    for _, row in df.iterrows():
        # MOVE ZROWS INSIDE
        Zrows = []
        for i in [1,2,3]:
            x,y = row['{}{}_X'.format(l,i)], row['{}{}_Y'.format(l,i)]

            # INDENT cov AND LATER CALCS TO RUN ACROSS ALL 1,2,3
            cov = getcov(radius=row['{}{}_Radius'.format(l,i)],
                         scale=row['{}{}_Scaling'.format(l,i)],
                         theta=row['{}{}_Rotation'.format(l,i)])
            mnorm = sts.multivariate_normal([x, y], cov)
            Z = mnorm.pdf(np.stack([X, Y], 2))

            # APPEND TO BE CLEANED OUT WITH EACH ROW
            Zrows.append(Z)

        Zs.append(np.sum(Zrows, axis=0))
...

There is a lot going on in this code. One small thing I noticed is that it looks like you are not using the df.columns indexing correctly. If you look at A_Y, the output is:
   A1_Rotation         A1_X  A2_Radius
0    20.580789  3321.527705  10.017142
I think you are mixing up columns. Maybe use df[['A1_Y', 'A2_Y', 'A3_Y']] to get exactly the columns you want, or just put all the A_Y values into a single column.
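For example, a quick sketch of selecting the columns explicitly (adjust the names to whatever your real df uses):
# select exactly the columns you intend, rather than relying on positional slices
A_Y = df[['A1_Y', 'A2_Y', 'A3_Y']]
A_X = df[['A1_X', 'A2_X', 'A3_X']]
B_Y = df[['B1_Y', 'B2_Y', 'B3_Y']]
B_X = df[['B1_X', 'B2_X', 'B3_X']]
# or grab every column of one kind for a group with a regex filter
A_Radius = df.filter(regex=r'^A\d_Radius')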

Related

Converting 1D distribution into matplotlib gradient

I have a 1D distribution (x values vs. probability, shown below) and I would like to convert it to a 2D plot like the one shown below, in which the color gradient is based on the probabilities of the values.
Currently, my code only plots this in a qualitative manner, because I am manually defining the array v1 and the color list. I tried my best to crack this and understand how to do it, but I failed. Does anyone have a suggestion?
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

def gradient_image(ax, extent, direction=0.3, cmap_range=(0, 1), **kwargs):
    """
    Draw a gradient image based on a colormap.

    Parameters
    ----------
    ax : Axes
        The axes to draw on.
    extent
        The extent of the image as (xmin, xmax, ymin, ymax).
        By default, this is in Axes coordinates but may be
        changed using the *transform* keyword argument.
    direction : float
        The direction of the gradient. This is a number in
        range 0 (=vertical) to 1 (=horizontal).
    cmap_range : float, float
        The fraction (cmin, cmax) of the colormap that should be
        used for the gradient, where the complete colormap is (0, 1).
    **kwargs
        Other parameters are passed on to `.Axes.imshow()`.
        In particular useful is *cmap*.
    """
    phi = direction * np.pi / 2
    v = np.array([np.cos(phi), np.sin(phi)])
    X = np.array([[v @ [1, 0], v @ [1, 1]],
                  [v @ [0, 0], v @ [0, 1]]])
    a, b = cmap_range
    X = a + (b - a) / X.max() * X
    im = ax.imshow(X, extent=extent, interpolation='bicubic',
                   vmin=0, vmax=1, **kwargs)
    return im
v1 = [0, 0.15, 0.5, 0.85, 1.0] # | Those two lines here
b = ["white","lightblue", "dodgerblue","lightblue", "white"] # | were the best I could do
bl = list(zip(v1,b))
blue_grad=LinearSegmentedColormap.from_list('custom',bl, N=256)
xmin, xmax = xlim = 0, 4
ymin, ymax = ylim = -300, 300
fig, ax = plt.subplots()
ax.set(xlim=xlim, ylim=ylim, autoscale_on=False)
gradient_image(ax, direction=1, extent=(0 , 2, -300, 300), cmap=blue_grad, cmap_range=(0., 1), alpha=0.5)
Here is a minimal example with a gaussian distribution (code for generating the gaussian distribution was adapted from this):
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
mu=0 #Create gaussian distribution
sigma=1
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y=stats.norm.pdf(x, mu, sigma)
y_expand=np.expand_dims(y, axis=0) #expanding dimensions to use imshow
plt.plot(x, stats.norm.pdf(x, mu, sigma),color='k',lw=3,ls='--')# plot distribution
extent = np.min(x), np.max(x), np.min(y), np.max(y)
plt.imshow(y_expand,interpolation=None,aspect='auto',cmap='Blues',extent=extent) #plot imshow
plt.colorbar()
plt.show()
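If you want the shading to stay under the curve rather than fill the whole strip, one option (my own addition, not part of the original answer) is to clip the gradient image with the area under the curve via fill_between:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

mu, sigma = 0, 1
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = stats.norm.pdf(x, mu, sigma)

fig, ax = plt.subplots()
ax.plot(x, y, color='k', lw=3, ls='--')  # plot the distribution itself

# draw the horizontal gradient over the full extent...
y_expand = np.expand_dims(y, axis=0)
extent = (x.min(), x.max(), 0, y.max())
im = ax.imshow(y_expand, interpolation='bicubic', aspect='auto', cmap='Blues', extent=extent)

# ...then clip it to the polygon under the curve (an invisible fill_between provides the path)
poly = ax.fill_between(x, 0, y, color='none')
im.set_clip_path(poly.get_paths()[0], transform=ax.transData)
plt.show()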

How do I fill/shade a certain section of my graph in python?

I have a function that I'd like to plot in python, and I want to shade the region of interest. I've tried using pyplot.fill_between() but cannot quite get what I want. I've attached an image with the region I want filled shaded in orange:
I plot the function (in blue), and the graph is bounded by y = 0, y ≈ 0.05 and x = 0. I wish to shade the relevant region (in orange).
Any tips as to how to go about this?
Thanks in advance.
import numpy as np
import matplotlib.pyplot as plt
def fn(M, r_min):
    d = (1 - 2*M/r_min)
    x = M/(r_min)**2
    A_0 = d**-0.5
    A_dot = np.arange(-0.6, 0.5, 0.0001)  # X axis
    a = np.zeros(len(A_dot))
    for i in range(1, len(A_dot)):
        a[i] = -3*d*A_dot[i]**2 - 2*x*A_dot[i] + A_0**2*x**2  # Y axis
    plt.plot(A_dot, a)
    plt.xlim(-0.55, 0.55)
    plt.axhline(y=0, color='black', linestyle='--')
    plt.axhline(y=0.049382716, color='black', linestyle='--')
    plt.axvline(x=0, color='black', linestyle='--')
    idx = np.argwhere(np.diff(np.sign(a))).flatten()  # Finding intersection on x-axis
    plt.plot(A_dot[idx], a[idx], 'ro')
    plt.xlabel(r'$\frac{dA_0}{d\tau}$')
    plt.ylabel('$|a|^2$')
    plt.show()
    return (A_dot, a)

fn(1, 3)
You need to give the x and y vectors as inputs to fill_between. To do that, you can define a mask selecting between the intersection point and 0 (add this to your fn function):
x_min = A_dot[idx[1]]
x_max = 0.0
mask_x = np.logical_and(A_dot >= x_min, A_dot <= x_max)
plt.fill_between(x=A_dot[mask_x], y1=a[mask_x], y2=0, color='orange')
Result:

Draw error shading bands on line plot - python

Let's say I have 25 lines like this:
x = np.linspace(0, 30, 60)
y = np.sin(x/6*np.pi)
error = np.random.normal(0.1, 0.02, size=y.shape)
y1 = y+ np.random.normal(0, 0.1, size=y.shape)
y2= y+ np.random.normal(0, 0.1, size=y.shape)
plt.plot(x, y, 'k-')
plt.plot(x, y1, 'k-')
plt.plot(x, y2,'k-')
.
.
.
Now, I'd like to make a plot like the one shown in the attached figure. How do I automatically make these error bars and the shading, given just a bunch of lines that all carry the same overall shape but with slight variations?
It is not very clear to me how the error variable in your code sample relates to the variations of the y variables. So here I give an example of how to compute and draw an error band based on the random variations of 25 y variables, and I use these same variations to create y error bars on top of the band. The same logic would apply to variations/errors on the x-axis.
Let's first create some random data and see what a line plot of 25 similar lines looks like:
import numpy as np # v 1.19.2
import matplotlib.pyplot as plt # v 3.3.2
rng = np.random.default_rng(seed=1)
x = np.linspace(0, 5*np.pi, 50)
y = np.sin(x)
# error = np.random.normal(0.1, 0.02, size=x.shape) # I leave this out
nb_yfuncs = 25
ynoise = rng.normal(1, 0.1, size=(nb_yfuncs, y.size))
yfuncs = nb_yfuncs*[y] + ynoise
fig, ax = plt.subplots(figsize=(10,4))
for yfunc in yfuncs:
    plt.plot(x, yfunc, 'k-')
plt.show()
I use the mean of yfuncs as the baseline variable. I extract the minimum and maximum of yfuncs for each x to compute the error band. I compute error bars that cover the same extent as the error band. Therefore, the errors are asymmetrical relative to the mean which is why they are entered as a 2-D array in the plotting function. The error band is drawn with fill_between and the error bars with errorbar. Here is what the code looks like:
ymean = yfuncs.mean(axis=0)
ymin = yfuncs.min(axis=0)
ymax = yfuncs.max(axis=0)
yerror = np.stack((ymean-ymin, ymax-ymean))
fig, ax = plt.subplots(figsize=(10,4))
plt.fill_between(x, ymin, ymax, alpha=0.2, label='error band')
plt.errorbar(x, ymean, yerror, color='tab:blue', ecolor='tab:blue',
capsize=3, linewidth=1, label='mean with error bars')
plt.legend()
plt.show()
You can do it with only matplotlib as follows:
from typing import Optional

import numpy as np
import matplotlib.pyplot as plt

def plot_with_error_bands(x: np.ndarray, y: np.ndarray, yerr: np.ndarray,
                          xlabel: str, ylabel: str,
                          title: str,
                          curve_label: Optional[str] = None,
                          error_band_label: Optional[str] = None,
                          color: Optional[str] = None, ecolor: Optional[str] = None,
                          linewidth: float = 1.0,
                          style: Optional[str] = 'default',
                          capsize: float = 3.0,
                          alpha: float = 0.2,
                          show: bool = False
                          ):
    """
    note:
        - example values for color and ecolor:
            color='tab:blue', ecolor='tab:blue'
        - capsize is the length of the horizontal line for the error bar. A larger number makes it longer horizontally.
        - alpha values greater than 0.2 make the error band fill color too dark. Really consider not changing it.
        - sample values for the curve and error band labels:
            curve_label: str = 'mean with error bars',
            error_band_label: str = 'error band',
    refs:
        - for making the seaborn and matplotlib plots look the same, see: https://stackoverflow.com/questions/54522709/my-seaborn-and-matplotlib-plots-look-the-same
    """
    if style == 'default':
        # use the standard matplotlib style
        plt.style.use("default")
    elif style == 'seaborn' or style == 'sns':
        # looks identical to seaborn
        import seaborn as sns
        sns.set()
    elif style == 'seaborn-darkgrid':
        # uses the default colours of matplotlib but with the blue background of seaborn
        plt.style.use("seaborn-darkgrid")
    elif style == 'ggplot':
        # another alternative that looks like seaborn
        plt.style.use('ggplot')

    # ax = plt.gca()
    # fig = plt.gcf()
    # fig, axs = plt.subplots(nrows=1, ncols=1, sharex=True, tight_layout=True)
    plt.errorbar(x=x, y=y, yerr=yerr, color=color, ecolor=ecolor,
                 capsize=capsize, linewidth=linewidth, label=curve_label)
    plt.fill_between(x=x, y1=y - yerr, y2=y + yerr, alpha=alpha, label=error_band_label)
    plt.grid(True)
    if curve_label or error_band_label:
        plt.legend()
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if show:
        plt.show()
e.g.
def plot_with_error_bands_test():
    import numpy as np  # v 1.19.2
    import matplotlib.pyplot as plt  # v 3.3.2

    # the number of x values to consider in a given range, e.g. [0,1] will sample 10 raw features x in the [0,1] interval
    num_x: int = 30
    # the repetitions for each x feature value, e.g. multiple measurements for sample x=0.0 up to x=1.0 at the end
    rep_per_x: int = 5
    total_size_data_set: int = num_x * rep_per_x
    print(f'{total_size_data_set=}')

    # - create fake data set
    # only consider num_x features from 0 to 2*pi
    x = np.linspace(start=0.0, stop=2*np.pi, num=num_x)
    # to introduce fake variation, add uniform noise to each feature and pretend each one is a new observation for that feature
    noise_uniform: np.ndarray = np.random.rand(rep_per_x, num_x)
    # same as above but have the noise be the same for each x (that's what the 1 means)
    noise_normal: np.ndarray = np.random.randn(rep_per_x, 1)
    # signal functions
    sin_signal: np.ndarray = np.sin(x)
    cos_signal: np.ndarray = np.cos(x)
    # [rep_per_x, num_x]
    y1: np.ndarray = sin_signal + noise_uniform + noise_normal
    y2: np.ndarray = cos_signal + noise_uniform + noise_normal

    y1mean = y1.mean(axis=0)
    y1err = y1.std(axis=0)
    y2mean = y2.mean(axis=0)
    y2err = y2.std(axis=0)

    plot_with_error_bands(x=x, y=y1mean, yerr=y1err, xlabel='x', ylabel='y', title='Custom Seaborn')
    plot_with_error_bands(x=x, y=y2mean, yerr=y2err, xlabel='x', ylabel='y', title='Custom Seaborn')
    plt.show()
It looks as follows:
If you want to use seaborn, check out this question: How to show error bands for pure matrices [Samples, X_Range] with Seaborn error bands?
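For reference, a minimal seaborn sketch could look something like this (assuming seaborn >= 0.12 for the errorbar keyword; on older versions use ci='sd' instead):
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
x = np.linspace(0, 2*np.pi, 30)
# 5 noisy repetitions of the same sine curve, shape [reps, num_x]
y = np.sin(x) + rng.normal(0, 0.2, size=(5, x.size))

# melt the matrix into long form so seaborn can aggregate over the repetitions
long_df = pd.DataFrame(y, columns=x).melt(var_name='x', value_name='y')

# lineplot draws the mean line plus a shaded band (here +/- one standard deviation)
sns.lineplot(data=long_df, x='x', y='y', errorbar='sd')
plt.show()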

Plot scaled and rotated bivariate distribution using matplotlib

I am trying to plot a bivariate gaussian distribution using matplotlib. I want to do this using the xy coordinates of two scatter points (Group A), (Group B).
I want to adjust the distribution by adjusting the COV matrix to account for each group's velocity and its distance to an additional xy coordinate used as a reference point.
I've calculated the distance of each group's xy coordinate to that of the reference point. The distance is expressed as a radius, labelled [GrA_Rad], [GrB_Rad].
So the further they are away from the reference point, the greater the radius. I've also calculated velocity, labelled [GrA_Vel], [GrB_Vel]. The direction of each group is expressed as its orientation, labelled [GrA_Rotation], [GrB_Rotation].
Question on how I want the distribution to be adjusted for velocity and distance (radius):
I'm hoping to use SVD. Specifically, if I have the rotation angle of each scatter point, this provides the direction. The velocity can be used to describe a scaling matrix [GrA_Scaling], [GrB_Scaling]. This scaling matrix can be used to expand the radius in the x-direction and contract the radius in the y-direction. This expresses the COV matrix.
Finally, the distribution mean value is found by translating the group's location (x, y) by half the velocity.
Put simply: the radius is applied to each group's scatter point. The COV matrix is adjusted by the radius and velocity. So the scaling matrix is used to expand the radius in the x-direction and contract it in the y-direction. The direction is measured from the rotation angle. Then the distribution mean value is determined by translating the group's location (x, y) by half the velocity.
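In code, the construction I'm describing would look roughly like this (just an illustrative sketch; rot/getcov are placeholder helpers, and the numbers are taken from the df below, GrA at Time 2):
import numpy as np
import scipy.stats as sts

def rot(theta):
    t = np.deg2rad(theta)
    return np.array([[np.cos(t), -np.sin(t)],
                     [np.sin(t),  np.cos(t)]])

def getcov(radius, scale, theta):
    # stretch by (scale + 1) in x, contract by 1/(scale + 1) in y,
    # dilate by the radius, then rotate into the direction of travel
    base = np.array([[radius*(scale + 1), 0],
                     [0, radius/(scale + 1)]])
    r = rot(theta)
    return r @ base @ r.T

# the mean is the group's position translated by half the velocity along its heading
x, y, vel, theta = 12, 12, 2.8, 45
mean = np.array([x, y]) + rot(theta) @ np.array([vel/2, 0])
cov = getcov(radius=8.25, scale=0.22, theta=theta)
dist = sts.multivariate_normal(mean, cov)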
Below is the df of these variables
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
d = ({
'Time' : [1,2,3,4,5,6,7,8],
'GrA_X' : [10,12,17,16,16,14,12,8],
'GrA_Y' : [10,12,13,7,6,7,8,8],
'GrB_X' : [5,8,13,16,19,15,13,5],
'GrB_Y' : [6,15,12,7,8,9,10,8],
'Reference_X' : [6,8,14,18,13,11,16,15],
'Reference_Y' : [10,12,8,12,15,12,10,8],
'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],
'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],
'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],
'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],
'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135],
'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],
})
df = pd.DataFrame(data = d)
I've made an animated plot of each xy coordinate.
GrA_X = [10,12,17,16,16,14,12,8]
GrA_Y = [10,12,13,7,6,7,8,8]
GrB_X = [5,8,13,16,19,15,13,5]
GrB_Y = [6,15,12,10,8,9,10,8]
Item_X = [6,8,14,18,13,11,16,15]
Item_Y = [10,12,8,12,15,12,10,8]
scatter_GrA = ax.scatter(GrA_X, GrA_Y)
scatter_GrB = ax.scatter(GrB_X, GrB_Y)
scatter_Item = ax.scatter(Item_X, Item_Y)
def animate(i):
    scatter_GrA.set_offsets([[GrA_X[0+i], GrA_Y[0+i]]])
    scatter_GrB.set_offsets([[GrB_X[0+i], GrB_Y[0+i]]])
    scatter_Item.set_offsets([[Item_X[0+i], Item_Y[0+i]]])

ani = animation.FuncAnimation(fig, animate, np.arange(0,9),
                              interval=1000, blit=False)
Update
The question has been updated, and has gotten somewhat clearer. I've updated my code to match. Here's the latest output:
Aside from the styling, I think this matches what the OP described.
Here's the code that was used to produce the above plot:
dfake = ({
'GrA_X' : [15,15],
'GrA_Y' : [15,15],
'Reference_X' : [15,3],
'Reference_Y' : [15,15],
'GrA_Rad' : [15,25],
'GrA_Vel' : [0,10],
'GrA_Scaling' : [0,0.5],
'GrA_Rotation' : [0,45]
})
dffake = pd.DataFrame(dfake)
fig,axs = plt.subplots(1, 2, figsize=(16,8))
fig.subplots_adjust(0,0,1,1)
plotone(dffake, 'A', 0, xlim=(0,30), ylim=(0,30), fig=fig, ax=axs[0])
plotone(dffake, 'A', 1, xlim=(0,30), ylim=(0,30), fig=fig, ax=axs[1])
plt.show()
and the complete implementation of the plotone function that I used is in the code block below. If you just want to know about the math used to generate and transform the 2D gaussian PDF, check out the mvpdf function (and the rot and getcov functions it depends on):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])
    r = rot(theta)
    return r @ cov @ r.T

def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Creates a grid of data that represents the PDF of a multivariate gaussian.

    x, y: The center of the returned PDF
    (xy)lim: The extent of the returned PDF
    radius: The PDF will be dilated by this factor
    scale: The PDF will be stretched by a factor of (scale + 1) in the x direction, and squashed by a factor of 1/(scale + 1) in the y direction
    theta: The PDF will be rotated by this many degrees

    returns: X, Y, PDF. X and Y hold the coordinates of the PDF.
    """
    # create the coordinate grids
    X,Y = np.meshgrid(np.linspace(*xlim), np.linspace(*ylim))

    # stack them into the format expected by the multivariate pdf
    XY = np.stack([X, Y], 2)

    # displace xy by half the velocity
    x,y = rot(theta) @ (velocity/2, 0) + (x, y)

    # get the covariance matrix with the appropriate transforms
    cov = getcov(radius=radius, scale=scale, theta=theta)

    # generate the data grid that represents the PDF
    PDF = sts.multivariate_normal([x, y], cov).pdf(XY)

    return X, Y, PDF
def plotmv(x, y, xlim=None, ylim=None, radius=1, velocity=0, scale=0, theta=0, xref=None, yref=None, fig=None, ax=None):
    """Plot an xy point with an appropriately transformed 2D gaussian around it.
    Also plots other related data like the reference point.
    """
    if xlim is None: xlim = (x - 5, x + 5)
    if ylim is None: ylim = (y - 5, y + 5)
    if fig is None:
        fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
    elif ax is None:
        ax = fig.gca()

    # plot the xy point
    ax.plot(x, y, '.', c='C0', ms=20)

    if not (xref is None or yref is None):
        # plot the reference point, if supplied
        ax.plot(xref, yref, '.', c='w', ms=12)

    # plot the arrow leading from the xy point
    if velocity > 0:
        ax.arrow(x, y, *rot(theta) @ (velocity, 0),
                 width=.4, length_includes_head=True, ec='C0', fc='C0')

    # fetch the PDF of the 2D gaussian
    X, Y, PDF = mvpdf(x, y, xlim=xlim, ylim=ylim, radius=radius, velocity=velocity, scale=scale, theta=theta)

    # normalize PDF by shifting and scaling, so that the smallest value is 0 and the largest is 1
    normPDF = PDF - PDF.min()
    normPDF = normPDF/normPDF.max()

    # plot and label the contour lines of the 2D gaussian
    cs = ax.contour(X, Y, normPDF, levels=6, colors='w', alpha=.5)
    ax.clabel(cs, fmt='%.3f', fontsize=12)

    # plot the filled contours of the 2D gaussian. Set levels high for smooth contours
    cfs = ax.contourf(X, Y, normPDF, levels=50, cmap='viridis', vmin=-.9, vmax=1)

    # create the colorbar and ensure that it goes from 0 -> 1
    cbar = fig.colorbar(cfs, ax=ax)
    cbar.set_ticks([0, .2, .4, .6, .8, 1])

    # add some labels
    ax.grid()
    ax.set_xlabel('X distance (M)')
    ax.set_ylabel('Y distance (M)')

    # ensure that x vs y scaling doesn't disrupt the transforms applied to the 2D gaussian
    ax.set_aspect('equal', 'box')

    return fig, ax
def fetchone(df, l, i, **kwargs):
    """Fetch all the needed data for one xy point
    """
    keytups = (
        ('x', 'Gr%s_X'%l),
        ('y', 'Gr%s_Y'%l),
        ('radius', 'Gr%s_Rad'%l),
        ('velocity', 'Gr%s_Vel'%l),
        ('scale', 'Gr%s_Scaling'%l),
        ('theta', 'Gr%s_Rotation'%l),
        ('xref', 'Reference_X'),
        ('yref', 'Reference_Y')
    )
    ret = {k:df.loc[i, l] for k,l in keytups}
    # add in any overrides
    ret.update(kwargs)
    return ret

def plotone(df, l, i, xlim=None, ylim=None, fig=None, ax=None, **kwargs):
    """Plot exactly one point from the dataset
    """
    # look up all the data to plot one datapoint
    xydata = fetchone(df, l, i, **kwargs)

    # do the plot
    return plotmv(xlim=xlim, ylim=ylim, fig=fig, ax=ax, **xydata)
Old answer -2
I've adjusted my answer to match the example the OP posted:
Here's the code that produced the above image:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])
    r = rot(theta)
    return r @ cov @ r.T

def datalimits(*data, pad=.15):
    dmin,dmax = min(d.min() for d in data), max(d.max() for d in data)
    spad = pad*(dmax - dmin)
    return dmin - spad, dmax + spad
d = ({
'Time' : [1,2,3,4,5,6,7,8],
'GrA_X' : [10,12,17,16,16,14,12,8],
'GrA_Y' : [10,12,13,7,6,7,8,8],
'GrB_X' : [5,8,13,16,19,15,13,5],
'GrB_Y' : [6,15,12,7,8,9,10,8],
'Reference_X' : [6,8,14,18,13,11,16,15],
'Reference_Y' : [10,12,8,12,15,12,10,8],
'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],
'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],
'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],
'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],
'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135],
'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],
})
df = pd.DataFrame(data=d)
limitpad = .5
clevels = 5
cflevels = 50
xmin,xmax = datalimits(df['GrA_X'], df['GrB_X'], pad=limitpad)
ymin,ymax = datalimits(df['GrA_Y'], df['GrB_Y'], pad=limitpad)
X,Y = np.meshgrid(np.linspace(xmin, xmax), np.linspace(ymin, ymax))
fig = plt.figure(figsize=(10,6))
ax = plt.gca()
Zs = []
for l,color in zip('AB', ('red', 'yellow')):
    # plot all of the points from a single group
    ax.plot(df['Gr%s_X'%l], df['Gr%s_Y'%l], '.', c=color, ms=15, label=l)

    Zrows = []
    for _,row in df.iterrows():
        x,y = row['Gr%s_X'%l], row['Gr%s_Y'%l]
        cov = getcov(radius=row['Gr%s_Rad'%l], scale=row['Gr%s_Scaling'%l], theta=row['Gr%s_Rotation'%l])
        mnorm = sts.multivariate_normal([x, y], cov)
        Z = mnorm.pdf(np.stack([X, Y], 2))
        Zrows.append(Z)
    Zs.append(np.sum(Zrows, axis=0))
# plot the reference points
# create Z from the difference of the sums of the 2D Gaussians from group A and group B
Z = Zs[0] - Zs[1]
# normalize Z by shifting and scaling, so that the smallest value is 0 and the largest is 1
normZ = Z - Z.min()
normZ = normZ/normZ.max()
# plot and label the contour lines
cs = ax.contour(X, Y, normZ, levels=clevels, colors='w', alpha=.5)
ax.clabel(cs, fmt='%2.1f', colors='w')#, fontsize=14)
# plot the filled contours. Set levels high for smooth contours
cfs = ax.contourf(X, Y, normZ, levels=cflevels, cmap='viridis', vmin=0, vmax=1)
# create the colorbar and ensure that it goes from 0 -> 1
cbar = fig.colorbar(cfs, ax=ax)
cbar.set_ticks([0, .2, .4, .6, .8, 1])
ax.set_aspect('equal', 'box')
Old answer -1
It's a little hard to tell exactly what you're after. It is possible to scale and rotate a multivariate gaussian distribution via its covariance matrix. Here's an example of how to do so based on your data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(scale, theta):
    cov = np.array([
        [1*(scale + 1), 0],
        [0, 1/(scale + 1)]
    ])
    r = rot(theta)
    return r @ cov @ r.T
d = ({
'Time' : [1,2,3,4,5,6,7,8],
'GrA_X' : [10,12,17,16,16,14,12,8],
'GrA_Y' : [10,12,13,7,6,7,8,8],
'GrB_X' : [5,8,13,16,19,15,13,5],
'GrB_Y' : [6,15,12,7,8,9,10,8],
'Reference_X' : [6,8,14,18,13,11,16,15],
'Reference_Y' : [10,12,8,12,15,12,10,8],
'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],
'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],
'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],
'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],
'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135],
'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],
})
df = pd.DataFrame(data=d)
xmin,xmax = min(df['GrA_X'].min(), df['GrB_X'].min()), max(df['GrA_X'].max(), df['GrB_X'].max())
ymin,ymax = min(df['GrA_Y'].min(), df['GrB_Y'].min()), max(df['GrA_Y'].max(), df['GrB_Y'].max())
X,Y = np.meshgrid(
np.linspace(xmin - (xmax - xmin)*.1, xmax + (xmax - xmin)*.1),
np.linspace(ymin - (ymax - ymin)*.1, ymax + (ymax - ymin)*.1)
)
fig,axs = plt.subplots(df.shape[0], sharex=True, figsize=(4, 4*df.shape[0]))
fig.subplots_adjust(0,0,1,1,0,-.82)
for (_,row),ax in zip(df.iterrows(), axs):
    for c in 'AB':
        x,y = row['Gr%s_X'%c], row['Gr%s_Y'%c]
        cov = getcov(scale=row['Gr%s_Scaling'%c], theta=row['Gr%s_Rotation'%c])
        mnorm = sts.multivariate_normal([x, y], cov)
        Z = mnorm.pdf(np.stack([X, Y], 2))
        ax.contour(X, Y, Z)
        ax.plot(row['Gr%s_X'%c], row['Gr%s_Y'%c], 'x')
    ax.set_aspect('equal', 'box')
This outputs:

matplotlib: continuous colormap fill between two lines

It's possible to fill between lines with a color:
http://matplotlib.sourceforge.net/examples/pylab_examples/fill_between_demo.html
It's also possible to use a continuous colormap for a line:
http://matplotlib.sourceforge.net/examples/pylab_examples/multicolored_line.html
Is it possible (and reasonably easy) to use a continuous colormap for the colored fill between two lines? For example, the color fill may change along x based on the difference between the two lines at x (or based on another set of data).
I found a solution to this problem. It builds on the brilliant but hacky solution of @Hooked. You create a 2D grid filled with lots of small boxes. It's not the fastest solution, but it should be pretty flexible (more so than solutions which apply imshow to the patches).
import numpy as np
import pylab as plt
#Plot a rectangle
def rect(ax, x, y, w, h, c, **kwargs):
    # Varying only in x
    if len(c.shape) == 1:
        rect = plt.Rectangle((x, y), w, h, color=c, ec=c, **kwargs)
        ax.add_patch(rect)
    # Varying in x and y
    else:
        # Split into a number of bins
        N = c.shape[0]
        hb = h/float(N); yl = y
        for i in range(N):
            yl += hb
            rect = plt.Rectangle((x, yl), w, hb,
                                 color=c[i,:], ec=c[i,:], **kwargs)
            ax.add_patch(rect)

#Fill a contour between two lines
def rainbow_fill_between(ax, X, Y1, Y2, colors=None,
                         cmap=plt.get_cmap("Reds"), **kwargs):
    plt.plot(X, Y1, lw=0)  # Plot so the axes scale correctly
    dx = X[1]-X[0]
    N = X.size
    # Pad a float or int to same size as x
    if (type(Y2) is float or type(Y2) is int):
        Y2 = np.array([Y2]*N)
    # No colors -- specify linear
    if colors is None:
        colors = []
        for n in range(N):
            colors.append(cmap(n/float(N)))
    # Varying only in x
    elif len(colors.shape) == 1:
        colors = cmap((colors-colors.min())
                      /(colors.max()-colors.min()))
    # Varying in x and y
    else:
        cnp = np.array(colors)
        colors = np.empty([colors.shape[0], colors.shape[1], 4])
        for i in range(colors.shape[0]):
            for j in range(colors.shape[1]):
                colors[i,j,:] = cmap((cnp[i,j]-cnp[:,:].min())
                                     /(cnp[:,:].max()-cnp[:,:].min()))
        colors = np.array(colors)

    # Create the patch objects
    for (color, x, y1, y2) in zip(colors, X, Y1, Y2):
        rect(ax, x, y2, dx, y1-y2, color, **kwargs)
# Some Test data
X = np.linspace(0,10,100)
Y1 = .25*X**2 - X
Y2 = X
g = np.exp(-.3*(X-5)**2)
#Plot fill and curves changing in x only
fig, axs =plt.subplots(1,2)
colors = g
rainbow_fill_between(axs[0],X,Y1,Y2,colors=colors)
axs[0].plot(X,Y1,'k-',lw=4)
axs[0].plot(X,Y2,'k-',lw=4)
#Plot fill and curves changing in x and y
colors = np.outer(g,g)
rainbow_fill_between(axs[1],X,Y1,Y2,colors=colors)
axs[1].plot(X,Y1,'k-',lw=4)
axs[1].plot(X,Y2,'k-',lw=4)
plt.show()
The result is,
Your solution is great and flexible! In particular the 2D case is really nice. Such a feature could perhaps be added to fill_between if its colors kwarg accepted an array of the same length as x and y.
Here is a simpler approach for the 1D case using the fill_between function. It does the same thing, but since it uses trapezoids instead of rectangles, the result is smoother.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
# Select a color map
cmap = mpl.cm.bwr
# Some Test data
npts = 100
x = np.linspace(-4, 4, npts)
y = norm.pdf(x)
z = np.sin(2 * x)
normalize = mpl.colors.Normalize(vmin=z.min(), vmax=z.max())
# The plot
fig = plt.figure()
ax = fig.add_axes([0.12, 0.12, 0.68, 0.78])
plt.plot(x, y, color="gray")
for i in range(npts - 1):
    plt.fill_between([x[i], x[i+1]], [y[i], y[i+1]], color=cmap(normalize(z[i])))
cbax = fig.add_axes([0.85, 0.12, 0.05, 0.78])
cb = mpl.colorbar.ColorbarBase(cbax, cmap=cmap, norm=normalize, orientation='vertical')
cb.set_label("Sin function", rotation=270, labelpad=15)
plt.show()
