numpy: get the indices within a polygon without creating a mask - python

I have a polygon vector formatted as follows (x1,y1,x2,y2, ...., xn,yn). As an example, consider this polygon array:
polyPoints = [3,5,7,8,9,5]
How do I get all the indices (or the coordinates) that are within the polygon generated from these points ?
The answers I have looked at so far require you to create the 2D mask before you can get the indices within the polygon.

You can use scikit-image:
import numpy as np
from skimage.draw import polygon
# Flat vertex list in (x1, y1, x2, y2, ...) order, as in the question.
points = [3, 5, 7, 8, 9, 5]
xs, ys = points[::2], points[1::2]
# polygon() expects the row (y) coordinates first, then the column (x) ones.
r, c = polygon(ys, xs)
print(r, c)
the output is:
[5 5 5 5 5 5 6 6 6 6 7 7] [3 4 5 6 7 8 5 6 7 8 6 7]

Using a mask is probably as efficient as you can get. Here is an alternative algorithm which is rather inefficient, but can probably be optimized to be close to the mask approach. It essentially builds the mask one scanline at a time.
The approach is:
Find equations of lines of all edges
Find bounding box
For each y within bounding box (or x, whichever is smaller), compute the edges which intersect with the horizontal line (y=yi) at that y, and find at which x they intersect.
For each x within the bounding box, count the edges that the line y=yi intersects to the right of x. If that count is odd, then the point (x,y) is inside the polygon.
It does work on a simple square geometry.
import numpy as np
# taken from: https://stackoverflow.com/questions/20677795/how-do-i-compute-the-intersection-point-of-two-lines-in-python
def line(p1, p2):
    """Return the coefficients (A, B, C) of the line through p1 and p2.

    The line is expressed in the form ``A*x + B*y = C``. ``p1`` and ``p2``
    are indexable (x, y) pairs.
    """
    x1, y1 = p1[0], p1[1]
    x2, y2 = p2[0], p2[1]
    # A*x + B*y + (x1*y2 - x2*y1) = 0, so the constant moves to the
    # right-hand side with its sign flipped.
    return y1 - y2, x2 - x1, -(x1 * y2 - x2 * y1)
def intersection(L1, L2):
    """Intersect two lines given as (A, B, C) with ``A*x + B*y = C``.

    Returns the intersection point as an ``(x, y)`` tuple, or ``False`` when
    the determinant is zero (parallel or coincident lines).
    """
    # Cramer's rule on the 2x2 system formed by both line equations.
    D = L1[0] * L2[1] - L1[1] * L2[0]
    if D == 0:
        return False
    Dx = L1[2] * L2[1] - L1[1] * L2[2]
    Dy = L1[0] * L2[2] - L1[2] * L2[0]
    return Dx / D, Dy / D
# polyPoints = np.array([0, 0, 4, 0,4, 4, 0, 4])
polyPoints = np.array([[3,5,7,8,9,5]])
polyPoints = polyPoints.reshape(-1, 2)
npoints = polyPoints.shape[0]

# One (A, B, C) line per edge; edge i joins vertex i to vertex (i + 1) % npoints.
polyEgdes = []
for i in range(npoints):
    point1, point2 = polyPoints[i, :], polyPoints[(i+1) % npoints, :]
    polyEgdes.append(line(point1, point2))

# bounding box: row 0 is (xmin, ymin), row 1 is (xmax, ymax)
boundingBox = np.vstack((polyPoints.min(axis=0), polyPoints.max(axis=0)))

inside_points = []
for y in range(boundingBox[0, 1], boundingBox[1, 1]):
    x_intersect = []
    for i, l in enumerate(polyEgdes):
        y_start = polyPoints[i, 1]
        y_end = polyPoints[(i+1) % npoints, 1]
        # Only the edge *segment* counts, not the infinite line through it.
        # The half-open test [ymin, ymax) counts a shared vertex exactly once
        # and skips horizontal edges (ymin == ymax).
        if not (min(y_start, y_end) <= y < max(y_start, y_end)):
            continue
        # The scanline y = const is 0*x + 1*y = y in A*x + B*y = C form.
        insect_point = intersection(l, [0, 1, y])
        if insect_point:
            x_intersect.append(insect_point[0])
    x_intersect = np.array(x_intersect)
    for x in range(boundingBox[0, 0], boundingBox[1, 0] + 1):
        # Even-odd rule: an odd number of crossings strictly to the right
        # of x means (x, y) is inside.
        x_int_points = x_intersect[x_intersect > x]
        if len(x_int_points) % 2 == 1:
            inside_points.append((x, y))
print(inside_points)

Related

Python Binning using triangular bins

I'm trying to find a simple python module/package that has implemented 2D triangular bins so that it can be use in a similar fashion to scipy binned_statistic_dd. Is anyone aware of such a tool? I've searched but not found anything: the closest I've found is matplotlib's hexbin.
If I have to create a home-made solution, generating the vertex points for the triangular grid is easy, but how would you efficiently (need to avoid slow loops if possible as datasets are about 100K points) search which triangle a point lies in?
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
def plot_triangular_bin_freq(x,y,Vx,Vy):
    """Plot how many of the points (Vx, Vy) fall into each triangular bin.

    The grid spanned by ``x`` and ``y`` is cut into squares, and every square
    into a lower-left and an upper-right triangle. Points outside the grid
    are ignored. The result is drawn with ``plt.tripcolor``; nothing is
    returned.

    x, y   : 1-D grid coordinates. The index arithmetic below rescales by
             (x[-1] - x[0]) / (Nx - 1), i.e. it assumes even spacing.
    Vx, Vy : coordinates of the points to bin (any shape, flattened here).
    """
    X, Y = np.meshgrid(x, y)
    Ny, Nx = X.shape
    iy,ix = np.indices((Ny-1, Nx-1))
    # max vertice is supposed to be
    # max(iy)*Nx + max(ix) + (Nx+1)
    # = (Ny-2)*Nx + (Nx-2) + (Nx+1)
    # = Ny * Nx - 1
    assert iy.max() == Ny-2
    assert ix.max() == Nx-2
    # build square grid and split it in a lower-left, upper-right triangles
    # and construct the triangulation
    vertices = (((iy * Nx) + ix)[:,:,None] + np.array([0,1,Nx,Nx,Nx+1,1])[None,None,:]).reshape(-1, 3)
    triangles = tri.Triangulation(X.flatten(), Y.flatten(), vertices)
    # Normalized point coordinates
    Vx = (np.asarray(Vx).flatten() - x[0]) * ((Nx-1) / (x[-1] - x[0]))
    Vy = (np.asarray(Vy).flatten() - y[0]) * ((Ny-1) / (y[-1] - y[0]))
    m = (0 <= Vx) & (Vx < Nx-1) & (0 <= Vy) & (Vy < Ny-1)
    # get indices on the x,y boxes
    Ix, Rx = divmod(Vx[m], 1)
    Iy, Ry = divmod(Vy[m], 1)
    # (Rx+Ry)=1 is the boundary between the two triangles
    # w indicates the index of the triangle where the point lies on
    w = ((Rx+Ry)>=1) + 2*(Ix + (Nx-1)*Iy)
    # Sanity checks; skipped when no point falls inside the grid because
    # max() of an empty array raises.
    if w.size:
        assert Ix.max() < Nx-1
        assert Iy.max() < Ny-1
        assert (Ix + Iy*(Nx-1)).max() < (Nx-1)*(Ny-1)
    # z[i] is the number of points that lie inside triangle i
    z = np.bincount(w.astype(np.int64), minlength=2*(Nx-1)*(Ny-1))
    plt.tripcolor(triangles, z, shading='flat')
# Grid nodes every 0.5 units: 15 along x and 10 along y.
x = 0.5 * np.arange(15)
y = 0.5 * np.arange(10)
# 1000 normally distributed sample points centred on (3, 1).
Vx = 3 + np.random.randn(1000)
Vy = 1 + np.random.randn(1000)
plot_triangular_bin_freq(x, y, Vx, Vy)

Find coordinates of points given their distances

I have to find in Python the coordinates of the points A, B, C, D given their distances and the gradient of the line L (the one that passes through the center), which is parallel to segments AD and BC and orthogonal to segments AB and CD.
That is the code I wrote:
import numpy as np
# Slope of the known line L and the angle it makes with the x axis
l_gradient = 0.17
l_angle = np.arctan(l_gradient)

# Side lengths of the rectangle
ad_distance, ab_distance = 1, 2

# AB and DC are orthogonal to L; their y-intercepts are symmetric about 0
ab_gradient = dc_gradient = -1 / l_gradient
dc_intercept = (ad_distance / 2) / np.sin(l_angle)  # inverse right-triangle formula
ab_intercept = -dc_intercept

# AD and BC are parallel to L; their y-intercepts are symmetric about 0
ad_gradient = bc_gradient = l_gradient
ad_intercept = (ab_distance / 2) / np.cos(l_angle)  # inverse right-triangle formula
bc_intercept = -ad_intercept
I think the easiest way to do this is first assume the gradient is 0. Then we have our points:
ad_distance = 1
ab_distance = 2
# Half extents of the axis-aligned rectangle centred on the origin.
half_ad, half_ab = ad_distance / 2, ab_distance / 2
points = np.array([
    [-half_ad, +half_ab],  # A
    [-half_ad, -half_ab],  # B
    [+half_ad, -half_ab],  # C
    [+half_ad, +half_ab],  # D
])
Note that at the bottom we have a triangle with sides (x, l_gradient x, sqrt(1 + l_gradient^2) x). And remember cos(angle) = adjacent / hypot.
Thus we have:
l_gradient = 0.17
# Hypotenuse of the right triangle with legs 1 and l_gradient.
hypotenuse = np.sqrt(1 + l_gradient**2)
l_cos = 1 / hypotenuse
l_sin = l_gradient * l_cos
Now we can use those to construct a rotation matrix, and rotate our points into the correct positions:
# 2x2 rotation matrix built from the cosine and sine of the angle of L
l_rot = np.array([[l_cos , -l_sin], [l_sin, l_cos]])
# points holds one (x, y) pair per row: rotate the transposed array with the
# matrix product, then transpose back.
points = (l_rot @ points.T).T
No trigonometry functions required!

How to vectorize a nested "for" loop with multiple "if" statements using Numpy?

I have a simple 2D ray-casting routine that gets terribly slow as soon as the number of obstacles increases.
This routine is made up of:
2 for loops (outer loop iterates over each ray/direction, then inner loop iterates over each line obstacle)
multiple if statements (check if a value is > or < than another value or if an array is empty)
Question: How can I condense all these operations into 1 single block of vectorized instructions using Numpy ?
More specifically, I am facing 2 issues:
I have managed to vectorize the inner loop (intersection between a ray and each obstacle) but I am unable to run this operation for all rays at once.
The only workaround I found to deal with the if statements is to use masked arrays. Something tells me it is not the proper way to handle these statements in this case (it seems clumsy, cumbersome and unpythonic)
Original code:
from math import radians, cos, sin
import matplotlib.pyplot as plt
import numpy as np
N = 10 # dimensions of canvas (NxN)
sides = np.array([[0, N, 0, 0], [0, N, N, N], [0, 0, 0, N], [N, N, 0, N]])
edges = np.random.rand(5, 4) * N # coordinates of 5 random segments (x1, x2, y1, y2)
edges = np.concatenate((edges, sides))
center = np.array([N/2, N/2]) # coordinates of center point
directions = np.array([(cos(radians(a)), sin(radians(a))) for a in range(0, 360, 10)]) # vectors pointing in all directions
intersections = []
# for each direction
for d in directions:
    min_dist = float('inf')
    # Reset per ray so a ray that hits nothing cannot reuse the previous
    # ray's hit (or raise NameError on the very first ray).
    nearest = None
    # the ray goes from the center through center + d (same for every edge)
    p3x, p3y = center
    p4x, p4y = center + d
    # for each edge
    for e in edges:
        p1x, p1y = e[0], e[2]
        p2x, p2y = e[1], e[3]
        # find intersection point
        den = (p1x - p2x) * (p3y - p4y) - (p1y - p2y) * (p3x - p4x)
        if den:
            t = ((p1x - p3x) * (p3y - p4y) - (p1y - p3y) * (p3x - p4x)) / den
            u = -((p1x - p2x) * (p1y - p3y) - (p1y - p2y) * (p1x - p3x)) / den
            # if any: t in (0, 1) is on the segment, u > 0 is in front of the center
            if t > 0 and t < 1 and u > 0:
                sx = p1x + t * (p2x - p1x)
                sy = p1y + t * (p2y - p1y)
                isec = np.array([sx, sy])
                dist = np.linalg.norm(isec-center)
                # make sure to select the nearest one (from center)
                if dist < min_dist:
                    min_dist = dist
                    nearest = isec
    # store nearest intersection point for each ray (if there is one)
    if nearest is not None:
        intersections.append(nearest)
# Render
plt.axis('off')
for x, y in zip(edges[:,:2], edges[:,2:]):
    plt.plot(x, y)
for isec in np.array(intersections):
    plt.plot((center[0], isec[0]), (center[1], isec[1]), '--', color="#aaaaaa", linewidth=.8)
Vectorized version (attempt):
from math import radians, cos, sin
import matplotlib.pyplot as plt
from scipy import spatial
import numpy as np
N = 10 # dimensions of canvas (NxN)
sides = np.array([[0, N, 0, 0], [0, N, N, N], [0, 0, 0, N], [N, N, 0, N]])
edges = np.random.rand(5, 4) * N # coordinates of 5 random segments (x1, x2, y1, y2)
edges = np.concatenate((edges, sides))
center = np.array([N/2, N/2]) # coordinates of center point
directions = np.array([(cos(radians(a)), sin(radians(a))) for a in range(0, 360, 10)]) # vectors pointing in all directions
intersections = []
# Render edges
plt.axis('off')
for x, y in zip(edges[:,:2], edges[:,2:]):
    plt.plot(x, y)
# for each direction
for d in directions:
    p1x, p1y = edges[:,0], edges[:,2]
    p2x, p2y = edges[:,1], edges[:,3]
    p3x, p3y = center
    p4x, p4y = center + d
    # denominator
    den = (p1x - p2x) * (p3y - p4y) - (p1y - p2y) * (p3x - p4x)
    # first 'if' statement -> if den (non-zero). Testing den > 0 would drop
    # every valid intersection whose denominator is negative.
    mask = den != 0
    den = den[mask]
    p1x = p1x[mask]
    p1y = p1y[mask]
    p2x = p2x[mask]
    p2y = p2y[mask]
    t = ((p1x - p3x) * (p3y - p4y) - (p1y - p3y) * (p3x - p4x)) / den
    u = -((p1x - p2x) * (p1y - p3y) - (p1y - p2y) * (p1x - p3x)) / den
    # second 'if' statement -> if (t>0) & (t<1) & (u>0)
    mask2 = (t > 0) & (t < 1) & (u > 0)
    t = t[mask2]
    p1x = p1x[mask2]
    p1y = p1y[mask2]
    p2x = p2x[mask2]
    p2y = p2y[mask2]
    # x, y coordinates of all intersection points in the current direction
    sx = p1x + t * (p2x - p1x)
    sy = p1y + t * (p2y - p1y)
    pts = np.c_[sx, sy]
    # if any:
    if pts.size > 0:
        # find nearest intersection point (plain argmin; building a KD-tree
        # for a single query per ray is unnecessary)
        nearest = pts[np.linalg.norm(pts - center, axis=1).argmin()]
        # Render
        plt.plot((center[0], nearest[0]), (center[1], nearest[1]), '--', color="#aaaaaa", linewidth=.8)
Reformulation of the problem – Finding the intersection between a line segment and a line ray
Let q and q2 be the endpoints of a segment (obstacle). For convenience let's define a class to represent points and vectors in the plane. In addition to the usual operations, a vector multiplication is defined by u × v = u.x * v.y - u.y * v.x.
Caution: here Coord(2, 1) * 3 returns Coord(6, 3) while Coord(2, 1) * Coord(-1, 4) outputs 9. To avoid this confusion it might have been possible to restrict * to the scalar multiplication and use ^ via __xor__ for the vector multiplication.
class Coord:
    """A point or vector in the plane.

    ``*`` is overloaded: ``Coord * Coord`` is the scalar 2D cross product
    ``u.x * v.y - u.y * v.x``, while ``Coord * number`` (in either order)
    scales both components.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    @property
    def radius(self):
        """Distance from the origin."""
        return np.sqrt(self.x ** 2 + self.y ** 2)

    def _cross_product(self, other):
        assert isinstance(other, Coord)
        return self.x * other.y - self.y * other.x

    def __mul__(self, other):
        if isinstance(other, Coord):
            # 2D "cross"-product
            return self._cross_product(other)
        if isinstance(other, (int, float)):
            # scalar multiplication
            return Coord(self.x * other, self.y * other)
        # Unsupported operand: let Python raise TypeError instead of
        # silently returning None.
        return NotImplemented

    def __rmul__(self, other):
        return self * other

    def __sub__(self, other):
        return Coord(self.x - other.x, self.y - other.y)

    def __add__(self, other):
        return Coord(self.x + other.x, self.y + other.y)

    def __repr__(self):
        return f"Coord({self.x}, {self.y})"
Now, I find it easier to handle a ray in polar coordinates: For a given angle theta (direction) the goal is to determine if it intersects the segment, and if so determine the corresponding radius. Here is a function to find that. See here for an explanation of why and how. I tried to use the same variable names as in the previous link.
def find_intersect_btw_ray_and_sgmt(q, q2, theta):
    """
    Intersect the ray leaving the origin at angle theta with segment [q, q2].

    Args:
        q (Coord): first endpoint of the segment
        q2 (Coord): second endpoint of the segment
        theta (float): angle of the ray
    Returns:
        (float): np.inf if the ray does not intersect the segment,
                 the distance from the origin of the intersection otherwise
    """
    assert isinstance(q, Coord) and isinstance(q2, Coord)
    s = q2 - q
    r = Coord(np.cos(theta), np.sin(theta))
    cross = r * s  # 2d cross-product
    t_num = q * s
    u_num = q * r
    ## the intersection point is roughly at a distance t_num / cross
    ## from the origin. But some cases must be checked beforehand.
    if np.isclose(cross, 0):
        ## (1) the segment [QQ2] is aligned with the ray
        if np.isclose(u_num, 0):
            return min(q.radius, q2.radius)
        ## (2) the segment [QQ2] is parallel with the ray
        return np.inf
    t, u = t_num / cross, u_num / cross
    ## There is actually an intersection point
    if t >= 0 and 0 <= u <= 1:
        return t
    ## (3) No intersection point
    return np.inf
For instance find_intersect_btw_ray_and_sgmt(Coord(1, 2), Coord(-1, 2), np.pi / 2) should return 2.
Note that here for simplicity, I only considered the case where the origin of the rays is at Coord(0, 0). This can be easily extended to the general case by setting t_num = (q - origin) * s and u_num = (q - origin) * r.
Let's vectorize it!
What is very interesting here is that the operations defined in the Coord class also apply to cases where x and y are numpy arrays! Hence applying any defined operation on Coord(np.array([1, 2, 0]), np.array([2, -1, 3])) amounts applying it elementwise to the points (1, 2), (2, -1) and (0, 3). The operations of Coord are therefore already vectorized. The constructor can be modified into:
def __init__(self, x, y):
    # Store the components as arrays so every Coord operation applies
    # elementwise; x and y must have the same shape.
    x, y = np.array(x), np.array(y)
    assert x.shape == y.shape
    self.x, self.y = x, y
    self.shape = x.shape
Now, we would like the function find_intersect_btw_ray_and_sgmt to be able to handle the case where the parameters q and q2 contain sequences of endpoints. Before the sanity checks, all the operations work properly since, as we have mentioned, they are already vectorized. As you mentioned, the conditional statements can be "vectorized" using masks. Here is what I propose:
def find_intersect_btw_ray_and_sgmts(q, q2, theta):
    """Vectorized ray/segment intersection for rays leaving the origin.

    q and q2 are Coord objects of equal shape holding the segment endpoints;
    theta is the ray angle (scalar or array that broadcasts against them).
    Returns an array of distances from the origin to each intersection,
    with np.inf where the ray misses the segment. The aligned case
    (cross == 0 and u_num == 0) is treated as "no intersection".
    """
    assert isinstance(q, Coord) and isinstance(q2, Coord)
    assert q.shape == q2.shape
    EPS = 1e-14
    s = q2 - q
    r = Coord(np.cos(theta), np.sin(theta))
    cross = r * s
    # Multiply everything by sign(cross) so that cross is non-negative and
    # the sign tests on the numerators match those on the quotients.
    cross_sign = np.sign(cross)
    cross = cross * cross_sign
    t_num = (q * s) * cross_sign
    u_num = (q * r) * cross_sign
    # t >= 0 and 0 <= u <= 1, written without dividing by cross
    mask = ~np.isclose(cross, 0) & (t_num >= -EPS) & (-EPS <= u_num) & (u_num <= cross + EPS)
    radii = np.full(mask.shape, np.inf)  # no intersection by default
    radii[mask] = t_num[mask] / cross[mask]  # intersection
    return radii
Note that cross, t_num and u_num are multiplied by the sign of cross to ensure that the division by cross keeps the sign of the dividends. Hence conditions of the form ((t_num >= 0) & (cross >= 0)) | ((t_num <= 0) & (cross <= 0)) can be replaced by (t_num >= 0).
For simplicity, we omitted the case (1) where the radius and the segment were aligned ((cross == 0) & (u_num == 0)). This could be incorporated by carefully adding a second mask.
For a given value of theta, we are able to determine whether the corresponding ray intersects several segments at once.
## Some useful functions
def polar_to_cartesian(r, theta):
    """Return the Coord at radius r and angle theta (radians)."""
    return Coord(r * np.cos(theta), r * np.sin(theta))
def plot_segments(p, q, *args, **kwargs):
    """Draw the segments joining p to q; extra arguments go to plt.plot."""
    plt.plot([p.x, q.x], [p.y, q.y], *args, **kwargs)
def plot_rays(radii, thetas, *args, **kwargs):
    """Draw segments from the origin to the polar points (radii, thetas).

    Extra arguments are forwarded to plot_segments. Relies on Coord exposing
    a ``shape`` attribute (the array-based constructor).
    """
    endpoints = polar_to_cartesian(radii, thetas)
    n = endpoints.shape
    origin = Coord(np.zeros(n), np.zeros(n))
    plot_segments(origin, endpoints, *args, **kwargs)
## Data generation
M = 5 # size of the canvas
N = 10 # number of segments
K = 16 # number of rays
q = Coord(*np.random.uniform(-M/2, M/2, size=(2, N)))
p = q + Coord(*np.random.uniform(-M/2, M/2, size=(2, N)))
thetas = np.linspace(0, 2 * np.pi, K, endpoint=False)
## For each ray, find the minimal distance of intersection
## with all segments
plt.figure(figsize=(5, 5))
plot_segments(p, q, "royalblue", marker=".")
for theta in thetas:
    radii = find_intersect_btw_ray_and_sgmts(p, q, theta)
    radius = np.min(radii)
    if not np.isinf(radius):
        plot_rays(radius, theta, color="orange")
    else:
        # no hit: draw a dotted ray that extends past the visible canvas
        plot_rays(2*M, theta, ':', c='orange')
plt.plot(0, 0, 'kx')
plt.xlim(-M, M)
plt.ylim(-M, M)
And that's not all! Thanks to NumPy broadcasting, it is possible to avoid iterating over the theta values. For example, recall that np.array([1, 2, 3]) * np.array([[1], [2], [3], [4]]) produces a matrix of size 4 × 3 of the pairwise products. In the same way Coord([[5],[7]], [[5],[1]]) * Coord([2, 4, 6], [-2, 4, 0]) outputs a 2 × 3 matrix containing all the pairwise cross products between the vectors (5, 5), (7, 1) and (2, -2), (4, 4), (6, 0).
Finally, the intersections can be determined in the following way:
# Intersect every ray with every segment in a single broadcast call.
radii_all = find_intersect_btw_ray_and_sgmts(p, q, np.vstack(thetas))
# p and q have a shape of (N,) and np.vstack(thetas) of (K, 1)
# thus radii_all has a shape of (K, N)
# radii_all[k, n] contains the distance from the origin of the intersection
# between k-th ray and n-th segment (or np.inf if there is no intersection point)
radii = np.min(radii_all, axis=1)
# radii[k] contains the distance from the origin of the closest intersection
# between k-th ray and all segments
do_intersect = ~np.isinf(radii)
# Solid rays for hits, dotted rays of length 2*M for misses.
plot_rays(radii[do_intersect], thetas[do_intersect], color="orange")
plot_rays(2*M, thetas[~do_intersect], ":", color="orange")

Find how many random points lie inside ellipse centered at a point

The below code generates set of random x,y coordinates and uses the equation of an ellipse to compare how many of those points lie inside ellipse centered at (1,1) and a rectangle of area 2a*2b constructed around the ellipse whose semimajor and semiminor axis are a and b but b is variable and takes a value from the list b every single time. I want to have all the values of b for which the ratio of all the points lying inside the ellipse to the points lying inside the rectangle is greater than 0.5.
The problem I'm facing is this: if I check a single value, b = 0.63, the ratio ellipse_points/rectangle_points is approximately equal to 0.5, but when I loop through the list b and use the if statement to get all the values for which ellipse_points/rectangle_points > 0.5, I do not see any value close to 0.63; instead I see values from 1.2 to 1.9. I do not understand why the if statement seems to give faulty values when I loop through a list of values for b. Please refer to the second code block, where I set b = 0.63 and compute the ratio ellipse_points/rectangle_points.
import numpy as np
x = np.random.uniform(0, 2, 10000) #generates random x coordinates
y = np.random.uniform(0, 2, 10000) #generates random y coordinates
# NOTE: both counters are initialised once and never reset inside the loop
# over b, so the ratio tested below is cumulative over every b tried so far,
# not the ratio for the current b alone.
ellipse_points, rectangle_points = 0, 0
a = 1
b = [i/100 for i in range(1, 200)]
#print(b)
for p in b:
    for i, j in zip(x, y):
        if (((i - 1) ** 2) / a ** 2 + ((j - 1) ** 2) / p ** 2) < 1:
            ellipse_points += 1
        # every sampled point lies inside the 2x2 rectangle
        rectangle_points += 1
    if ellipse_points/rectangle_points > 0.5:
        print(p)
OUTPUT: 1.2, 1.21.............1.9
#
x = np.random.uniform(0, 2, 10000) #generates random x coordinates
y = np.random.uniform(0, 2, 10000) #generates random y coordinates
ellipse_points, rectangle_points = 0, 0
a = 1
b = 0.63
for i, j in zip(x, y):
    # point is strictly inside the ellipse centred at (1, 1)
    if (((i - 1) ** 2) / a ** 2 + ((j - 1) ** 2) / b ** 2) < 1:
        ellipse_points += 1
    # every sampled point lies inside the 2x2 rectangle
    rectangle_points += 1
print(ellipse_points/rectangle_points)
OUTPUT 0.5001
If I understood your problem correctly, here's a vectorized solution.
It creates a binary mask for points inside the ellipse, counts where the mask is True and divides it by the total number of points.
# np.random.seed(42)
N = 10000
x = np.random.uniform(0, 2, N)  # random x coordinates in [0, 2)
y = np.random.uniform(0, 2, N)  # random y coordinates in [0, 2)
a = 1
b = 0.63
# Boolean mask: True where the point is strictly inside the ellipse at (1, 1).
inside = (((x - 1)/a)**2 + ((y - 1)/b)**2) < 1
ratio = inside.sum()/N
>>> print(ratio)
0.4954

Procrustes Analysis with NumPy?

Is there something like Matlab's procrustes function in NumPy/SciPy or related libraries?
For reference. Procrustes analysis aims to align 2 sets of points (in other words, 2 shapes) to minimize square distance between them by removing scale, translation and rotation warp components.
Example in Matlab:
X = [0 1; 2 3; 4 5; 6 7; 8 9]; % first shape
R = [1 2; 2 1]; % rotation matrix
t = [3 5]; % translation vector
Y = X * R + repmat(t, 5, 1); % warped shape, no scale and no distortion
[d Z] = procrustes(X, Y); % Z is Y aligned back to X
Z
Z =
0.0000 1.0000
2.0000 3.0000
4.0000 5.0000
6.0000 7.0000
8.0000 9.0000
Same task in NumPy:
X = arange(10).reshape((5, 2))
R = array([[1, 2], [2, 1]])
t = array([3, 5])
Y = dot(X, R) + t
Z = ???
Note: I'm only interested in aligned shape, since square error (variable d in Matlab code) is easily computed from 2 shapes.
I'm not aware of any pre-existing implementation in Python, but it's easy to take a look at the MATLAB code using edit procrustes.m and port it to Numpy:
def procrustes(X, Y, scaling=True, reflection='best'):
    """
    A port of MATLAB's `procrustes` function to Numpy.

    Procrustes analysis determines a linear transformation (translation,
    reflection, orthogonal rotation and scaling) of the points in Y to best
    conform them to the points in matrix X, using the sum of squared errors
    as the goodness of fit criterion.

        d, Z, [tform] = procrustes(X, Y)

    Inputs:
    ------------
    X, Y
        matrices of target and input coordinates. they must have equal
        numbers of points (rows), but Y may have fewer dimensions
        (columns) than X.

    scaling
        if False, the scaling component of the transformation is forced
        to 1

    reflection
        if 'best' (default), the transformation solution may or may not
        include a reflection component, depending on which fits the data
        best. setting reflection to True or False forces a solution with
        reflection or no reflection respectively.

    Outputs
    ------------
    d
        the residual sum of squared errors, normalized according to a
        measure of the scale of X, ((X - X.mean(0))**2).sum()

    Z
        the matrix of transformed Y-values

    tform
        a dict specifying the rotation, translation and scaling that
        maps Y onto X: Z = scale * Y.dot(rotation) + translation
    """
    n,m = X.shape
    ny,my = Y.shape

    muX = X.mean(0)
    muY = Y.mean(0)

    X0 = X - muX
    Y0 = Y - muY

    ssX = (X0**2.).sum()
    ssY = (Y0**2.).sum()

    # centred Frobenius norm
    normX = np.sqrt(ssX)
    normY = np.sqrt(ssY)

    # scale to equal (unit) norm
    X0 /= normX
    Y0 /= normY

    if my < m:
        # pad Y with zero *columns* so both point sets have m dimensions
        Y0 = np.concatenate((Y0, np.zeros((n, m-my))), 1)

    # optimum rotation matrix of Y
    A = np.dot(X0.T, Y0)
    U,s,Vt = np.linalg.svd(A,full_matrices=False)
    V = Vt.T
    T = np.dot(V, U.T)

    if reflection != 'best':
        # does the current solution use a reflection?
        have_reflection = np.linalg.det(T) < 0
        # if that's not what was specified, force another reflection
        if reflection != have_reflection:
            V[:,-1] *= -1
            s[-1] *= -1
            T = np.dot(V, U.T)

    traceTA = s.sum()

    if scaling:
        # optimum scaling of Y
        b = traceTA * normX / normY
        # standarised distance between X and b*Y*T + c
        d = 1 - traceTA**2
        # transformed coords
        Z = normX*traceTA*np.dot(Y0, T) + muX
    else:
        b = 1
        d = 1 + ssY/ssX - 2 * traceTA * normY / normX
        Z = normY*np.dot(Y0, T) + muX

    # transformation matrix: drop the rows that belong to the zero padding
    if my < m:
        T = T[:my,:]
    c = muX - b*np.dot(muY, T)

    #transformation values
    tform = {'rotation':T, 'scale':b, 'translation':c}

    return d, Z, tform
There is a Scipy function for it: scipy.spatial.procrustes
I'm just posting its example here:
>>> import numpy as np
>>> from scipy.spatial import procrustes
>>> a = np.array([[1, 3], [1, 2], [1, 1], [2, 1]], 'd')
>>> b = np.array([[4, -2], [4, -4], [4, -6], [2, -6]], 'd')
>>> mtx1, mtx2, disparity = procrustes(a, b)
>>> round(disparity)
0.0
You can have both Ordinary Procrustes Analysis and Generalized Procrustes Analysis in python with something like this:
import numpy as np
def opa(a, b):
    """Ordinary Procrustes analysis: align point set b onto point set a.

    a and b are (n_points, n_dims) arrays. Returns (aR, aS, aT, aD) such
    that ``b.dot(aR) * aS + aT`` is b aligned to a: aR is a rotation matrix
    (reflections are removed), aS a scale factor, aT a translation vector
    and aD the root-mean-square residual between the centred, unit-norm
    shapes.
    """
    aT = a.mean(0)
    bT = b.mean(0)
    # centre both shapes
    A = a - aT
    B = b - bT
    # Frobenius norms, then scale both shapes to unit norm
    aS = np.sum(A * A)**.5
    bS = np.sum(B * B)**.5
    A /= aS
    B /= bS
    # np.linalg.svd returns V already transposed, so U.dot(V) is the
    # orthogonal matrix that best maps B onto A.
    U, _, V = np.linalg.svd(np.dot(B.T, A))
    aR = np.dot(U, V)
    if np.linalg.det(aR) < 0:
        # flip the last singular direction to turn a reflection into a rotation
        V[-1] *= -1
        aR = np.dot(U, V)
    aS = aS / bS
    aT-= (bT.dot(aR) * aS)
    aD = (np.sum((A - B.dot(aR))**2) / len(a))**.5
    return aR, aS, aT, aD
def gpa(v, n=-1):
    """Generalized Procrustes analysis over the sequence of shapes v.

    Every shape is aligned with opa() to a reference: v[n] when n >= 0,
    otherwise the mean shape returned by avg(v). Returns four object arrays
    (rotations, scales, translations, residuals), one entry per shape.
    """
    if n < 0:
        p = avg(v)
    else:
        p = v[n]
    l = len(v)
    # one row each for rotation, scale, translation and residual
    r, s, t, d = np.empty((4, l), dtype=object)
    for i in range(l):
        r[i], s[i], t[i], d[i] = opa(p, v[i])
    return r, s, t, d
def avg(v):
    """Return a mean shape of the 2D shapes in v.

    Every pair of shapes is aligned with opa(); for each shape j the
    rotation angles, scales and translations obtained over all references i
    are accumulated and averaged. Each shape is then transformed with its
    averaged parameters and the element-wise mean of the results is
    returned. The angle extraction assumes 2x2 rotation matrices.
    """
    v_= np.copy(v)
    l = len(v_)
    R, S, T = [list(np.zeros(l)) for _ in range(3)]
    for i, j in np.ndindex(l, l):
        r, s, t, _ = opa(v_[i], v_[j])
        # rotation angle: magnitude from r[0][0] (clamped into [-1, 1]),
        # sign from r[1][0]
        R[j] += np.arccos(min(1, max(-1, np.trace(r[:1])))) * np.sign(r[1][0])
        S[j] += s
        T[j] += t
    for i in range(l):
        a = R[i] / l
        r = [np.cos(a), -np.sin(a)], [np.sin(a), np.cos(a)]
        v_[i] = v_[i].dot(r) * (S[i] / l) + (T[i] / l)
    return v_.mean(0)
For testing purposes, the output of each algorithm can be visualized as follows:
import matplotlib.pyplot as p; p.rcParams['toolbar'] = 'None';
def plt(o, e, b):
    """Show the shapes in o in one figure; e is put in the window title.

    o : sequence of (n_points, 2) arrays; o[0] is drawn in red and also
        sets the axis limits.
    e : number formatted with '%f' as the window title.
    b : when falsy, the third shape (o[2]) is drawn in translucent blue.
    """
    p.figure(figsize=(10, 10), dpi=72, facecolor='w').add_axes([0.05, 0.05, 0.9, 0.9], aspect='equal')
    p.plot(0, 0, marker='x', mew=1, ms=10, c='g', zorder=2, clip_on=False)
    # canvas.set_window_title() was removed in Matplotlib 3.6; the figure
    # manager provides the same method.
    p.gcf().canvas.manager.set_window_title('%f' % e)
    x = np.ravel(o[0].T[0])
    y = np.ravel(o[0].T[1])
    p.xlim(min(x), max(x))
    p.ylim(min(y), max(y))
    # flatten to x0, y0, x1, y1, ... as expected by p.plot
    a = []
    for i, j in np.ndindex(len(o), 2):
        a.append(o[i].T[j])
    O = p.plot(*a, marker='x', mew=1, ms=10, lw=.25, c='b', zorder=0, clip_on=False)
    O[0].set(c='r', zorder=1)
    if not b:
        O[2].set_color('b')
        O[2].set_alpha(0.4)
    p.axis('off')
    p.show()
# Fly wings example (Klingenberg, 2015 | https://en.wikipedia.org/wiki/Procrustes_analysis)
arr1 = np.array([[588.0, 443.0], [178.0, 443.0], [56.0, 436.0], [50.0, 376.0], [129.0, 360.0], [15.0, 342.0], [92.0, 293.0], [79.0, 269.0], [276.0, 295.0], [281.0, 331.0], [785.0, 260.0], [754.0, 174.0], [405.0, 233.0], [386.0, 167.0], [466.0, 59.0]])
arr2 = np.array([[477.0, 557.0], [130.129, 374.307], [52.0, 334.0], [67.662, 306.953], [111.916, 323.0], [55.119, 275.854], [107.935, 277.723], [101.899, 259.73], [175.0, 329.0], [171.0, 345.0], [589.0, 527.0], [591.0, 468.0], [299.0, 363.0], [306.0, 317.0], [406.0, 288.0]])
def opa_out(a):
    """Align a[1] onto a[0] with opa() and return the arguments for plt().

    a[1] is replaced in place by its aligned version. Returns
    (a, residual, False).
    """
    r, s, t, d = opa(a[0], a[1])
    a[1] = a[1].dot(r) * s + t
    return a, d, False
# Third entry is an untouched copy of arr2, kept so the plot can show the
# shape before alignment (plain ndarray copy instead of np.matrix.copy).
plt(*opa_out([arr1, arr2, arr2.copy()]))
def gpa_out(a):
    """Run gpa() on the shapes in a and return the arguments for plt().

    Returns (shapes, mean residual, True), where shapes is the mean shape
    from avg(a) followed by every input shape after its gpa() transform.
    """
    r, s, t, d = gpa(a, -1)
    D = [avg(a)]
    for i, shape in enumerate(a):
        D.append(shape.dot(r[i]) * s[i] + t[i])
    return D, sum(d)/len(a), True
plt(*gpa_out([arr1, arr2]))
Probably you want to try this package with various flavors of different Procrustes methods, https://github.com/theochem/procrustes.

Categories