Difficulties in understanding Python code that computes a transformed mesh

There's some code that I have translated from its original MATLAB version to Python, but there is a part that I don't understand. I have one triangle mesh that I want to transform. For this, a transformation is computed for each triangle face. After that, the edges of the transformed triangle are specified. However, what happens then is unclear to me. If you have any mathematical or geometric background to share concerning the following steps, I would appreciate it very much!
weight = np.ones(1)
fixvertex = 1
fixto = np.zeros((3))
M = scipy.sparse.lil_matrix((len(template) + fixvertex, len(template)))
dx = scipy.sparse.lil_matrix((len(template) + fixvertex, 3))
for i in range(len(faces)):
    v = faces[i, :]
    ### ... what happens in between is irrelevant
    # Transform triangle
    x = T @ np.array([template[v[0], :], template[v[1], :], template[v[2], :]]).reshape(3, 3).T
    p12 = x[:, 0] - x[:, 1]
    p13 = x[:, 0] - x[:, 2]
    p23 = x[:, 1] - x[:, 2]
    p12 = p12 / np.linalg.norm(p12)
    p13 = p13 / np.linalg.norm(p13)
    p23 = p23 / np.linalg.norm(p23)
    # I do not get the point of the following lines
    wts = [(p13.T).dot(p23), -(p12.T).dot(p23), (p12.T).dot(p13)]
    wij3 = [[0, wts[0] / np.sqrt(1 - wts[0] ** 2), wts[1] / np.sqrt(1 - wts[1] ** 2)],
            [0, 0, wts[2] / np.sqrt(1 - wts[2] ** 2)],
            [0, 0, 0]]
    wij3 = np.asarray(wij3)
    wij3 = wij3 + wij3.T
    WIJ = wij3 - np.diag([sum(x) for x in zip(*wij3)])
    M[np.ix_(v, v)] = M[np.ix_(v, v)] + WIJ
    dx[v, :] = dx[v, :] + (WIJ @ x.T)
weight = np.ones((fixvertex)) * weight
for i in range(fixvertex):
    M[len(template) + i, fixvertex - 1] = weight[i]
dx[len(template):len(template) + fixvertex, :] = np.multiply(fixto, np.tile(weight, (3)))
M = np.real(M)
dx = np.real(dx)
Mt = M.T
model = scipy.sparse.linalg.spsolve(Mt @ M, Mt @ dx)
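In case it helps others reading along: the dot products of the normalized edge vectors are the cosines of the triangle's interior angles, and cos θ / sqrt(1 - cos² θ) = cot θ, so wij3/WIJ appears to assemble the per-face cotangent-weight Laplacian familiar from Laplacian/Poisson mesh editing. M then accumulates the global Laplacian, dx accumulates the Laplacian applied to the transformed coordinates, the extra fixvertex row pins a vertex to fixto, and the last line solves the normal equations M.T @ M @ model = M.T @ dx in the least-squares sense. A tiny numeric check of the cotangent identity (my own sketch, not from the original code):

import numpy as np

# One triangle with columns as vertices, mirroring the layout of x above.
tri = np.array([[0.0, 1.0, 0.3],
                [0.0, 0.0, 0.8],
                [0.0, 0.0, 0.0]])

p13 = tri[:, 0] - tri[:, 2]
p23 = tri[:, 1] - tri[:, 2]
p13 = p13 / np.linalg.norm(p13)
p23 = p23 / np.linalg.norm(p23)

# p13 . p23 is cos(angle at vertex 3), the vertex opposite edge (1, 2).
cos3 = p13.dot(p23)
# cos / sqrt(1 - cos^2) = cos / sin = cot: the classic cotangent weight
# of edge (1, 2) in the discrete Laplacian.
assert np.isclose(cos3 / np.sqrt(1 - cos3**2), 1.0 / np.tan(np.arccos(cos3)))

Subtracting the row sums on the diagonal (the np.diag line) makes each WIJ a 3x3 Laplacian stencil whose rows sum to zero, which is what lets the contributions of all faces be summed into M.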

Related

Python implementation of Francis double step QR iteration algorithm does not converge

I am implementing the Francis double step QR iteration algorithm using the notes and pseudocode from https://people.inf.ethz.ch/arbenz/ewp/Lnotes/chapter4.pdf (Algorithm 4.5). The pseudocode is provided in MATLAB, I believe.
Below is my implementation.
# compute upper hessenberg form of matrix
def hessenberg(A):
    m, n = A.shape
    H = A.astype(np.float64)
    for k in range(n-2):
        x = H[k+1:, k]
        v = np.concatenate([np.array([np.sign(x[0]) * np.linalg.norm(x)]), x[1:]])
        v = v / np.linalg.norm(v)
        H[k+1:, k:] -= 2 * np.outer(v, np.dot(v, H[k+1:, k:]))
        H[:, k+1:] -= 2 * np.outer(np.dot(H[:, k+1:], v), v)
    return(H)

# compute first three elements of M
def first_three_M(T, s, t):
    x = T[0, 0]**2 + T[0, 1] * T[1, 0] - s * T[0, 0] + t
    y = T[1, 0] * (T[0, 0] + T[1, 1] - s)
    z = T[1, 0] * T[2, 1]
    return(x, y, z)

# householder reflection
def householder_reflection_step(x_1):
    v = x_1[0] + np.sign(x_1[0]) * np.linalg.norm(x_1)
    v = v / np.linalg.norm(v)
    P = np.eye(3) - 2 * np.outer(v, v)
    return(P)

# update elements of M
def update_M(T, k, p):
    x = T[k+1, k]
    y = T[k+2, k]
    if k < p - 3:
        z = T[k+3, k]
    else:
        z = 0
    return(x, y, z)

# givens rotation
def givens_step(T, x_2, x, y, p, q, n):
    # calculate c and s
    c = x / np.sqrt(x**2 + y**2)
    s = -y / np.sqrt(x**2 + y**2)
    P = np.array([[c, s], [-s, c]])
    T[q-1:p, p-3:n] = P.T @ T[q-1:p, p-3:n]
    T[0:p, p-2:p] = T[0:p, p-2:p] @ P
    return(T)

# deflation step
def deflation_step(T, p, q, epsilon):
    if abs(T[p-1, p-2]) < epsilon * (abs(T[p-2, p-2]) + abs(T[p-1, p-1])):
        T[p-1, p-2] = 0
        p = p - 1
        q = p - 1
    elif abs(T[p-2, p-3]) < epsilon * (abs(T[p-3, p-3]) + abs(T[p-2, p-2])):
        T[p-2, p-3] = 0
        p = p - 2
        q = p - 1
    return(T, p, q)

# francis qr step
def francis_step(H, epsilon=0.90):
    n = H.shape[0]
    T = H.copy().astype(np.float64)
    p = n - 1
    while p > 2:
        q = p - 1
        s = T[q, q] + T[p, p]
        t = T[q, q] * T[p, p] - T[q, p] * T[p, q]
        # Compute M
        x, y, z = first_three_M(T, s, t)
        x_1 = np.transpose([[x], [y], [z]])
        # Bulge chasing
        for k in range(p - 3):
            # Compute Householder reflector
            P = householder_reflection_step(x_1)
            r = max(1, k-1)
            T[k:k+3, r:] = P.T @ T[k:k+3, r:]
            r = min(k + 3, p)
            T[0:r, k:k+3] = T[0:r, k:k+3] @ P
            # Update M
            x, y, z = update_M(T, k, p)
            x_2 = np.transpose([[x], [y]])
        # Compute Givens rotation
        T = givens_step(T, x_2, x, y, p, q, n)
        # Check for convergence
        T, p, q = deflation_step(T, p, q, epsilon)
    return(T)

# francis qr iteration
def francis_qr_iteration(A):
    m, n = A.shape
    H = hessenberg(A)
    eigvals = []
    iters = 0
    max_iters = 100
    while iters < max_iters:
        # Perform Francis step
        T = francis_step(H)
        eigvals.append(np.diag(T))
        iters += 1
    return(eigvals)

# for quick testing
A = np.array([[2, 2, 3, 4, 2],
              [1, 2, 4, 2, 3],
              [4, 1, 2, 1, 5],
              [5, 2, 5, 2, 1],
              [3, 6, 3, 1, 4]])
eigenvals = francis_qr_iteration(A)

# comparing our method to scipy - final eigvals obtained
print(len(eigenvals))
print(sorted(eigenvals[-1]))
print(sorted(scipy.linalg.eig(A)[0].real))
And this is the output I am getting.
100
[-4.421235127393854, -0.909209110641351, -0.8342390091346807, 3.7552499102751575, 8.215454029003958]
[-3.0411228516834217, -1.143605409373778, -1.143605409373778, 3.325396565009845, 14.002937105421134]
The matrix T is not changing, and hence it does not converge to the Schur form, from which I could obtain the eigenvalues via np.diag(T). I believe the error is in either the Givens rotation step or the Householder reflection step. It could be an indexing issue, since I translated MATLAB pseudocode into Python. Please let me know where I am going wrong so I can fix the code and make it converge.
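Not an answer, but a debugging aid I would suggest (my own sketch, not part of the original post): validate the reduction stage in isolation before touching the bulge chase. A correct Hessenberg reduction is an orthogonal similarity transform, so it must preserve eigenvalues and zero out everything below the first subdiagonal:

import numpy as np

# Sanity checks for the hessenberg() function above.
A = np.random.rand(6, 6)
H = hessenberg(A)

# A similarity transform preserves the eigenvalues...
print(np.allclose(np.sort_complex(np.linalg.eigvals(H)),
                  np.sort_complex(np.linalg.eigvals(A))))
# ...and a true Hessenberg matrix is zero below the first subdiagonal.
print(np.allclose(np.tril(H, -2), 0))

If either print is False, the bug is already in hessenberg() (the Householder vector construction is a common culprit) rather than in the Givens or bulge-chasing steps.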

Index out of bounds error while implementing the Nelder-Mead algorithm

I am trying to minimize a function using the Nelder-Mead method in Python, and I am getting an index out of bounds error. Here is my code:
import numpy as np

def f(x):
    return np.sum(x**2/2)

def nelder_mead(f, x0, args=(), iters=1000, tol=1e-6):
    n = len(x0)
    x0 = np.asarray(x0)
    x1 = np.zeros((n, n+1))
    x1[:, 0] = x0
    f1 = np.zeros(n+1)
    f1[0] = f(x0)
    for i in range(iters):
        x_bar = np.mean(x1[:, :n], axis=1)
        xr = 2*x_bar - x1[:, n]
        fr = f(xr)
        if fr < f1[n]:
            xe = 2*xr - x_bar
            fe = f(xe)
            if fe < fr:
                x1[:, n+1] = xe
                f1[n+1] = fe
            else:
                x1[:, n+1] = xr
                f1[n+1] = fr
        else:
            xc = (x_bar + x1[:, n])/2
            fc = f(xc)
            if fc < f1[n]:
                x1[:, n+1] = xc
                f1[n+1] = fc
                x1[:, n] = xr
                f1[n] = fr
            else:
                x1[:, 1:] = x1[:, :n]
                f1[1:] = f1[:n]
                x1[:, 0] = (x1[:, n] + x1[:, n+1])/2
                f1[0] = f(x1[:, 0])
                "x1[:, n+1] = x1[:, n]"
                "f1[n+1] = f1[n]"
                "x1[:, n] = (x1[:, 0] + x1[:, n+1])/2"
                "f1[n] = f(x1[:, n])"
        if np.abs(f1[n] - f1[n+1]) < tol:
            return x1[:, n], f1[n]
    return x1[:, n], f1[n]

x0 = np.random.rand(2)*4 - 2
print(nelder_mead(f, x0))
I am unsure how to change the code. I believe the error comes up because I am somehow accessing the third position (index 2) of an array that is too small for it.
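One observation that may explain the error (mine, not part of the original question): x1 = np.zeros((n, n+1)) has valid column indices 0 through n, so every x1[:, n+1] access (and every f1[n+1] access) overruns the array; a simplex in n dimensions has n+1 vertices, i.e. columns 0..n. While debugging, scipy's built-in Nelder-Mead gives a reference result for the same function (a short sketch, assuming scipy is installed):

import numpy as np
from scipy.optimize import minimize

def f(x):
    return np.sum(x**2 / 2)

x0 = np.random.rand(2) * 4 - 2
# Reference implementation of the same method for comparison.
res = minimize(f, x0, method='Nelder-Mead',
               options={'xatol': 1e-6, 'fatol': 1e-6})
print(res.x, res.fun)  # should converge to roughly [0, 0] and 0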

How to fix an error when inverting image maps in OpenCV

I am trying to invert a set of maps using this answer here. I used two of its methods, so there is more detail on how they work in that answer. I also left some comments out to shorten the code.
I have my own camera matrix and distortion coefficients that I use to create an x and y map with cv2.initUndistortRectifyMap(), but when I pass them to invert_map() I get the out-of-bounds error shown below.
None of this (except the bottom part) is my code, and it's pretty advanced stuff, so I have no clue how to debug it. And I don't have enough reputation to comment on the original answer. Anyone got a solution?
import numpy as np
import cv2 as cv2
from scipy import ndimage as ndi
from matplotlib import pyplot as plt
import glob

def bilinear_inverse(p, vertices, numiter=4):
    p = np.asarray(p)
    v = np.asarray(vertices)
    sh = p.shape[1:]
    if v.ndim == 2:
        v = np.expand_dims(v, axis=tuple(range(2, 2 + len(sh))))
    # Start in the center
    s = .5 * np.ones((2,) + sh)
    s0, s1 = s
    for k in range(numiter):
        # Residual
        r = v[0] * (1 - s0) * (1 - s1) + v[1] * s0 * (1 - s1) + v[2] * s0 * s1 + v[3] * (1 - s0) * s1 - p
        # Jacobian
        J11 = -v[0, 0] * (1 - s1) + v[1, 0] * (1 - s1) + v[2, 0] * s1 - v[3, 0] * s1
        J21 = -v[0, 1] * (1 - s1) + v[1, 1] * (1 - s1) + v[2, 1] * s1 - v[3, 1] * s1
        J12 = -v[0, 0] * (1 - s0) - v[1, 0] * s0 + v[2, 0] * s0 + v[3, 0] * (1 - s0)
        J22 = -v[0, 1] * (1 - s0) - v[1, 1] * s0 + v[2, 1] * s0 + v[3, 1] * (1 - s0)
        inv_detJ = 1. / (J11 * J22 - J12 * J21)
        s0 -= inv_detJ * (J22 * r[0] - J12 * r[1])
        s1 -= inv_detJ * (-J21 * r[0] + J11 * r[1])
    return s

def invert_map(xmap, ymap, diagnostics=False):
    """
    Generate the inverse of deformation map defined by (xmap, ymap) using inverse bilinear interpolation.
    """
    # Generate quadrilaterals from mapped grid points.
    quads = np.array([[ymap[:-1, :-1], xmap[:-1, :-1]],
                      [ymap[1:, :-1], xmap[1:, :-1]],
                      [ymap[1:, 1:], xmap[1:, 1:]],
                      [ymap[:-1, 1:], xmap[:-1, 1:]]])
    # Range of indices possibly within each quadrilateral
    x0 = np.floor(quads[:, 1, ...].min(axis=0)).astype(int)
    x1 = np.ceil(quads[:, 1, ...].max(axis=0)).astype(int)
    y0 = np.floor(quads[:, 0, ...].min(axis=0)).astype(int)
    y1 = np.ceil(quads[:, 0, ...].max(axis=0)).astype(int)
    # Quad indices
    i0, j0 = np.indices(x0.shape)
    # Offset of destination map
    x0_offset = x0.min()
    y0_offset = y0.min()
    # Index range in x and y (per quad)
    xN = x1 - x0 + 1
    yN = y1 - y0 + 1
    # Shape of destination array
    sh_dest = (1 + x1.max() - x0_offset, 1 + y1.max() - y0_offset)
    # Coordinates of destination array
    yy_dest, xx_dest = np.indices(sh_dest)
    xmap1 = np.zeros(sh_dest)
    ymap1 = np.zeros(sh_dest)
    TN = np.zeros(sh_dest, dtype=int)
    # Smallish number to avoid missing point lying on edges
    epsilon = .01
    # Loop through indices possibly within quads
    for ix in range(xN.max()):
        for iy in range(yN.max()):
            # Work only with quads whose bounding box contain indices
            valid = (xN > ix) * (yN > iy)
            # Local points to check
            p = np.array([y0[valid] + ix, x0[valid] + iy])
            # Map the position of the point in the quad
            s = bilinear_inverse(p, quads[:, :, valid])
            # s out of unit square means p out of quad
            # Keep some epsilon around to avoid missing edges
            in_quad = np.all((s > -epsilon) * (s < (1 + epsilon)), axis=0)
            # Add found indices
            ii = p[0, in_quad] - y0_offset
            jj = p[1, in_quad] - x0_offset
            ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
            xmap1[ii, jj] += j0[valid][in_quad] + s[1][in_quad]
            # Increment count
            TN[ii, jj] += 1
    ymap1 /= TN + (TN == 0)
    xmap1 /= TN + (TN == 0)
    if diagnostics:
        diag = {'x_offset': x0_offset,
                'y_offset': y0_offset,
                'mask': TN > 0}
        return xmap1, ymap1, diag
    else:
        return xmap1, ymap1
# cam matrix and dist coeffs that I brought
cam_matrix = np.array([[1223.07784, 0, 926.80065],
                       [0, 1231.71291, 546.10496],
                       [0, 0, 1]], dtype='float32')
distortion_profile = np.array([-0.32077, 0.15041, 0.001004, 0.00028, -0.04252], dtype='float32')

# get current maps
mapx, mapy = cv2.initUndistortRectifyMap(cam_matrix, distortion_profile, None, cam_matrix, (1920, 1080), 5)

# invert the maps
mapx_invert, mapy_invert = invert_map(mapx, mapy)

# apply mapping to image
inversed = cv2.remap(img, mapx_invert, mapy_invert, cv2.INTER_LINEAR)
cv2.imwrite('inversed.png', inversed)
Error:
File "c:\Users\...\redist_image2.py", line 121, in invert_map
ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
IndexError: index 1382 is out of bounds for axis 1 with size 1020
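One thing worth checking (my observation, not from the linked answer): invert_map builds sh_dest with the x-extent first and the y-extent second, but then indexes ymap1[ii, jj] with ii derived from y0 and jj derived from x0. For the square maps the original answer was written against, the two extents coincide; for a 1920x1080 map they differ, which would match an x-derived index of 1382 overrunning an axis of size 1020. A hedged sketch of the change inside invert_map:

# Hypothetical fix (my suggestion): make axis 0 the y-extent and axis 1 the
# x-extent, so that ymap1[ii, jj] indexing agrees with the array shape.
sh_dest = (1 + y1.max() - y0_offset, 1 + x1.max() - x0_offset)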

Get the area for a specific point's corresponding region in a Voronoi diagram

Using this answer, I can create a bounded Voronoi diagram (credit to @Flabetvibes for this code):
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.spatial
import sys

eps = sys.float_info.epsilon

def in_box(towers, bounding_box):
    return np.logical_and(np.logical_and(bounding_box[0] <= towers[:, 0],
                                         towers[:, 0] <= bounding_box[1]),
                          np.logical_and(bounding_box[2] <= towers[:, 1],
                                         towers[:, 1] <= bounding_box[3]))

def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left,
                                           points_right,
                                           axis=0),
                                 np.append(points_down,
                                           points_up,
                                           axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # Filter regions
    regions = []
    for region in vor.regions:
        flag = True
        for index in region:
            if index == -1:
                flag = False
                break
            else:
                x = vor.vertices[index, 0]
                y = vor.vertices[index, 1]
                if not(bounding_box[0] - eps <= x and x <= bounding_box[1] + eps and
                       bounding_box[2] - eps <= y and y <= bounding_box[3] + eps):
                    flag = False
                    break
        if region != [] and flag:
            regions.append(region)
    vor.filtered_points = points_center
    vor.filtered_regions = regions
    return vor

def centroid_region(vertices):
    # Polygon's signed area
    A = 0
    # Centroid's x
    C_x = 0
    # Centroid's y
    C_y = 0
    for i in range(0, len(vertices) - 1):
        s = (vertices[i, 0] * vertices[i + 1, 1] - vertices[i + 1, 0] * vertices[i, 1])
        A = A + s
        C_x = C_x + (vertices[i, 0] + vertices[i + 1, 0]) * s
        C_y = C_y + (vertices[i, 1] + vertices[i + 1, 1]) * s
    A = 0.5 * A
    C_x = (1.0 / (6.0 * A)) * C_x
    C_y = (1.0 / (6.0 * A)) * C_y
    return np.array([[C_x, C_y]])

points = np.array([[0.17488374, 0.36498964],
                   [0.94904866, 0.80085891],
                   [0.89265224, 0.4160692 ],
                   [0.17035869, 0.82769497],
                   [0.30274931, 0.04572908],
                   [0.40515272, 0.1445514 ],
                   [0.23191921, 0.08250689],
                   [0.48713553, 0.94806717],
                   [0.77714412, 0.46517511],
                   [0.25945989, 0.76444964]])

vor = voronoi(points, (0, 1, 0, 1))

fig = plt.figure()
ax = fig.gca()

# Plot initial points
ax.plot(vor.filtered_points[:, 0], vor.filtered_points[:, 1], 'b.')

# Plot ridge points
for region in vor.filtered_regions:
    vertices = vor.vertices[region, :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'go')

# Plot ridges
for region in vor.filtered_regions:
    vertices = vor.vertices[region + [region[0]], :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'k-')
Now, I want to get the area of the region containing one of the original points in blue, such as points[0]. In this example, points[0] is the point (0.17488374, 0.36498964). I thought I could find the area for this point with the following code:
area = ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
Because I figured that index 0 in points[0] would correspond to index 0 in vor.filtered_regions. But it doesn't -- vor.filtered_regions[9] is actually the one I'm looking for (which I figured out manually, but I'd like it to be automated). In another example, the region with index 2 was the one I was looking for, so it doesn't appear to be consistent either.
Is there a way to find the index into vor.filtered_regions that gives me the area I want? Or is there another way to go about this? Even though I'm creating the entire Voronoi diagram with all 10 points, the area of the region containing points[0] is all I actually need (while still being bounded), so I'm assuming there might be a quicker way to do this, but I have no idea what that may be.
The point_region attribute of the scipy Voronoi diagram tells you which region is associated with which point, so you can use that data to look up the associated regions.
Here is a much simplified version of your voronoi function which uses that attribute to ensure that filtered_points and filtered_regions are constructed consistently, i.e., the first region is the one associated with the first point.
def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left,
                                           points_right,
                                           axis=0),
                                 np.append(points_down,
                                           points_up,
                                           axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # Look up each input point's region via point_region, so that
    # filtered_regions[k] is the region of filtered_points[k]
    vor.filtered_points = points_center
    vor.filtered_regions = [vor.regions[vor.point_region[i]] for i in range(len(points_center))]
    return vor
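With the regions ordered to match the points, the lookup from the question becomes direct; ConvexHull.volume returns the polygon's area for 2-D input, and Voronoi regions are convex, so the hull of the region's vertices is the region itself. A short usage sketch building on the question's code:

from scipy.spatial import ConvexHull

vor = voronoi(points, (0, 1, 0, 1))
# filtered_regions[0] is now the region containing points[0], so:
area = ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
print(area)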

Encoded and decoded versions of bounding box regression offsets are different

I'm trying to replicate the bounding box regression technique used in Faster R-CNN as given here. I've made a decoding function and an encoding function. Ideally, when passing a bounding box to the encoder and then decoding it, I should get the same bounding box.
Here are my input bounding boxes:
import numpy as np
import tensorflow as tf
from itertools import product
from math import sqrt

def make_anchors(img_size, conv_h, conv_w, scale, aspect_ratios):
    prior_data = []
    # Iteration order is important (it has to sync up with the convout)
    for j, i in product(range(conv_h), range(conv_w)):
        # + 0.5 because priors are in center
        x = (i + 0.5) / conv_w
        y = (j + 0.5) / conv_h
        for ar in aspect_ratios:
            ar = sqrt(ar)
            w = scale * ar / img_size
            h = scale / ar / img_size
            prior_data += [x, y, w, h]
    return prior_data

test_bbox = tf.convert_to_tensor((np.array([[204.044, 253.8351, 487.8226, 427.06363],
                                            [0, 140.01741, 550, 290.21936],
                                            [40.005028, 117.37102, 255.7913, 205.13097],
                                            [263.31314, 67.0434, 514.04736, 124.48139],
                                            [0, 503.79834, 487.0279, 550]])), dtype=tf.float32)
test_labels = tf.convert_to_tensor((np.array([[1],
                                              [2],
                                              [3],
                                              [4],
                                              [5]])), dtype=tf.float32)

feature_map_size = [[69, 69], [35, 35], [18, 18], [9, 9], [5, 5]]
aspect_ratios = [1, 0.5, 2]
scales = [24, 48, 96, 192, 384]
anchors = []
for i, shape in enumerate(feature_map_size):
    anchors += make_anchors(550, shape[0], shape[1], scales[i], aspect_ratios)
anchors = tf.reshape(tf.convert_to_tensor(anchors), [-1, 4])
I'm using a 550x550 image as input and calculated the feature-map sizes accordingly.
The encoding is done as follows:
def encode(map_loc, center_anchors, include_variances=False):
    # center_gt = tf.map_fn(lambda x: map_to_center_form(x), map_loc)
    h = map_loc[:, 2] - map_loc[:, 0]
    w = map_loc[:, 3] - map_loc[:, 1]
    center_gt = tf.cast(tf.stack([map_loc[:, 1] + (w / 2), map_loc[:, 0] + (h / 2), w, h], axis=-1), tf.float32)
    variances = [0.1, 0.2]
    # calculate offset
    if include_variances:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
    else:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3]
    tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
    tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
    if include_variances:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
    else:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2])
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3])
    offsets = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
    return offsets

def area(boxlist, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L48
    """Computes area of boxes.
    Args:
        boxlist: BoxList holding N boxes
        scope: name scope.
    Returns:
        a tensor with shape [N] representing box areas.
    """
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist, num_or_size_splits=4, axis=1)
    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])

def intersection(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L209
    """Compute pairwise intersection areas between boxes.
    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.
    Returns:
        a tensor with shape [N, M] representing pairwise intersections
    """
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1, num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2, num_or_size_splits=4, axis=1)
    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths

def iou(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L259
    """Computes pairwise intersection-over-union between box collections.
    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.
    Returns:
        a tensor with shape [N, M] representing pairwise iou scores.
    """
    intersections = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    unions = (
        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
    return tf.where(
        tf.equal(intersections, 0.0),
        tf.zeros_like(intersections), tf.truediv(intersections, unions))

def matching(pos_thresh, neg_thresh, gt_bbox, gt_labels, priors):
    pairwise_iou = iou(priors, gt_bbox)  # size: [num_priors, num_objects]; anchors along the rows, ground truths along the columns
    each_prior_max = tf.reduce_max(pairwise_iou, axis=-1)  # size [num_priors]; max iou of each anchor with the ground truths
    each_prior_index = tf.math.argmax(pairwise_iou, axis=-1)  # size [num_priors]; id of the ground truth having max iou with each anchor
    each_box_max = tf.reduce_max(pairwise_iou, axis=0)
    each_box_index = tf.math.argmax(pairwise_iou, axis=0)
    # For the max IoU prior for each gt box, set its IoU to 2. This ensures that it won't be filtered
    # in the threshold step even if the IoU is under the negative threshold. This is because we want
    # at least one prior to match with each gt box or else we'd be wasting training data.
    indices = tf.expand_dims(each_box_index, axis=-1)
    updates = tf.cast(tf.tile(tf.constant([2]), each_box_index.shape), dtype=tf.float32)
    each_prior_max = tf.tensor_scatter_nd_update(each_prior_max, indices, updates)
    # Set the index of the pair (prior, gt) we set the overlap for above.
    updates = tf.cast(tf.range(0, each_box_index.shape), dtype=tf.int64)
    each_prior_index = tf.tensor_scatter_nd_update(each_prior_index, indices, updates)
    each_prior_box = tf.gather(gt_bbox, each_prior_index)  # size: [num_priors, 4]
    conf = tf.squeeze(tf.gather(gt_labels, each_prior_index) + 1)  # the class of the max IoU gt box for each prior, size: [num_priors]
    neutral_label_index = tf.where(each_prior_max < pos_thresh)
    background_label_index = tf.where(each_prior_max < neg_thresh)
    conf = tf.tensor_scatter_nd_update(conf, neutral_label_index, -1 * tf.ones(tf.size(neutral_label_index)))
    conf = tf.tensor_scatter_nd_update(conf, background_label_index, tf.zeros(tf.size(background_label_index)))
    offsets = encode(each_prior_box, priors)
    return offsets, conf, each_prior_box, each_prior_index

offsets, conf, each_prior_box, each_prior_index = \
    matching(0.5, 0.5, test_bbox / 550, test_labels, anchors)
If I try to redraw the boxes decoded from the offsets I got after encoding, I'm getting the image shown below:
def _decode(box_p, priors, include_variances=False):
    # https://github.com/feiyuhuahuo/Yolact_minimal/blob/9299a0cf346e455d672fadd796ac748871ba85e4/utils/box_utils.py#L151
    """
    Decode predicted bbox coordinates using the scheme
    employed at https://lilianweng.github.io/lil-log/2017/12/31/object-recognition-for-dummies-part-3.html
        b_x = prior_w * loc_x + prior_x
        b_y = prior_h * loc_y + prior_y
        b_w = prior_w * exp(loc_w)
        b_h = prior_h * exp(loc_h)
    Note that loc is input as [c_x, c_y, w, h]
    while priors are input as [c_x, c_y, w, h] where each coordinate
    is relative to the size of the image.
    Also note that prior_x and prior_y are center coordinates.
    """
    variances = [0.1, 0.2]
    box_p = tf.cast(box_p, tf.float32)
    priors = tf.cast(priors, tf.float32)
    if include_variances:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:] * variances[0]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:] * variances[1])
    else:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:])
    boxes = tf.concat([b_x_y, b_w_h], axis=1)
    # [x_min, y_min, x_max, y_max]
    boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] + boxes[:, :2]], axis=1)
    # [y_min, x_min, y_max, x_max]
    return tf.transpose(tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]]))

_idx = tf.where(conf > 0.5)
_test = _decode(offsets, anchors)
_out = tf.squeeze(tf.gather(_test, _idx)).numpy() * 550

img_test = 255 * np.ones((1000, 1000, 3), dtype=np.int8)
for box in _out:
    box = np.round(box).astype(int)
    image = cv2.rectangle(img_test, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
plt.imshow(image)
As one can see, the drawn boxes cross the input image dimension of 550. Why is that happening?
The problem was in my _decode function, in the calculation of [x_min, y_min, x_max, y_max]. It should have been like this:
# [x_min, y_min, x_max, y_max]
boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] / 2 + boxes[:, :2]], axis=1)
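With that one line changed, a quick round-trip check (my own sketch, reusing the tensors defined in the question) confirms that decode(encode(gt)) reproduces the matched ground-truth boxes:

# Hypothetical round-trip test: both tensors are [y_min, x_min, y_max, x_max]
# relative to the image size, so they should agree up to float error.
offsets, conf, each_prior_box, each_prior_index = \
    matching(0.5, 0.5, test_bbox / 550, test_labels, anchors)
decoded = _decode(offsets, anchors)
print(np.allclose(decoded.numpy(), each_prior_box.numpy(), atol=1e-5))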
