I am implementing the Francis double-shift QR iteration algorithm using the notes and pseudocode from https://people.inf.ethz.ch/arbenz/ewp/Lnotes/chapter4.pdf (Algorithm 4.5).
The pseudocode is written in MATLAB, I believe.
Below is my implementation:
import numpy as np
import scipy.linalg

# compute upper Hessenberg form of matrix
def hessenberg(A):
    m, n = A.shape
    H = A.astype(np.float64)
    for k in range(n - 2):
        x = H[k+1:, k]
        v = np.concatenate([np.array([np.sign(x[0]) * np.linalg.norm(x)]), x[1:]])
        v = v / np.linalg.norm(v)
        H[k+1:, k:] -= 2 * np.outer(v, np.dot(v, H[k+1:, k:]))
        H[:, k+1:] -= 2 * np.outer(np.dot(H[:, k+1:], v), v)
    return H
# compute first three elements of M
def first_three_M(T, s, t):
    x = T[0, 0]**2 + T[0, 1] * T[1, 0] - s * T[0, 0] + t
    y = T[1, 0] * (T[0, 0] + T[1, 1] - s)
    z = T[1, 0] * T[2, 1]
    return x, y, z
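For reference, these three values should be the leading nonzero entries of the first column of M = T^2 - s*T + t*I, which is all the double-shift step needs. A quick check against the explicit product (a sketch, assuming a small random Hessenberg T) can confirm the formulas:

T = np.triu(np.random.rand(4, 4), -1)  # random 4x4 upper Hessenberg matrix
s, t = np.random.rand(2)               # arbitrary shift parameters
M = T @ T - s * T + t * np.eye(4)
print(np.allclose(first_three_M(T, s, t), M[:3, 0]))  # expect True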
# householder reflection
def householder_reflection_step(x_1):
    v = x_1[0] + np.sign(x_1[0]) * np.linalg.norm(x_1)
    v = v / np.linalg.norm(v)
    P = np.eye(3) - 2 * np.outer(v, v)
    return P
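A Householder reflector can also be tested in isolation: it should be orthogonal, and it should map its defining vector onto a multiple of the first unit vector. A small probe (a sketch with a random 3-vector) makes the check concrete; if either line prints an unexpected result, the reflector construction is the place to look:

x_1 = np.random.rand(3)
P = householder_reflection_step(x_1)
print(np.allclose(P @ P.T, np.eye(3)))  # expect True if P is orthogonal
print(P @ x_1)                          # ideally only the first entry is nonzero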
# update elements of M
def update_M(T, k, p):
    x = T[k+1, k]
    y = T[k+2, k]
    if k < p - 3:
        z = T[k+3, k]
    else:
        z = 0
    return x, y, z
# givens rotation
def givens_step(T, x_2, x, y, p, q, n):
    # calculate c and s
    c = x / np.sqrt(x**2 + y**2)
    s = -y / np.sqrt(x**2 + y**2)
    P = np.array([[c, s], [-s, c]])
    T[q-1:p, p-3:n] = P.T @ T[q-1:p, p-3:n]
    T[0:p, p-2:p] = T[0:p, p-2:p] @ P
    return T
# deflation step
def deflation_step(T, p, q, epsilon):
    if abs(T[p-1, p-2]) < epsilon * (abs(T[p-2, p-2]) + abs(T[p-1, p-1])):
        T[p-1, p-2] = 0
        p = p - 1
        q = p - 1
    elif abs(T[p-2, p-3]) < epsilon * (abs(T[p-3, p-3]) + abs(T[p-2, p-2])):
        T[p-2, p-3] = 0
        p = p - 2
        q = p - 1
    return T, p, q
# francis qr step
def francis_step(H, epsilon=0.90):
    n = H.shape[0]
    T = H.copy().astype(np.float64)
    p = n - 1
    while p > 2:
        q = p - 1
        s = T[q, q] + T[p, p]
        t = T[q, q] * T[p, p] - T[q, p] * T[p, q]
        # Compute M
        x, y, z = first_three_M(T, s, t)
        x_1 = np.transpose([[x], [y], [z]])
        # Bulge chasing
        for k in range(p - 3):
            # Compute Householder reflector
            P = householder_reflection_step(x_1)
            r = max(1, k-1)
            T[k:k+3, r:] = P.T @ T[k:k+3, r:]
            r = min(k + 3, p)
            T[0:r, k:k+3] = T[0:r, k:k+3] @ P
            # Update M
            x, y, z = update_M(T, k, p)
            x_2 = np.transpose([[x], [y]])
        # Compute Givens rotation
        T = givens_step(T, x_2, x, y, p, q, n)
        # Check for convergence
        T, p, q = deflation_step(T, p, q, epsilon)
    return T
# francis qr iteration
def francis_qr_iteration(A):
    m, n = A.shape
    H = hessenberg(A)
    eigvals = []
    iters = 0
    max_iters = 100
    while iters < max_iters:
        # Perform Francis step
        T = francis_step(H)
        eigvals.append(np.diag(T))
        iters += 1
    return eigvals
# for quick testing
A = np.array([[2, 2, 3, 4, 2],
[1, 2, 4, 2, 3],
[4, 1, 2, 1, 5],
[5, 2, 5, 2, 1],
[3, 6, 3, 1, 4]])
eigenvals = francis_qr_iteration(A)
#comparing our method to scipy - final eigvals obtained
print(len(eigenvals))
print(sorted(eigenvals[-1]))
print(sorted(scipy.linalg.eig(A)[0].real))
And this is the output I am getting.
100
[-4.421235127393854, -0.909209110641351, -0.8342390091346807, 3.7552499102751575, 8.215454029003958]
[-3.0411228516834217, -1.143605409373778, -1.143605409373778, 3.325396565009845, 14.002937105421134]
The matrix T is not changing, so it does not converge to the Schur form from which I could read off the eigenvalues with np.diag(T). I believe the error is in either the Givens rotation step or the Householder reflection step. It could be an indexing issue, since I translated MATLAB pseudocode into Python. Please let me know where I am going wrong so I can fix the code and make it converge.
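One way to narrow this down might be to test each stage in isolation. For the reduction, a sketch like the following checks that H actually has Hessenberg structure and that the similarity transforms preserved the spectrum; if either line prints False, the problem already starts before francis_step is reached:

H = hessenberg(A)
print(np.allclose(np.tril(H, -2), 0))                   # Hessenberg structure?
print(np.allclose(np.sort(np.linalg.eigvals(H).real),
                  np.sort(np.linalg.eigvals(A).real)))  # spectrum preserved?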
I am trying to minimize a function using the Nelder-Mead method in Python, but my implementation raises an indexing error. The function I am trying to minimize is the quadratic f defined at the top of the code below. Here is my code:
import numpy as np

def f(x):
    return np.sum(x**2 / 2)

def nelder_mead(f, x0, args=(), iters=1000, tol=1e-6):
    n = len(x0)
    x0 = np.asarray(x0)
    x1 = np.zeros((n, n+1))
    x1[:, 0] = x0
    f1 = np.zeros(n+1)
    f1[0] = f(x0)
    for i in range(iters):
        x_bar = np.mean(x1[:, :n], axis=1)
        xr = 2*x_bar - x1[:, n]
        fr = f(xr)
        if fr < f1[n]:
            xe = 2*xr - x_bar
            fe = f(xe)
            if fe < fr:
                x1[:, n+1] = xe
                f1[n+1] = fe
            else:
                x1[:, n+1] = xr
                f1[n+1] = fr
        else:
            xc = (x_bar + x1[:, n])/2
            fc = f(xc)
            if fc < f1[n]:
                x1[:, n+1] = xc
                f1[n+1] = fc
                x1[:, n] = xr
                f1[n] = fr
            else:
                x1[:, 1:] = x1[:, :n]
                f1[1:] = f1[:n]
                x1[:, 0] = (x1[:, n] + x1[:, n+1])/2
                f1[0] = f(x1[:, 0])
                # x1[:, n+1] = x1[:, n]
                # f1[n+1] = f1[n]
                # x1[:, n] = (x1[:, 0] + x1[:, n+1])/2
                # f1[n] = f(x1[:, n])
        if np.abs(f1[n] - f1[n+1]) < tol:
            return x1[:, n], f1[n]
    return x1[:, n], f1[n]

x0 = np.random.rand(2)*4 - 2
print(nelder_mead(f, x0))
I am unsure how to change the code. I believe the error comes up because I am somehow referencing index 2, which is the third position, when indexing the simplex arrays. Somehow I am indexing one position past the end, but I can't see where.
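As a reference point (a sketch, assuming SciPy is available), SciPy's built-in Nelder-Mead can be run on the same objective to see the minimizer and minimum the custom implementation should converge to:

from scipy.optimize import minimize

res = minimize(f, x0, method='Nelder-Mead', options={'xatol': 1e-6, 'fatol': 1e-6})
print(res.x, res.fun)  # expect x near [0, 0] and f(x) near 0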
I am trying to invert a set of maps using this answer here. I used two of the methods from that answer, so there is more detail on how they work there; I also left some comments out to shorten the code.
I have my own camera matrix and distortion coefficients that I use to create an x and y map with cv2.initUndistortRectifyMap(), but when I pass the maps to invert_map() I get the out-of-bounds error shown below.
None of this (except the bottom part) is my code, and it's pretty advanced stuff, so I have no clue how to debug it. I don't have enough reputation to comment on the original answer. Does anyone have a solution?
import numpy as np
import cv2 as cv2
from scipy import ndimage as ndi
from matplotlib import pyplot as plt
import glob

def bilinear_inverse(p, vertices, numiter=4):
    p = np.asarray(p)
    v = np.asarray(vertices)
    sh = p.shape[1:]
    if v.ndim == 2:
        v = np.expand_dims(v, axis=tuple(range(2, 2 + len(sh))))
    # Start in the center
    s = .5 * np.ones((2,) + sh)
    s0, s1 = s
    for k in range(numiter):
        # Residual
        r = v[0] * (1 - s0) * (1 - s1) + v[1] * s0 * (1 - s1) + v[2] * s0 * s1 + v[3] * (1 - s0) * s1 - p
        # Jacobian
        J11 = -v[0, 0] * (1 - s1) + v[1, 0] * (1 - s1) + v[2, 0] * s1 - v[3, 0] * s1
        J21 = -v[0, 1] * (1 - s1) + v[1, 1] * (1 - s1) + v[2, 1] * s1 - v[3, 1] * s1
        J12 = -v[0, 0] * (1 - s0) - v[1, 0] * s0 + v[2, 0] * s0 + v[3, 0] * (1 - s0)
        J22 = -v[0, 1] * (1 - s0) - v[1, 1] * s0 + v[2, 1] * s0 + v[3, 1] * (1 - s0)
        inv_detJ = 1. / (J11 * J22 - J12 * J21)
        s0 -= inv_detJ * (J22 * r[0] - J12 * r[1])
        s1 -= inv_detJ * (-J21 * r[0] + J11 * r[1])
    return s
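For anyone following along, bilinear_inverse can be probed on its own: on the unit square the local quad coordinates of a point are the point itself, so the Newton iteration should return the input unchanged (a small sketch):

verts = np.array([[0., 0.], [1., 0.], [1., 1.], [0., 1.]])  # (y, x) corners of the unit square
p = np.array([[0.3], [0.7]])
print(bilinear_inverse(p, verts))  # expect roughly [[0.3], [0.7]]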
def invert_map(xmap, ymap, diagnostics=False):
    """
    Generate the inverse of deformation map defined by (xmap, ymap) using inverse bilinear interpolation.
    """
    # Generate quadrilaterals from mapped grid points.
    quads = np.array([[ymap[:-1, :-1], xmap[:-1, :-1]],
                      [ymap[1:, :-1], xmap[1:, :-1]],
                      [ymap[1:, 1:], xmap[1:, 1:]],
                      [ymap[:-1, 1:], xmap[:-1, 1:]]])
    # Range of indices possibly within each quadrilateral
    x0 = np.floor(quads[:, 1, ...].min(axis=0)).astype(int)
    x1 = np.ceil(quads[:, 1, ...].max(axis=0)).astype(int)
    y0 = np.floor(quads[:, 0, ...].min(axis=0)).astype(int)
    y1 = np.ceil(quads[:, 0, ...].max(axis=0)).astype(int)
    # Quad indices
    i0, j0 = np.indices(x0.shape)
    # Offset of destination map
    x0_offset = x0.min()
    y0_offset = y0.min()
    # Index range in x and y (per quad)
    xN = x1 - x0 + 1
    yN = y1 - y0 + 1
    # Shape of destination array
    sh_dest = (1 + x1.max() - x0_offset, 1 + y1.max() - y0_offset)
    # Coordinates of destination array
    yy_dest, xx_dest = np.indices(sh_dest)
    xmap1 = np.zeros(sh_dest)
    ymap1 = np.zeros(sh_dest)
    TN = np.zeros(sh_dest, dtype=int)
    # Smallish number to avoid missing point lying on edges
    epsilon = .01
    # Loop through indices possibly within quads
    for ix in range(xN.max()):
        for iy in range(yN.max()):
            # Work only with quads whose bounding box contain indices
            valid = (xN > ix) * (yN > iy)
            # Local points to check
            p = np.array([y0[valid] + ix, x0[valid] + iy])
            # Map the position of the point in the quad
            s = bilinear_inverse(p, quads[:, :, valid])
            # s out of unit square means p out of quad
            # Keep some epsilon around to avoid missing edges
            in_quad = np.all((s > -epsilon) * (s < (1 + epsilon)), axis=0)
            # Add found indices
            ii = p[0, in_quad] - y0_offset
            jj = p[1, in_quad] - x0_offset
            ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
            xmap1[ii, jj] += j0[valid][in_quad] + s[1][in_quad]
            # Increment count
            TN[ii, jj] += 1
    ymap1 /= TN + (TN == 0)
    xmap1 /= TN + (TN == 0)
    if diagnostics:
        diag = {'x_offset': x0_offset,
                'y_offset': y0_offset,
                'mask': TN > 0}
        return xmap1, ymap1, diag
    else:
        return xmap1, ymap1
# cam matrix and dist coeffs that I brought in
cam_matrix = np.array([[1223.07784, 0, 926.80065],
                       [0, 1231.71291, 546.10496],
                       [0, 0, 1]], dtype='float32')
distortion_profile = np.array([-0.32077, 0.15041, 0.001004, 0.00028, -0.04252], dtype='float32')

# get current maps
mapx, mapy = cv2.initUndistortRectifyMap(cam_matrix, distortion_profile, None, cam_matrix, (1920, 1080), 5)

# invert the maps
mapx_invert, mapy_invert = invert_map(mapx, mapy)

# apply mapping to image
inversed = cv2.remap(img, mapx_invert, mapy_invert, cv2.INTER_LINEAR)
cv2.imwrite('inversed.png', inversed)
Error:
File "c:\Users\...\redist_image2.py", line 121, in invert_map
ymap1[ii, jj] += i0[valid][in_quad] + s[0][in_quad]
IndexError: index 1382 is out of bounds for axis 1 with size 1020
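One quick probe (a sketch, not a fix) is to compare the value ranges of the two maps against the destination shape that invert_map builds; if the x and y extents look swapped relative to the failing axis in the traceback, the sh_dest construction is the place to investigate:

print(mapx.shape, mapy.shape)  # (1080, 1920) for a 1920x1080 map
print(mapx.min(), mapx.max())  # x values, expect roughly 0..1920
print(mapy.min(), mapy.max())  # y values, expect roughly 0..1080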
Using this answer, I can create a bounded Voronoi diagram (credit to @Flabetvibes for this code):
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.spatial
import sys
eps = sys.float_info.epsilon
def in_box(towers, bounding_box):
    return np.logical_and(np.logical_and(bounding_box[0] <= towers[:, 0],
                                         towers[:, 0] <= bounding_box[1]),
                          np.logical_and(bounding_box[2] <= towers[:, 1],
                                         towers[:, 1] <= bounding_box[3]))

def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left,
                                           points_right,
                                           axis=0),
                                 np.append(points_down,
                                           points_up,
                                           axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # Filter regions
    regions = []
    for region in vor.regions:
        flag = True
        for index in region:
            if index == -1:
                flag = False
                break
            else:
                x = vor.vertices[index, 0]
                y = vor.vertices[index, 1]
                if not(bounding_box[0] - eps <= x and x <= bounding_box[1] + eps and
                       bounding_box[2] - eps <= y and y <= bounding_box[3] + eps):
                    flag = False
                    break
        if region != [] and flag:
            regions.append(region)
    vor.filtered_points = points_center
    vor.filtered_regions = regions
    return vor
def centroid_region(vertices):
    # Polygon's signed area
    A = 0
    # Centroid's x
    C_x = 0
    # Centroid's y
    C_y = 0
    for i in range(0, len(vertices) - 1):
        s = (vertices[i, 0] * vertices[i + 1, 1] - vertices[i + 1, 0] * vertices[i, 1])
        A = A + s
        C_x = C_x + (vertices[i, 0] + vertices[i + 1, 0]) * s
        C_y = C_y + (vertices[i, 1] + vertices[i + 1, 1]) * s
    A = 0.5 * A
    C_x = (1.0 / (6.0 * A)) * C_x
    C_y = (1.0 / (6.0 * A)) * C_y
    return np.array([[C_x, C_y]])
points = np.array([[0.17488374, 0.36498964],
[0.94904866, 0.80085891],
[0.89265224, 0.4160692 ],
[0.17035869, 0.82769497],
[0.30274931, 0.04572908],
[0.40515272, 0.1445514 ],
[0.23191921, 0.08250689],
[0.48713553, 0.94806717],
[0.77714412, 0.46517511],
[0.25945989, 0.76444964]])
vor = voronoi(points,(0,1,0,1))
fig = plt.figure()
ax = fig.gca()
# Plot initial points
ax.plot(vor.filtered_points[:, 0], vor.filtered_points[:, 1], 'b.')
# Plot ridges points
for region in vor.filtered_regions:
    vertices = vor.vertices[region, :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'go')
# Plot ridges
for region in vor.filtered_regions:
    vertices = vor.vertices[region + [region[0]], :]
    ax.plot(vertices[:, 0], vertices[:, 1], 'k-')
Now, I want to get the area of the region containing one of the original points in blue, such as points[0]. In this example, points[0] is the point (0.17488374, 0.36498964). I thought I could find the area for this point with the following code:
area = ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
Because I figured that the index of 0 in points[0] would correspond with the index of 0 in vor.filtered_regions[0]. But it doesn't -- vor.filtered_regions[9] is actually what I'm looking for (which I figured out manually but I'd like for it to be automated). In another example, the region with index 2 was the one I was looking for, so it doesn't appear consistent either.
Is there a way to find the index into vor.filtered_regions that gives me the area I want? Or is there another way to go about this? Even though I'm creating the entire Voronoi diagram with all 10 points, the area of the (bounded) region containing points[0] is all I'm actually looking for, so I'm assuming there might be a quicker way to do this, but I have no idea what that may be.
The point_region attribute of the scipy Voronoi diagram tells you which region is associated to which point. So you can use that data to look up the associated regions.
Here is a much simplified version of your voronoi function which uses that attribute to ensure that filtered_points and filtered_regions are constructed consistently, i.e., the first region is the one associated with the first point.
def voronoi(towers, bounding_box):
    # Select towers inside the bounding box
    i = in_box(towers, bounding_box)
    # Mirror points
    points_center = towers[i, :]
    points_left = np.copy(points_center)
    points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
    points_right = np.copy(points_center)
    points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
    points_down = np.copy(points_center)
    points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
    points_up = np.copy(points_center)
    points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
    points = np.append(points_center,
                       np.append(np.append(points_left,
                                           points_right,
                                           axis=0),
                                 np.append(points_down,
                                           points_up,
                                           axis=0),
                                 axis=0),
                       axis=0)
    # Compute Voronoi
    vor = sp.spatial.Voronoi(points)
    # Keep the regions in the same order as the points they belong to
    vor.filtered_points = points_center
    vor.filtered_regions = [vor.regions[vor.point_region[i]] for i in range(len(points_center))]
    return vor
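With the regions ordered to match the points, the lookup the question asks for becomes direct (a sketch, assuming scipy.spatial.ConvexHull and the points array above):

from scipy.spatial import ConvexHull

vor = voronoi(points, (0, 1, 0, 1))
area = ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
print(area)  # area of the bounded cell containing points[0]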
I'm trying to replicate the bounding box regression technique used in Faster R-CNN as given here. I've made a decoding function and an encoding function. Ideally, when passing a bounding box to the encoder and then decoding it, I should get the same bounding box back.
Here are my input bounding boxes:
import numpy as np
import tensorflow as tf
from itertools import product
from math import sqrt

def make_anchors(img_size, conv_h, conv_w, scale, aspect_ratios):
    prior_data = []
    # Iteration order is important (it has to sync up with the convout)
    for j, i in product(range(conv_h), range(conv_w)):
        # + 0.5 because priors are in center
        x = (i + 0.5) / conv_w
        y = (j + 0.5) / conv_h
        for ar in aspect_ratios:
            ar = sqrt(ar)
            w = scale * ar / img_size
            h = scale / ar / img_size
            prior_data += [x, y, w, h]
    return prior_data

test_bbox = tf.convert_to_tensor((np.array([[204.044, 253.8351, 487.8226, 427.06363],
                                            [0, 140.01741, 550, 290.21936],
                                            [40.005028, 117.37102, 255.7913, 205.13097],
                                            [263.31314, 67.0434, 514.04736, 124.48139],
                                            [0, 503.79834, 487.0279, 550]])), dtype=tf.float32)
test_labels = tf.convert_to_tensor((np.array([[1],
                                              [2],
                                              [3],
                                              [4],
                                              [5]])), dtype=tf.float32)
feature_map_size = [[69, 69], [35, 35], [18, 18], [9, 9], [5, 5]]
aspect_ratios = [1, 0.5, 2]
scales = [24, 48, 96, 192, 384]
anchors = []
for i, shape in enumerate(feature_map_size):
    anchors += make_anchors(550, shape[0], shape[1], scales[i], aspect_ratios)
anchors = tf.reshape(tf.convert_to_tensor(anchors), [-1, 4])
I'm using a 550x550 input image size and calculated the feature-map sizes accordingly.
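A quick count check (a sketch) can confirm the anchor generation: each feature map contributes h * w * len(aspect_ratios) priors, so the total should match the first dimension of anchors:

expected = sum(h * w * len(aspect_ratios) for h, w in feature_map_size)
print(anchors.shape[0] == expected)  # expect True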
The encoding is done as follows:
def encode(map_loc, center_anchors, include_variances=False):
    # center_gt = tf.map_fn(lambda x: map_to_center_form(x), map_loc)
    h = map_loc[:, 2] - map_loc[:, 0]
    w = map_loc[:, 3] - map_loc[:, 1]
    center_gt = tf.cast(tf.stack([map_loc[:, 1] + (w / 2), map_loc[:, 0] + (h / 2), w, h], axis=-1), tf.float32)
    variances = [0.1, 0.2]
    # calculate offset
    if include_variances:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
    else:
        g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2]
        g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3]
    tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
    tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
    if include_variances:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
    else:
        g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2])
        g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3])
    offsets = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
    return offsets
def area(boxlist, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L48
    """Computes area of boxes.
    Args:
        boxlist: BoxList holding N boxes
        scope: name scope.
    Returns:
        a tensor with shape [N] representing box areas.
    """
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist, num_or_size_splits=4, axis=1)
    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])

def intersection(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L209
    """Compute pairwise intersection areas between boxes.
    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.
    Returns:
        a tensor with shape [N, M] representing pairwise intersections
    """
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1, num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2, num_or_size_splits=4, axis=1)
    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths

def iou(boxlist1, boxlist2, scope=None):
    # https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L259
    """Computes pairwise intersection-over-union between box collections.
    Args:
        boxlist1: BoxList holding N boxes
        boxlist2: BoxList holding M boxes
        scope: name scope.
    Returns:
        a tensor with shape [N, M] representing pairwise iou scores.
    """
    intersections = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    unions = (
        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
    return tf.where(
        tf.equal(intersections, 0.0),
        tf.zeros_like(intersections), tf.truediv(intersections, unions))
def matching(pos_thresh, neg_thresh, gt_bbox, gt_labels, priors):
    pairwise_iou = iou(priors, gt_bbox)  # size [num_priors, num_objects]; anchors along the rows, ground truth along the columns
    each_prior_max = tf.reduce_max(pairwise_iou, axis=-1)  # size [num_priors]; max iou of each anchor with the ground-truth boxes
    each_prior_index = tf.math.argmax(pairwise_iou, axis=-1)  # size [num_priors]; id of the ground-truth box having max iou with each anchor
    each_box_max = tf.reduce_max(pairwise_iou, axis=0)
    each_box_index = tf.math.argmax(pairwise_iou, axis=0)
    # For the max IoU prior for each gt box, set its IoU to 2. This ensures that it won't be filtered
    # in the threshold step even if the IoU is under the negative threshold. This is because we want
    # at least one prior to match with each gt box or else we'd be wasting training data.
    indices = tf.expand_dims(each_box_index, axis=-1)
    updates = tf.cast(tf.tile(tf.constant([2]), each_box_index.shape), dtype=tf.float32)
    each_prior_max = tf.tensor_scatter_nd_update(each_prior_max, indices, updates)
    # Set the index of the pair (prior, gt) we set the overlap for above.
    updates = tf.cast(tf.range(0, each_box_index.shape[0]), dtype=tf.int64)
    each_prior_index = tf.tensor_scatter_nd_update(each_prior_index, indices, updates)
    each_prior_box = tf.gather(gt_bbox, each_prior_index)  # size: [num_priors, 4]
    conf = tf.squeeze(tf.gather(gt_labels, each_prior_index) + 1)  # the class of the max IoU gt box for each prior, size: [num_priors]
    neutral_label_index = tf.where(each_prior_max < pos_thresh)
    background_label_index = tf.where(each_prior_max < neg_thresh)
    conf = tf.tensor_scatter_nd_update(conf, neutral_label_index, -1 * tf.ones(tf.size(neutral_label_index)))
    conf = tf.tensor_scatter_nd_update(conf, background_label_index, tf.zeros(tf.size(background_label_index)))
    offsets = encode(each_prior_box, priors)
    return offsets, conf, each_prior_box, each_prior_index

offsets, conf, each_prior_box, each_prior_index = \
    matching(0.5, 0.5, test_bbox/550, test_labels, anchors)
If I try to redraw the boxes from the offsets I got after encoding, using the decode function below, I get the following image:
def _decode(box_p, priors, include_variances=False):
    # https://github.com/feiyuhuahuo/Yolact_minimal/blob/9299a0cf346e455d672fadd796ac748871ba85e4/utils/box_utils.py#L151
    """
    Decode predicted bbox coordinates using the scheme
    employed at https://lilianweng.github.io/lil-log/2017/12/31/object-recognition-for-dummies-part-3.html
        b_x = prior_w*loc_x + prior_x
        b_y = prior_h*loc_y + prior_y
        b_w = prior_w * exp(loc_w)
        b_h = prior_h * exp(loc_h)
    Note that loc is input as [c_x, c_y, w, h]
    while priors are input as [c_x, c_y, w, h] where each coordinate
    is relative to size of the image.
    Also note that prior_x and prior_y are center coordinates.
    """
    variances = [0.1, 0.2]
    box_p = tf.cast(box_p, tf.float32)
    priors = tf.cast(priors, tf.float32)
    if include_variances:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:] * variances[0]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:] * variances[1])
    else:
        b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:]
        b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:])
    boxes = tf.concat([b_x_y, b_w_h], axis=1)
    # [x_min, y_min, x_max, y_max]
    boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] + boxes[:, :2]], axis=1)
    # [y_min, x_min, y_max, x_max]
    return tf.transpose(tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]]))
import cv2
from matplotlib import pyplot as plt

_idx = tf.where(conf > 0.5)
_test = _decode(offsets, anchors)
_out = tf.squeeze(tf.gather(_test, _idx)).numpy() * 550
img_test = 255 * np.ones((1000, 1000, 3), dtype=np.int8)
for box in _out:
    box = np.round(box).astype(int)
    image = cv2.rectangle(img_test, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
plt.imshow(image)
As one can see, the output boxes cross the input image dimension of 550. Why is that happening?
The problem was in my decode function, in the calculation of [x_min, y_min, x_max, y_max]. It should have been like this:
# [x_min, y_min, x_max, y_max]
boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] / 2 + boxes[:, :2]], axis=1)
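With that change applied, a small roundtrip check (a sketch, assuming the objects defined above; any subset of priors works as long as the count matches the boxes) should reproduce the input boxes:

boxes = test_bbox / 550  # [ymin, xmin, ymax, xmax], normalized to the image size
priors5 = anchors[:5]    # one prior per test box, just for the roundtrip
decoded = _decode(encode(boxes, priors5), priors5)
print(np.allclose(decoded.numpy(), boxes.numpy(), atol=1e-5))  # expect True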