Encoded and decoded version of bouding box regression offsets are different - python

I'm trying to replicate bounding box regression technique used in faster-rcnn as given here. I've made a decoding fuunction and an encoding function. Ideally, when passing a bounding box to the encoder and then decoding it, I should get the same bounding box.
Here, are my input bounding boxes:
import numpy as np
import tensorflow as tf
def make_anchors(img_size, conv_h, conv_w, scale, aspect_ratios):
prior_data = []
# Iteration order is important (it has to sync up with the convout)
for j, i in product(range(conv_h), range(conv_w)):
# + 0.5 because priors are in center
x = (i + 0.5) / conv_w
y = (j + 0.5) / conv_h
for ar in aspect_ratios:
ar = sqrt(ar)
w = scale * ar / img_size
h = scale / ar / img_size
prior_data += [x, y, w, h]
return prior_data
test_bbox = tf.convert_to_tensor((np.array([[204.044, 253.8351, 487.8226, 427.06363],
[0, 140.01741, 550, 290.21936],
[40.005028, 117.37102, 255.7913, 205.13097],
[263.31314, 67.0434, 514.04736, 124.48139],
[0, 503.79834, 487.0279, 550]])), dtype=tf.float32)
test_labels = tf.convert_to_tensor((np.array([[1],
[2],
[3],
[4],
[5]])), dtype=tf.float32)
feature_map_size=[[69,69], [35,35], [18,18], [9,9], [5,5]]
aspect_ratios=[1, 0.5, 2]
scales=[24, 48, 96, 192, 384]
anchors = []
for i, shape in enumerate(feature_map_size):
anchors += make_anchors(550, shape[0], shape[1], scales[i], aspect_ratios)
anchors = tf.reshape(tf.convert_to_tensor(anchors), [-1, 4])
I'm using 550x550 image size as input and calculated the feature-map size accordingly.
The encoding is done as follows:
def encode(map_loc, center_anchors, include_variances=False):
# center_gt = tf.map_fn(lambda x: map_to_center_form(x), map_loc)
h = map_loc[:, 2] - map_loc[:, 0]
w = map_loc[:, 3] - map_loc[:, 1]
center_gt = tf.cast(tf.stack([map_loc[:, 1] + (w / 2), map_loc[:, 0] + (h / 2), w, h], axis=-1), tf.float32)
variances = [0.1, 0.2]
# calculate offset
if include_variances:
g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2] / variances[0]
g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3] / variances[0]
else:
g_hat_cx = (center_gt[:, 0] - center_anchors[:, 0]) / center_anchors[:, 2]
g_hat_cy = (center_gt[:, 1] - center_anchors[:, 1]) / center_anchors[:, 3]
tf.debugging.assert_non_negative(center_anchors[:, 2] / center_gt[:, 2])
tf.debugging.assert_non_negative(center_anchors[:, 3] / center_gt[:, 3])
if include_variances:
g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2]) / variances[1]
g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3]) / variances[1]
else:
g_hat_w = tf.math.log(center_gt[:, 2] / center_anchors[:, 2])
g_hat_h = tf.math.log(center_gt[:, 3] / center_anchors[:, 3])
offsets = tf.stack([g_hat_cx, g_hat_cy, g_hat_w, g_hat_h], axis=-1)
return offsets
def area(boxlist, scope=None):
# https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L48
"""Computes area of boxes.
Args:
boxlist: BoxList holding N boxes
scope: name scope.
Returns:
a tensor with shape [N] representing box areas.
"""
y_min, x_min, y_max, x_max = tf.split(
value=boxlist, num_or_size_splits=4, axis=1)
return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
def intersection(boxlist1, boxlist2, scope=None):
# https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L209
"""Compute pairwise intersection areas between boxes.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise intersections
"""
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1, num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
value=boxlist2, num_or_size_splits=4, axis=1)
all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
return intersect_heights * intersect_widths
def iou(boxlist1, boxlist2, scope=None):
# https://github.com/tensorflow/models/blob/831281cedfc8a4a0ad7c0c37173963fafb99da37/official/vision/detection/utils/object_detection/box_list_ops.py#L259
"""Computes pairwise intersection-over-union between box collections.
Args:
boxlist1: BoxList holding N boxes
boxlist2: BoxList holding M boxes
scope: name scope.
Returns:
a tensor with shape [N, M] representing pairwise iou scores.
"""
intersections = intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
unions = (
tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
def matching(pos_thresh, neg_thresh, gt_bbox, gt_labels, priors):
pairwise_iou = iou(priors, gt_bbox) # # size: [num_objects, num_priors]; anchors along the row and ground_truth clong the columns
each_prior_max = tf.reduce_max(pairwise_iou, axis=-1) # size [num_priors]; iou with ground truth with the anchors
each_prior_index = tf.math.argmax(pairwise_iou, axis=-1) # size [num_priors]; id of groud truth having max iou with the anchors
each_box_max = tf.reduce_max(pairwise_iou, axis=0)
each_box_index = tf.math.argmax(pairwise_iou, axis=0)
# For the max IoU prior for each gt box, set its IoU to 2. This ensures that it won't be filtered
# in the threshold step even if the IoU is under the negative threshold. This is because that we want
# at least one prior to match with each gt box or else we'd be wasting training data.
indices = tf.expand_dims(each_box_index,axis=-1)
updates = tf.cast(tf.tile(tf.constant([2]), each_box_index.shape), dtype=tf.float32)
each_prior_max = tf.tensor_scatter_nd_update(each_prior_max, indices, updates)
# Set the index of the pair (prior, gt) we set the overlap for above.
updates = tf.cast(tf.range(0,each_box_index.shape),dtype=tf.int64)
each_prior_index = tf.tensor_scatter_nd_update(each_prior_index, indices, updates)
each_prior_box = tf.gather(gt_bbox, each_prior_index) # size: [num_priors, 4]
conf = tf.squeeze(tf.gather(gt_labels, each_prior_index) + 1) # the class of the max IoU gt box for each prior, size: [num_priors]
neutral_label_index = tf.where(each_prior_max < pos_thresh)
background_label_index = tf.where(each_prior_max < neg_thresh)
conf = tf.tensor_scatter_nd_update(conf, neutral_label_index, -1*tf.ones(tf.size(neutral_label_index)))
conf = tf.tensor_scatter_nd_update(conf, background_label_index, tf.zeros(tf.size(background_label_index)))
offsets = encode(each_prior_box, priors)
return offsets, conf, each_prior_box, each_prior_index
offsets, conf, each_prior_box, each_prior_index = \
matching(0.5, 0.5, test_bbox/550, test_labels, anchors)
If I try to redraw the offset I got after encoding i'm getting image as follows:
def _decode(box_p, priors, include_variances=False):
# https://github.com/feiyuhuahuo/Yolact_minimal/blob/9299a0cf346e455d672fadd796ac748871ba85e4/utils/box_utils.py#L151
"""
Decode predicted bbox coordinates using the scheme
employed at https://lilianweng.github.io/lil-log/2017/12/31/object-recognition-for-dummies-part-3.html
b_x = prior_w*loc_x + prior_x
b_y = prior_h*loc_y + prior_y
b_w = prior_w * exp(loc_w)
b_h = prior_h * exp(loc_h)
Note that loc is inputed as [c_x, x_y, w, h]
while priors are inputed as [c_x, c_y, w, h] where each coordinate
is relative to size of the image.
Also note that prior_x and prior_y are center coordinates.
"""
variances = [0.1, 0.2]
box_p = tf.cast(box_p, tf.float32)
priors = tf.cast(priors, tf.float32)
if include_variances:
b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:]* variances[0]
b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:]* variances[1])
else:
b_x_y = priors[:, :2] + box_p[:, :2] * priors[:, 2:]
b_w_h = priors[:, 2:] * tf.math.exp(box_p[:, 2:])
boxes = tf.concat([b_x_y, b_w_h], axis=1)
# [x_min, y_min, x_max, y_max]
boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] + boxes[:, :2]], axis=1)
# [y_min, x_min, y_max, x_max]
return tf.transpose(tf.stack([boxes[:, 1], boxes[:, 0],boxes[:, 3], boxes[:, 2]]))
_idx = tf.where(conf > 0.5)
_test = _decode(offsets, anchorobj.anchors)
_out = tf.squeeze(tf.gather(_test, _idx)).numpy()*550
img_test = 255*np.ones((1000,1000,3), dtype=np.int8)
for box in _out:
box = np.round(box).astype(int)
image = cv2.rectangle(img_test, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
plt.imshow(image)
As one can see the output is crossing the image input dimensions of 550. Why is that happening?

The problem was in my decode function in calculating [x_min, y_min, x_max, y_max]. It should have been like this:
# [x_min, y_min, x_max, y_max]
boxes = tf.concat([boxes[:, :2] - boxes[:, 2:] / 2, boxes[:, 2:] / 2 + boxes[:, :2]], axis=1)

Related

NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array

I have the following code which is based on https://github.com/leonard-seydoux/scatnet. The original code was based on TF v1 and I am in the process of migrating it to TF v2. However, I am facing some issues while trying to perform a numpy operation on a tensor. I am running the code on Google Colab.
The reproducible code is as below, sorry it is quite long:
import tensorflow as tf
import numpy as np
import scipy as sp
from datetime import datetime
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
HERMITE = [[1, 0, -3, 2], [0, 0, 3, -2], [0, 1, -2, 1], [0, 0, -1, 1]]
FORMAT = 'float32'
def real_hermite_interp(xi, x, m, p):
# Hermite polynomial coefficients
h = tf.Variable(np.array(HERMITE).astype(FORMAT), trainable=False)
xx = tf.stack([x[:, :-1], x[:, 1:]], axis=2)
# The concatenated coefficients are of shape (n_knots - 1, 2)
mm = tf.stack([m[:-1], m[1:]], axis=1)
pp = tf.stack([p[:-1], p[1:]], axis=1)
y = tf.concat([mm, pp], axis=1)
# Extract Hermite polynomial coefficients from y (n_knots - 1, 4)
yh = tf.matmul(y, h)
xi_ = tf.expand_dims(tf.expand_dims(xi, 0), 0)
x0_ = tf.expand_dims(xx[:, :, 0], 2)
x1_ = tf.expand_dims(xx[:, :, 1], 2)
xn = (xi_ - x0_) / (x1_ - x0_)
# Calculate powers of normalized interpolation vector
mask = tf.logical_and(tf.greater_equal(xn, 0.), tf.less(xn, 1.))
mask = tf.cast(mask, tf.float32)
xp = tf.pow(tf.expand_dims(xn, -1), [0, 1, 2, 3])
# Interpolate
return tf.einsum('rf,srtf->st', yh, xp * tf.expand_dims(mask, -1))
class Scattering:
"""Learnable scattering network layer."""
def __init__(self, x, j=None, q=None, k=None, pooling_type='average',
decimation=2, pooling=2, index=0, **filters_kw):
"""Scattering network layer.
Computes the convolution modulus and scattering coefficients of the
input signal.
Arguments
---------
x: :class:`~tensorflow.Tensor()`
Input data of shape ``(batch_size, channels, patch_shape).
"""
# Filter bank properties
self.shape_input = x.get_shape().as_list()
self.j = j = j[index] if type(j) is list else j
self.q = q = q[index] if type(q) is list else q
self.k = k = k[index] if type(k) is list else k
filters = self.init_filters(j, q, k, **filters_kw)
n_filters, kernel_size = filters.get_shape().as_list()
filters_concat = tf.concat([tf.math.real(filters), tf.math.imag(filters)], 0)
filters_kernel = tf.expand_dims(tf.transpose(filters_concat), 1)
# Pad input in the time dimension before convolution with half the size
# of filters temporal dimension (kernel_size).
shape_fast = [np.prod(self.shape_input[:-1]), 1, self.shape_input[-1]]
paddings = [0, 0], [0, 0], [kernel_size // 2 - 1, kernel_size // 2 + 1]
x_reshape = tf.reshape(x, shape_fast)
x_pad = tf.pad(x_reshape, paddings=paddings, mode='SYMMETRIC')
# Differentiate the case of one input channel or multiple
# which needs reshaping in order to treat them independently
# The "NCW" format stores data as batch_shape + [in_channels, in_width]
x_conv = tf.nn.conv1d(x_pad, filters_kernel, stride=decimation,
padding='VALID', data_format='NCW')
u = tf.sqrt(tf.square(x_conv[:, :n_filters]) +
tf.square(x_conv[:, n_filters:]))
self.u = tf.reshape(u, (*self.shape_input[:-1], n_filters, -1))
pool = tf.keras.layers.AveragePooling1D
# Pooling for the scattering coefficients
if pooling > 1:
pooled = pool(
pooling // (decimation ** (index + 1)),
pooling // (decimation ** (index + 1)),
padding='valid', data_format='channels_first')
pooled = pooled(u)
self.s = tf.reshape(pooled, self.shape_input[:-1] + [j * q] + [-1])
self.output = self.s
tf.compat.v1.disable_eager_execution()
inverse = tf.gradients(x_conv, x, x_conv)[0]
self.reconstruction_loss = tf.nn.l2_loss(
inverse - tf.stop_gradient(x)) / np.prod(self.shape_input)
def init_filters(self, j, q, k, learn_scales=False, learn_knots=False,
learn_filters=True, hilbert=False):
extra_octave = 1 if learn_scales else 0
self.filter_samples = k * 2 ** (j + extra_octave)
time_max = np.float32(k * 2**(j - 1 + extra_octave))
time_grid = tf.linspace(-time_max, time_max, self.filter_samples)
scales_base = 2**(tf.range(j * q, dtype=tf.float32) / np.float32(q))
scales_delta = tf.Variable(
tf.zeros(j * q), trainable=learn_scales, name='scales')
scales = scales_base + scales_delta
nyquist_offset = scales + \
tf.stop_gradient(tf.one_hot(0, j * q) * tf.nn.relu(1 - scales[0]))
scales_correction = tf.concat(
[tf.zeros(1),
tf.nn.relu(nyquist_offset[:-1] - nyquist_offset[1:])], 0)
self.scales = nyquist_offset + \
tf.stop_gradient(tf.cumsum(scales_correction))
knots_base = tf.Variable(
tf.ones(k), trainable=learn_knots, name='knots')
knots_sum = tf.cumsum(
tf.clip_by_value(
tf.expand_dims(knots_base, 0) * tf.expand_dims(self.scales, 1),
1, self.filter_samples - k), exclusive=True, axis=1)
self.knots = knots_sum - (k // 2) * tf.expand_dims(self.scales, 1)
if hilbert is True:
m = (np.cos(np.arange(k) * np.pi) * np.hamming(k)).astype(FORMAT)
p = (np.zeros(k)).astype(FORMAT)
self.m = tf.Variable(m, name='m', trainable=learn_filters)
self.p = tf.Variable(p, name='p', trainable=learn_filters)
# Boundary Conditions and centering
mask = np.ones(k, dtype=np.float32)
mask[0], mask[-1] = 0, 0
m_null = self.m - tf.reduce_mean(self.m[1:-1])
filters = real_hermite_interp(
time_grid, self.knots, m_null * mask, self.p * mask)
# Renorm and set filter-bank
filters_renorm = filters / tf.reduce_max(filters, 1, keepdims=True)
filters_fft = tf.signal.rfft(filters_renorm) # was spectral.rfft
filters = tf.signal.ifft(
tf.concat([filters_fft, tf.zeros_like(filters_fft)], 1))
# Define the parameters for saving
self.parameters = self.m, self.p, self.scales, self.knots
return filters
def renorm(self, parent, epsilon=1e-3):
# Extract all shapes.
if epsilon > 0:
s = self.s / (tf.expand_dims(parent.s, -2) + epsilon)
batch_size, *_, samples = s.get_shape().as_list()
return tf.reshape(s, [batch_size, -1, samples])
else:
return tf.reshape(self.s, [batch_size, -1, samples])
# testing
data = tf.random.uniform((4,3,16800), dtype=tf.float32)
batch_size = 4
args = {'layers': {'j': [4, 6, 8], 'q': [8, 2, 1], 'k': 7, 'pooling_type': 'average', 'decimation': 4, 'pooling': 1024, 'learn_scales': False, 'learn_knots': False, 'learn_filters': True, 'hilbert': True}, 'eps_norm': 0.001, 'eps_log': 0.0001, 'learning': {'epochs': 3, 'rate': 0.001}, 'pca': {'n_components': 5}, 'gmm': {'gmm_type': 'natural', 'trainable': False}, 'gmm_init': {'n_components': 10, 'max_iter': 1000, 'covariance_type': 'full', 'warm_start': True}}
# Run over batches
epochs = args['learning']['epochs']
learning_rate = args['learning']['rate']
for epoch in range(epochs):
# Gradually decrease learning rate over epochs
if epoch == epochs // 2:
learning_rate /= 5
if epoch == 3 * epochs // 4:
learning_rate /= 5
# Calculate scattering coefficients for all batches
scat_all = list()
n_batches = data.shape[0] // batch_size
for b in range(n_batches):
layers = [Scattering(data, index=0, **args['layers'])]
for i in range(1, 3):
layer = Scattering(layers[-1].u, index=i, **args['layers'])
layers.append(layer)
# Extract parameters.
net = [layer.parameters for layer in layers]
# Get reconstruction losses.
rl = tf.add_n([a.reconstruction_loss for a in layers])
# Renormalize coefficients.
r = list()
for i in range(1, 3):
r.append(layers[i].renorm(layers[i - 1], args['eps_norm']))
# Concatenate.
sx = tf.transpose(tf.concat(r, axis=1), [1, 0, 2])
sx = tf.reshape(sx, [sx.get_shape().as_list()[0], -1])
sx = tf.transpose(sx)
sx = tf.math.log(sx + args['eps_log'])
sx[np.isnan(sx)] = np.log(args['eps_log'])
sx[np.isinf(sx)] = np.log(args['eps_log'])
scat_all.append(sx)
The issue is from the line 'sx[np.isnan(sx)] = np.log(args['eps_log'])'. The full error is shown below:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In [6], line 34
31 print("sx:", sx)
32 print("sx shape: ", sx.shape)
---> 34 sx[np.isnan(sx)] = np.log(args['eps_log'])
35 sx[np.isinf(sx)] = np.log(args['eps_log'])
36 scat_all.append(sx)
File c:\Python310\lib\site-packages\tensorflow\python\framework\ops.py:922, in Tensor.__array__(***failed resolving arguments***)
920 def __array__(self, dtype=None):
921 del dtype
--> 922 raise NotImplementedError(
923 f"Cannot convert a symbolic tf.Tensor ({self.name}) to a numpy array."
924 f" This error may indicate that you're trying to pass a Tensor to"
925 f" a NumPy call, which is not supported.")
NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported.
Based on solutions on previous stackoverflow posts, I have tried to upgrade my tensorflow (2.11.0) and numpy versions (1.23.5) but that did not solve the problem. I saw some suggestions on downgrading numpy but because of other dependencies that did not work. My Python version is 3.8.16. Any suggestions on how to proceed? Thanks in advance.

Get the area for a specific point's corresponding region in a Voronoi diagram

Using this answer, I can create a bounded Voronoi diagram (credit to #Flabetvibes for this code):
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.spatial
import sys
eps = sys.float_info.epsilon
def in_box(towers, bounding_box):
return np.logical_and(np.logical_and(bounding_box[0] <= towers[:, 0],
towers[:, 0] <= bounding_box[1]),
np.logical_and(bounding_box[2] <= towers[:, 1],
towers[:, 1] <= bounding_box[3]))
def voronoi(towers, bounding_box):
# Select towers inside the bounding box
i = in_box(towers, bounding_box)
# Mirror points
points_center = towers[i, :]
points_left = np.copy(points_center)
points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
points_right = np.copy(points_center)
points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
points_down = np.copy(points_center)
points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
points_up = np.copy(points_center)
points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
points = np.append(points_center,
np.append(np.append(points_left,
points_right,
axis=0),
np.append(points_down,
points_up,
axis=0),
axis=0),
axis=0)
# Compute Voronoi
vor = sp.spatial.Voronoi(points)
# Filter regions
regions = []
for region in vor.regions:
flag = True
for index in region:
if index == -1:
flag = False
break
else:
x = vor.vertices[index, 0]
y = vor.vertices[index, 1]
if not(bounding_box[0] - eps <= x and x <= bounding_box[1] + eps and
bounding_box[2] - eps <= y and y <= bounding_box[3] + eps):
flag = False
break
if region != [] and flag:
regions.append(region)
vor.filtered_points = points_center
vor.filtered_regions = regions
return vor
def centroid_region(vertices):
# Polygon's signed area
A = 0
# Centroid's x
C_x = 0
# Centroid's y
C_y = 0
for i in range(0, len(vertices) - 1):
s = (vertices[i, 0] * vertices[i + 1, 1] - vertices[i + 1, 0] * vertices[i, 1])
A = A + s
C_x = C_x + (vertices[i, 0] + vertices[i + 1, 0]) * s
C_y = C_y + (vertices[i, 1] + vertices[i + 1, 1]) * s
A = 0.5 * A
C_x = (1.0 / (6.0 * A)) * C_x
C_y = (1.0 / (6.0 * A)) * C_y
return np.array([[C_x, C_y]])
points = np.array([[0.17488374, 0.36498964],
[0.94904866, 0.80085891],
[0.89265224, 0.4160692 ],
[0.17035869, 0.82769497],
[0.30274931, 0.04572908],
[0.40515272, 0.1445514 ],
[0.23191921, 0.08250689],
[0.48713553, 0.94806717],
[0.77714412, 0.46517511],
[0.25945989, 0.76444964]])
vor = voronoi(points,(0,1,0,1))
fig = plt.figure()
ax = fig.gca()
# Plot initial points
ax.plot(vor.filtered_points[:, 0], vor.filtered_points[:, 1], 'b.')
# Plot ridges points
for region in vor.filtered_regions:
vertices = vor.vertices[region, :]
ax.plot(vertices[:, 0], vertices[:, 1], 'go')
# Plot ridges
for region in vor.filtered_regions:
vertices = vor.vertices[region + [region[0]], :]
ax.plot(vertices[:, 0], vertices[:, 1], 'k-')
Now, I want to get the area of the region containing one of the original points in blue, such as points[0]. In this example, points[0] is the point (0.17488374, 0.36498964). I thought I could find the area for this point with the following code:
area = ConvexHull(vor.vertices[vor.filtered_regions[0], :]).volume
Because I figured that the index of 0 in points[0] would correspond with the index of 0 in vor.filtered_regions[0]. But it doesn't -- vor.filtered_regions[9] is actually what I'm looking for (which I figured out manually but I'd like for it to be automated). In another example, the region with index 2 was the one I was looking for, so it doesn't appear consistent either.
Is there a way to find the index for vor.filtered_regions that'll give me the area I want? Or is there another way to go about this? Even though I'm creating the entire Voronoi diagram with all 10 points, the area of the region with points[0] is all I'm actually looking for (while still being bounded), so I'm assuming there might be a quicker way to go about doing this but I have no idea what that may be.
The point_region attribute of the scipy Voronoi diagram tells you which region is associated to which point. So you can use that data to look up the associated regions.
Here is a much simplified version of your voronoi function which uses that attribute to ensure that filted_points and filtered_regions are constructed consistently, i.e., the first region is the one associated with the first point.
def voronoi(towers, bounding_box):
# Select towers inside the bounding box
i = in_box(towers, bounding_box)
# Mirror points
points_center = towers[i, :]
points_left = np.copy(points_center)
points_left[:, 0] = bounding_box[0] - (points_left[:, 0] - bounding_box[0])
points_right = np.copy(points_center)
points_right[:, 0] = bounding_box[1] + (bounding_box[1] - points_right[:, 0])
points_down = np.copy(points_center)
points_down[:, 1] = bounding_box[2] - (points_down[:, 1] - bounding_box[2])
points_up = np.copy(points_center)
points_up[:, 1] = bounding_box[3] + (bounding_box[3] - points_up[:, 1])
points = np.append(points_center,
np.append(np.append(points_left,
points_right,
axis=0),
np.append(points_down,
points_up,
axis=0),
axis=0),
axis=0)
# Compute Voronoi
vor = sp.spatial.Voronoi(points)
# Filter regions
regions = []
[vor.point_region[i] for i in range(10)]
vor.filtered_points = points_center
vor.filtered_regions = [vor.regions[vor.point_region[i]] for i in range(len(points_center))]
return vor

Calculate covariance of torch tensor (2d feature map)

I have a torch tensor with shape (batch_size, number_maps, x_val, y_val). The tensor is normalized with a sigmoid function, so within range [0, 1]. I want to find the covariance for each map, so I want to have a tensor with shape (batch_size, number_maps, 2, 2). As far as I know, there is no torch.cov() function as in numpy. How can I efficiently calculate the covariance without converting it to numpy?
Edit:
def get_covariance(tensor):
bn, nk, w, h = tensor.shape
tensor_reshape = tensor.reshape(bn, nk, 2, -1)
x = tensor_reshape[:, :, 0, :]
y = tensor_reshape[:, :, 1, :]
mean_x = torch.mean(x, dim=2).unsqueeze(-1)
mean_y = torch.mean(y, dim=2).unsqueeze(-1)
xx = torch.sum((x - mean_x) * (x - mean_x), dim=2).unsqueeze(-1) / (h*w - 1)
xy = torch.sum((x - mean_x) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
yx = xy
yy = torch.sum((y - mean_y) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
cov = torch.cat((xx, xy, yx, yy), dim=2)
cov = cov.reshape(bn, nk, 2, 2)
return cov
I tried the following now, but I m pretty sure it's not correct.
You could try the function suggested on Github:
def cov(x, rowvar=False, bias=False, ddof=None, aweights=None):
"""Estimates covariance matrix like numpy.cov"""
# ensure at least 2D
if x.dim() == 1:
x = x.view(-1, 1)
# treat each column as a data point, each row as a variable
if rowvar and x.shape[0] != 1:
x = x.t()
if ddof is None:
if bias == 0:
ddof = 1
else:
ddof = 0
w = aweights
if w is not None:
if not torch.is_tensor(w):
w = torch.tensor(w, dtype=torch.float)
w_sum = torch.sum(w)
avg = torch.sum(x * (w/w_sum)[:,None], 0)
else:
avg = torch.mean(x, 0)
# Determine the normalization
if w is None:
fact = x.shape[0] - ddof
elif ddof == 0:
fact = w_sum
elif aweights is None:
fact = w_sum - ddof
else:
fact = w_sum - ddof * torch.sum(w * w) / w_sum
xm = x.sub(avg.expand_as(x))
if w is None:
X_T = xm.t()
else:
X_T = torch.mm(torch.diag(w), xm).t()
c = torch.mm(X_T, xm)
c = c / fact
return c.squeeze()
https://github.com/pytorch/pytorch/issues/19037

Difficulties in understanding Python code that computes transformed mesh

There's some code that I have basically translated from its original version in MATLAB to Python but there is a part that I don't understand. I have one triangle mesh that I want to transform. For this, the transformations for each triangle face is computed. After that, the edges of the transformed triangle are specified. However, what happens then is unclear to me. If you have any mathematical or geometric background to share concerning the following steps, I would appreciate it very much!
weight = np.ones(1)
fixvertex = 1
fixto = np.zeros((3))
M = scipy.sparse.lil_matrix((len(template) + fixvertex, len(template)))
dx = scipy.sparse.lil_matrix((len(template) + fixvertex, 3))
for i in range(len(faces)):
v = faces[i, :]
### ... what happens inbetween is irrelevant
# Transform triangle
x = T # np.array([template[v[0], :], template[v[1], :], template[v[2], :]]).reshape(3, 3).T
p12 = x[:, 0] - x[:, 1]
p13 = x[:, 0] - x[:, 2]
p23 = x[:, 1] - x[:, 2]
p12 = p12 / np.linalg.norm(p12)
p13 = p13 / np.linalg.norm(p13)
p23 = p23 / np.linalg.norm(p23)
# I do not get the point of the following lines
wts = [(p13.T).dot(p23), -(p12.T).dot(p23), (p12.T).dot(p13)]
wij3 = [[0, wts[0] / np.sqrt(1 - wts[0] ** 2), wts[1] / np.sqrt(1 - wts[1] ** 2)],
[0, 0, wts[2] / np.sqrt(1 - wts[2] ** 2)],
[0, 0, 0]]
wij3 = np.asarray(wij3)
wij3 = wij3 + wij3.T
WIJ = wij3 - np.diag([sum(x) for x in zip(*wij3)])
M[np.ix_(v, v)] = M[np.ix_(v, v)] + WIJ
dx[v, :] = dx[v, :] + (WIJ # x.T)
weight = np.ones((fixvertex)) * weight
for i in range(fixvertex):
M[len(template) + i, fixvertex - 1] = weight[i]
dx[len(template):len(template) + fixvertex, :] = np.multiply(fixto, np.tile(weight, (3)))
M = np.real(M)
dx = np.real(dx)
Mt = M.T
model = scipy.sparse.linalg.spsolve(Mt # M, Mt # dx)

Implementing LeCun Local Contrast Normalization with Theano

I'm trying to use the code that I found to implement the LeCun Local Contrast Normalization but I get incorrect result. The code is in Python and uses the theano library.
def lecun_lcn(input, img_shape, kernel_shape, threshold=1e-4):
"""
Yann LeCun's local contrast normalization
Orginal code in Theano by: Guillaume Desjardins
"""
input = input.reshape(input.shape[0], 1, img_shape[0], img_shape[1])
X = T.matrix(dtype=theano.config.floatX)
X = X.reshape(input.shape)
filter_shape = (1, 1, kernel_shape, kernel_shape)
filters = gaussian_filter(kernel_shape).reshape(filter_shape)
convout = conv.conv2d(input=X,
filters=filters,
image_shape=(input.shape[0], 1, img_shape[0], img_shape[1]),
filter_shape=filter_shape,
border_mode='full')
# For each pixel, remove mean of 9x9 neighborhood
mid = int(np.floor(kernel_shape / 2.))
centered_X = X - convout[:, :, mid:-mid, mid:-mid]
# Scale down norm of 9x9 patch if norm is bigger than 1
sum_sqr_XX = conv.conv2d(input=centered_X ** 2,
filters=filters,
image_shape=(input.shape[0], 1, img_shape[0], img_shape[1]),
filter_shape=filter_shape,
border_mode='full')
denom = T.sqrt(sum_sqr_XX[:, :, mid:-mid, mid:-mid])
per_img_mean = denom.mean(axis=[1, 2])
divisor = T.largest(per_img_mean.dimshuffle(0, 'x', 'x', 1), denom)
divisor = T.maximum(divisor, threshold)
new_X = centered_X / divisor
new_X = new_X.dimshuffle(0, 2, 3, 1)
new_X = new_X.flatten(ndim=3)
f = theano.function([X], new_X)
return f(input)
Here is the testing code:
x_img_origin = plt.imread("..//data//Lenna.png")
x_img = plt.imread("..//data//Lenna.png")
x_img_real_result = plt.imread("..//data//Lenna_Processed.png")
x_img = x_img.reshape(1, x_img.shape[0], x_img.shape[1], x_img.shape[2])
for d in range(3):
x_img[:, :, :, d] = tools.lecun_lcn(x_img[:, :, :, d], (x_img.shape[1], x_img.shape[2]), 9)
x_img = x_img[0]
pylab.subplot(1, 3, 1); pylab.axis('off'); pylab.imshow(x_img_origin)
pylab.gray()
pylab.subplot(1, 3, 2); pylab.axis('off'); pylab.imshow(x_img)
pylab.subplot(1, 3, 3); pylab.axis('off'); pylab.imshow(x_img_real_result)
pylab.show()
Here is the result:
(left to right: origin, my result, the expected result)
Could someone tell me what I did wrong with the code?
Here is how I implemented local contrast normalization as reported in Jarrett et al (http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf). You can use it as a separate layer.
I tested it on the code from the LeNet tutorial of theano in which I applied LCN to the input and to each convolutional layer which yields slightly better results.
You can find the full code here:
https://github.com/jostosh/theano_utils/blob/master/lcn.py
class LecunLCN(object):
def __init__(self, X, image_shape, threshold=1e-4, radius=9, use_divisor=True):
"""
Allocate an LCN.
:type X: theano.tensor.dtensor4
:param X: symbolic image tensor, of shape image_shape
:type image_shape: tuple or list of length 4
:param image_shape: (batch size, num input feature maps,
image height, image width)
:type threshold: double
:param threshold: the threshold will be used to avoid division by zeros
:type radius: int
:param radius: determines size of Gaussian filter patch (default 9x9)
:type use_divisor: Boolean
:param use_divisor: whether or not to apply divisive normalization
"""
# Get Gaussian filter
filter_shape = (1, image_shape[1], radius, radius)
self.filters = theano.shared(self.gaussian_filter(filter_shape), borrow=True)
# Compute the Guassian weighted average by means of convolution
convout = conv.conv2d(
input=X,
filters=self.filters,
image_shape=image_shape,
filter_shape=filter_shape,
border_mode='full'
)
# Subtractive step
mid = int(numpy.floor(filter_shape[2] / 2.))
# Make filter dimension broadcastable and subtract
centered_X = X - T.addbroadcast(convout[:, :, mid:-mid, mid:-mid], 1)
# Boolean marks whether or not to perform divisive step
if use_divisor:
# Note that the local variances can be computed by using the centered_X
# tensor. If we convolve this with the mean filter, that should give us
# the variance at each point. We simply take the square root to get our
# denominator
# Compute variances
sum_sqr_XX = conv.conv2d(
input=T.sqr(centered_X),
filters=self.filters,
image_shape=image_shape,
filter_shape=filter_shape,
border_mode='full'
)
# Take square root to get local standard deviation
denom = T.sqrt(sum_sqr_XX[:,:,mid:-mid,mid:-mid])
per_img_mean = denom.mean(axis=[2,3])
divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
# Divisise step
new_X = centered_X / T.maximum(T.addbroadcast(divisor, 1), threshold)
else:
new_X = centered_X
self.output = new_X
def gaussian_filter(self, kernel_shape):
x = numpy.zeros(kernel_shape, dtype=theano.config.floatX)
def gauss(x, y, sigma=2.0):
Z = 2 * numpy.pi * sigma ** 2
return 1. / Z * numpy.exp(-(x ** 2 + y ** 2) / (2. * sigma ** 2))
mid = numpy.floor(kernel_shape[-1] / 2.)
for kernel_idx in xrange(0, kernel_shape[1]):
for i in xrange(0, kernel_shape[2]):
for j in xrange(0, kernel_shape[3]):
x[0, kernel_idx, i, j] = gauss(i - mid, j - mid)
return x / numpy.sum(x)
I think these two lines may have some mistakes on the matrix axes:
per_img_mean = denom.mean(axis=[1, 2])
divisor = T.largest(per_img_mean.dimshuffle(0, 'x', 'x', 1), denom)
and it should be rewritten as:
per_img_mean = denom.mean(axis=[2, 3])
divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)

Categories