I have a function in my data preprocessing which performs a blockwise DCT on 3D NumPy arrays in YCbCr mode.
import numpy as np
from scipy.fftpack import dct

def perform_blockwise_dct(img, ratio):
    imsize = img.shape
    dct_blocks = np.zeros(imsize)
    # step over the image in 8x8 blocks and apply a 2D DCT to each channel
    for i in np.r_[:imsize[0]:8]:
        for j in np.r_[:imsize[1]:8]:
            dct_blocks[i:(i+8), j:(j+8), 0] = dct(dct(img[i:(i+8), j:(j+8), 0].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 1] = dct(dct(img[i:(i+8), j:(j+8), 1].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 2] = dct(dct(img[i:(i+8), j:(j+8), 2].T, norm='ortho').T, norm='ortho')
    return dct_blocks
To be able to implement a custom mean squared error function, I would like to reverse this function. The problem is that inside the loss function I am working with a TensorFlow tensor. There is an inverse DCT function (tf.spectral.idct) to use; however, I do not know how to perform an equivalent double for-loop to apply it block-wise. Currently it is done on the entire image, like this:
def mse_custom_loss(a, b):
    y = tf.spectral.idct(a[:, :, 0], norm='ortho')
    cb = tf.spectral.idct(a[:, :, 1], norm='ortho')
    cr = tf.spectral.idct(a[:, :, 2], norm='ortho')
    a = K.stack([y, cb, cr], axis=-1)
    y = tf.spectral.idct(b[:, :, 0], norm='ortho')
    cb = tf.spectral.idct(b[:, :, 1], norm='ortho')
    cr = tf.spectral.idct(b[:, :, 2], norm='ortho')
    b = K.stack([y, cb, cr], axis=-1)
    return mean_square_error(a, b)
Any ideas on how to do it correctly? I assume lambda functions might be a possibility?
I think this is a TensorFlow equivalent to your NumPy/SciPy function:
import tensorflow as tf

def perform_blockwise_dct_tf(img):
    shape = tf.shape(img)
    x, y, c = shape[0], shape[1], shape[2]
    # split both spatial dimensions into 8x8 blocks: (x//8, 8, y//8, 8, c)
    img_res = tf.reshape(img, [x // 8, 8, y // 8, 8, c])
    # the DCT works over the last axis, so move each block axis to the end in turn
    img_dct1 = tf.spectral.dct(tf.transpose(img_res, [0, 1, 2, 4, 3]), norm='ortho')
    img_dct2 = tf.spectral.dct(tf.transpose(img_dct1, [0, 2, 4, 3, 1]), norm='ortho')
    # restore the original axis order and flatten the blocks back
    out = tf.reshape(tf.transpose(img_dct2, [0, 4, 1, 2, 3]), shape)
    return out
A small test:
import numpy as np
from scipy.fftpack import dct

def perform_blockwise_dct(img):
    imsize = img.shape
    dct_blocks = np.zeros(imsize, dtype=img.dtype)
    for i in np.r_[:imsize[0]:8]:
        for j in np.r_[:imsize[1]:8]:
            dct_blocks[i:(i+8), j:(j+8), 0] = dct(dct(img[i:(i+8), j:(j+8), 0].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 1] = dct(dct(img[i:(i+8), j:(j+8), 1].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 2] = dct(dct(img[i:(i+8), j:(j+8), 2].T, norm='ortho').T, norm='ortho')
    return dct_blocks
np.random.seed(100)
# DCT in TensorFlow only supports float32
img = np.random.rand(128, 256, 3).astype(np.float32)
out1 = perform_blockwise_dct(img)
with tf.Graph().as_default(), tf.Session() as sess:
    out2 = sess.run(perform_blockwise_dct_tf(img))
# There is a bit of error
print(np.allclose(out1, out2, rtol=1e-5, atol=1e-6))
# True
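For the inverse direction the question actually asks about, the same reshape/transpose trick should work with tf.spectral.idct in place of tf.spectral.dct; since the ortho-normalized iDCT inverts the ortho DCT axis by axis, this undoes perform_blockwise_dct_tf. A sketch under the same assumptions (dimensions divisible by 8, float32):
def perform_blockwise_idct_tf(img):
    # same blocking as perform_blockwise_dct_tf, with idct on each block axis
    shape = tf.shape(img)
    x, y, c = shape[0], shape[1], shape[2]
    img_res = tf.reshape(img, [x // 8, 8, y // 8, 8, c])
    img_idct1 = tf.spectral.idct(tf.transpose(img_res, [0, 1, 2, 4, 3]), norm='ortho')
    img_idct2 = tf.spectral.idct(tf.transpose(img_idct1, [0, 2, 4, 3, 1]), norm='ortho')
    return tf.reshape(tf.transpose(img_idct2, [0, 4, 1, 2, 3]), shape)
Inside the loss you would then apply this to both a and b before computing the mean squared error.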
I'm trying to contract a network with multiple tensors, using singular value decomposition during the contraction to simplify the process. While this works perfectly when I'm not taking any gradient, it fails once the gradient tape starts to watch the tensors (I'm not sure why this is related). Below are my simple contraction function and the function I use to take the SVD:
import tensorflow as tf

@tf.function
def contraction_step(network, max_singular_values: int):
    bottom = network[-1]
    upper = network[-2]

    def contract_up_down(up, dn):
        shu = tf.shape(up)
        shd = tf.shape(dn)
        c = tf.einsum("ijkxlm,nkpyqr->injpxylqmr", up, dn)
        return tf.reshape(c, (
            shu[0]*shd[0], shu[1], shd[2], shu[-3], shd[-3], shu[-2]*shd[-2], shu[-1]*shd[-1]
        ))

    new = []
    multiplier = tf.eye(tf.shape(bottom[-1])[-1]*tf.shape(upper[-1])[-1], dtype=upper[-1].dtype)
    for ix in reversed(range(len(bottom))):
        tensor = contract_up_down(upper[ix], bottom[ix])
        t = tf.einsum("ludpxor,ij->ludpxoj", tensor, multiplier)
        u, s, vh = svd(t, 1, max_singular_values=max_singular_values)
        multiplier = tf.tensordot(u, s/tf.norm(s), axes=(-1, 0))
        new.insert(0, vh)
    new[-1] = tf.tensordot(new[-1], multiplier, axes=(-1, 0))
    return network[:-2] + [new]
def svd(tensor, pivot, max_singular_values=None, cutoff=0.0):
    left_dims = tf.shape(tensor)[:pivot]
    right_dims = tf.shape(tensor)[pivot:]
    tensor = tf.reshape(tensor, (tf.reduce_prod(left_dims), tf.reduce_prod(right_dims)))
    s, u, v = tf.linalg.svd(tensor)
    s_shape = tf.math.count_nonzero(
        tf.cast(s >= cutoff, dtype=tf.int32), dtype=tf.int32
    )
    if max_singular_values is None:
        max_singular_values = s_shape
    else:
        max_singular_values = tf.cast(tf.constant(max_singular_values), dtype=tf.int32)
    num_sing_vals_keep = tf.maximum(
        tf.minimum(max_singular_values, s_shape), tf.constant(1, dtype=tf.int32)
    )
    s = tf.slice(s, [0], [num_sing_vals_keep])
    u = tf.slice(u, [0, 0], [tf.shape(u)[0], num_sing_vals_keep])
    v = tf.slice(v, [0, 0], [tf.shape(v)[0], num_sing_vals_keep])
    vh = tf.linalg.adjoint(v)
    dim_s = tf.shape(s)[0]  # must use tf.shape (not s.shape) to compile
    u = tf.reshape(u, tf.concat([left_dims, [dim_s]], axis=-1))
    vh = tf.reshape(vh, tf.concat([[dim_s], right_dims], axis=-1))
    return u, tf.linalg.diag(s), vh
These functions work perfectly when used standalone:
upper = [tf.random.uniform((5,3,3,2,1,5), dtype=tf.float64) for _ in range(5)]
lower = [tf.random.uniform((5,3,3,2,1,5), dtype=tf.float64) for _ in range(5)]
contracted = contraction_step([upper, lower], 2)[0]
print(f"shapes: {', '.join([str(x.shape) for x in contracted])}")
# shapes: (2, 3, 3, 2, 2, 1, 2), (2, 3, 3, 2, 2, 1, 2), (2, 3, 3, 2, 2, 1, 2), (2, 3, 3, 2, 2, 1, 2), (2, 3, 3, 2, 2, 1, 2)
However, with the gradient, I get the following error:
with tf.GradientTape() as tape:
tape.watch(upper + lower)
contracted = contraction_step([upper, lower], 2)[0]
NotImplementedError: SVD gradient has not been implemented for input with unknown inner matrix shape.
It seems that, for some reason, TensorFlow loses the shape information of the tensors in gradient mode. Note that I get the same error when I define the tensors as tf.Variable instead of watching them manually. Any help would be highly appreciated!
Thanks
System information
OS Platform and Distribution: macOS v12.0.1
TensorFlow version: v2.6.0-rc2-32-g919f693420e 2.6.0
Python version: 3.8.9
I found a temporary solution that does not cover all the aspects of the previous svd function, but it works. TensorFlow requires the shapes of objects to be set after slicing or manipulation (this might not hold in every case, but it does in mine). I therefore modified the svd function accordingly:
from functools import reduce

def svd(tensor, pivot, max_singular_values=None):
    multip = lambda x, y: x * y
    left_dims = tensor.get_shape()[:pivot]    # static shape instead of tf.shape(tensor)[:pivot]
    right_dims = tensor.get_shape()[pivot:]   # static shape instead of tf.shape(tensor)[pivot:]
    tensor = tf.reshape(tensor, (reduce(multip, left_dims), reduce(multip, right_dims)))
    s, u, v = tf.linalg.svd(tensor)
    if max_singular_values is None:
        max_singular_values = s.shape[0]
    num_sing_vals_keep = min(s.shape[0], max_singular_values)
    s = tf.slice(s, [0], [num_sing_vals_keep])
    # tf.ensure_shape returns the checked tensor, so assign the result back
    s = tf.ensure_shape(s, tf.TensorShape(num_sing_vals_keep))
    u = tf.slice(u, [0, 0], [tf.shape(u)[0], num_sing_vals_keep])
    v = tf.slice(v, [0, 0], [tf.shape(v)[0], num_sing_vals_keep])
    vh = tf.linalg.adjoint(v)
    dim_s = s.shape[0]  # static shape instead of tf.shape(s)[0]
    u = tf.reshape(u, left_dims + (dim_s,))
    vh = tf.reshape(vh, (dim_s,) + right_dims)
    u = tf.ensure_shape(u, tf.TensorShape(left_dims + (dim_s,)))
    vh = tf.ensure_shape(vh, tf.TensorShape((dim_s,) + right_dims))
    s = tf.ensure_shape(s, tf.TensorShape((dim_s,)))
    return u, tf.linalg.diag(s), vh
This seems to work with tf.GradientTape(), but I get the following warning when I run it in tf.vectorized_map:
u, s, v = tf.vectorized_map(lambda vec: svd(vec, 1, 10), tf.random.uniform((10, 5, 5)))
WARNING:tensorflow:Using a while_loop for converting Svd
So if anyone has a better solution, it would be highly appreciated.
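One partial observation on that warning: tf.linalg.svd already batches over leading dimensions on its own, so for the toy (10, 5, 5) case above the vectorized_map is not strictly needed. A rough sketch (just the batched decomposition with truncation, not a drop-in replacement for the full svd function):
batch = tf.random.uniform((10, 5, 5))
s, u, v = tf.linalg.svd(batch)     # batched over the leading axis, no while_loop
keep = min(10, batch.shape[-1])    # truncate to max_singular_values, using static shapes
s, u, v = s[..., :keep], u[..., :keep], v[..., :keep]
vh = tf.linalg.adjoint(v)
Whether this composes with the reshaping logic in contraction_step is something I have not checked.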
I have designed a single-layer neural network:
x = AF
y = xw
where A is the n×n graph adjacency matrix, F is an n×2 feature matrix, and w is the weight matrix.
This is the code:
import numpy as np
import networkx as nx

def relu(X):
    return np.maximum(0, X)

A1 = np.matrix([
    [0, 0, 1, 0, 0, 0],
    [0, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 1, 1],
    [1, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 1, 0],
    [0, 1, 1, 0, 1, 0]],
    dtype=float
)
G = nx.from_numpy_matrix(A1, create_using=nx.DiGraph)
nodes = list(G.nodes())
print(nodes)
print('edges', len(G.edges()))
UN_G = G.to_undirected()
A = nx.adjacency_matrix(UN_G)
print('un_edges', len(UN_G.edges()))
F = np.matrix([
    [G.in_degree(i), G.out_degree(i)]
    for i in range(A.shape[0])
], dtype=float)
I = np.matrix(np.eye(A.shape[0]))
A_prem = A + I
D_hat = np.array(np.sum(A_prem, axis=0))[0]
D_hat = np.matrix(np.diag(D_hat))
A_hat = D_hat**-1 * A_prem * D_hat**-1
x = A_hat * F
x = A * F
print(x)
In the unsupervised loss function, A is a sparse matrix:
def loss_fn(y, A):
    coo = A.tocoo()
    tmp = zip(coo.row, coo.col, coo.data)
    sum = tf.Variable(0.0)
    for i, j, k in tmp:
        sum = sum + k*tf.linalg.norm(y[i]-y[j])
    return sum
In the weight training phase:
epochs = 50
w = tf.Variable(tf.random.normal((2, 2)), name='w')
eta = 0.3
for epoch in range(epochs):
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(w)
        y = tf.nn.softmax(x @ w)
        loss = loss_fn(y, A)
        print(epoch, ' ', loss)
    dl_dw = tape.gradient(loss, w)
    w.assign_sub(eta*dl_dw)
The values of dl_dw and the loss are nan. What is the problem with my code? Thank you for guiding me.
I'm trying to apply the Expectation-Maximization (EM) algorithm to a Gaussian mixture model (GMM) using Python and NumPy. The PDF document I am basing my implementation on can be found here.
Below are the equations:
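(The equation images are missing here; written out, they are the standard two-component GMM updates that my code below implements. E-step, for cluster $k \in \{1, 2\}$ and data point $x_i$:

$$w_{ik} = \frac{\pi_k\,\mathcal{N}(x_i \mid \mu_k, \Sigma_k)}{\pi_1\,\mathcal{N}(x_i \mid \mu_1, \Sigma_1) + \pi_2\,\mathcal{N}(x_i \mid \mu_2, \Sigma_2)}$$

M-step, with $n = 2000$:

$$\pi_k = \frac{1}{n}\sum_{i=1}^{n} w_{ik}, \qquad \mu_k = \frac{\sum_i w_{ik}\,x_i}{\sum_i w_{ik}}, \qquad \Sigma_k = \frac{\sum_i w_{ik}\,(x_i - \mu_k)(x_i - \mu_k)^T}{\sum_i w_{ik}}$$)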
When applying the algorithm, I get the means of both the first and the second cluster equal to:
array([[2.50832195],
[2.51546208]])
When the actual vector means for the first and second cluster are, respectively:
array([[0],
[0]])
and:
array([[5],
[5]])
The same thing happens with the values of the covariance matrices; I get:
array([[7.05168736, 6.17098629],
[6.17098629, 7.23009494]])
When it should be:
array([[1, 0],
[0, 1]])
for both clusters.
Here is the code:
import numpy as np

np.random.seed(1)
# first cluster
X_11 = np.random.normal(0, 1, 1000)
X_21 = np.random.normal(0, 1, 1000)
# second cluster
X_12 = np.random.normal(5, 1, 1000)
X_22 = np.random.normal(5, 1, 1000)
X_1 = np.concatenate((X_11, X_12), axis=None)
X_2 = np.concatenate((X_21, X_22), axis=None)
# data matrix of k x n dimensions (2 x 2000 dimensions)
X = np.concatenate((np.array([X_1]), np.array([X_2])), axis=0)

# multivariate normal distribution function, gives an n x 1 vector (2000 x 1 vector)
def normal_distribution(x, mu, sigma):
    mvnd = []
    for i in range(np.shape(x)[1]):
        gd = (2*np.pi)**(-2/2) * np.linalg.det(sigma)**(-1/2) * np.exp((-1/2) * np.dot(np.dot((x[:, i:i+1]-mu).T, np.linalg.inv(sigma)), (x[:, i:i+1]-mu)))
        mvnd.append(gd)
    return np.reshape(np.array(mvnd), (np.shape(x)[1], 1))

# Initialized parameters
sigma_1 = np.array([[10, 0],
                    [0, 10]])
sigma_2 = np.array([[10, 0],
                    [0, 10]])
mu_1 = np.array([[10],
                 [10]])
mu_2 = np.array([[10],
                 [10]])
pi_1 = 0.5
pi_2 = 0.5
Sigma_1 = np.empty([2000, 2, 2])
Sigma_2 = np.empty([2000, 2, 2])
for i in range(10):
    # E-step:
    w_i1 = (pi_1*normal_distribution(X, mu_1, sigma_1))/(pi_1*normal_distribution(X, mu_1, sigma_1) + pi_2*normal_distribution(X, mu_2, sigma_2))
    w_i2 = (pi_2*normal_distribution(X, mu_2, sigma_2))/(pi_1*normal_distribution(X, mu_1, sigma_1) + pi_2*normal_distribution(X, mu_2, sigma_2))
    # M-step:
    pi_1 = np.sum(w_i1)/2000
    pi_2 = np.sum(w_i2)/2000
    mu_1 = np.array([(1/(np.sum(w_i1)))*np.sum(w_i1.T*X, axis=1)]).T
    mu_2 = np.array([(1/(np.sum(w_i2)))*np.sum(w_i2.T*X, axis=1)]).T
    for j in range(2000):  # renamed from i to avoid shadowing the outer loop index
        Sigma_1[j:j+1, :, :] = w_i1[j:j+1, :]*np.dot((X[:, j:j+1]-mu_1), (X[:, j:j+1]-mu_1).T)
        Sigma_2[j:j+1, :, :] = w_i2[j:j+1, :]*np.dot((X[:, j:j+1]-mu_2), (X[:, j:j+1]-mu_2).T)
    sigma_1 = (1/(np.sum(w_i1)))*np.sum(Sigma_1, axis=0)
    sigma_2 = (1/(np.sum(w_i2)))*np.sum(Sigma_2, axis=0)
I would really appreciate it if someone could point out the mistake in my code or in my understanding of the algorithm.
I have recently hit a roadblock when it comes to performance. I know how to manually loop and do the interpolation from the origin cell to all the other cells by brute-forcing/looping over each row and column in the 2D array.
However, when I process a 2D array of shape, say, (3000, 3000), the linear spacing and the interpolation come to a standstill and severely hurt performance.
I am looking for a way to optimize this loop. I am aware of vectorization and broadcasting, just not sure how to apply them in this situation.
I will explain it with code and figures.
import numpy as np
from scipy.ndimage import map_coordinates

m = np.array([
    [10, 10, 10, 10, 10, 10],
    [9, 9, 9, 10, 9, 9],
    [9, 8, 9, 10, 8, 9],
    [9, 7, 8, 0, 8, 9],
    [8, 7, 7, 8, 8, 9],
    [5, 6, 7, 7, 6, 7]])

origin_row = 3
origin_col = 3
m_max = np.zeros(m.shape)
m_dist = np.zeros(m.shape)

rows, cols = m.shape
for col in range(cols):
    for row in range(rows):
        # Get spacing linear interpolation
        x_plot = np.linspace(col, origin_col, 5)
        y_plot = np.linspace(row, origin_row, 5)
        # grab the interpolated line
        interpolated_line = map_coordinates(m,
                                            np.vstack((y_plot,
                                                       x_plot)),
                                            order=1, mode='nearest')
        m_max[row][col] = max(interpolated_line)
        m_dist[row][col] = np.argmax(interpolated_line)

print(m)
print(m_max)
print(m_dist)
As you can see, this is very brute force. I have managed to broadcast all the code around this part, but I am stuck on this part.
Here is an illustration of what I am trying to achieve; I will go through the first iteration:
1.) the input array
2.) the first loop, from (0, 0) to the origin (3, 3)
3.) this will return [10 9 9 8 0]; the max will be 10 and its index will be 0
4.) here is the output for the sample array I used
Here is an update of the performance based on the accepted answer.
To speed up the code, you could first create x_plot and y_plot outside of the loops instead of creating them several times each:
# this would be outside of the loops
num = 5
lin_col = np.array([np.linspace(i, origin_col, num) for i in range(cols)])
lin_row = np.array([np.linspace(i, origin_row, num) for i in range(rows)])
Then you could access them in each loop with x_plot = lin_col[col] and y_plot = lin_row[row], as in the sketch below.
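With only this first change, the double loop would look something like this (a sketch reusing the question's variable names):
for col in range(cols):
    for row in range(rows):
        interpolated_line = map_coordinates(m,
                                            np.vstack((lin_row[row], lin_col[col])),
                                            order=1, mode='nearest')
        m_max[row][col] = max(interpolated_line)
        m_dist[row][col] = np.argmax(interpolated_line)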
Second, you can avoid both loops by calling map_coordinates once on more than just one vstack per (row, col) couple. To do so, you can create all the combinations of x_plot and y_plot by using np.tile and np.ravel, such as:
arr_vs = np.vstack((np.tile(lin_row, cols).ravel(),
                    np.tile(lin_col.ravel(), rows)))
Note that ravel is not used at the same place each time, in order to get all the combinations. Now you can use map_coordinates with this arr_vs and reshape the result with the number of rows, cols and num to get each interpolated_line in the last axis of a 3D array:
arr_map = map_coordinates(m, arr_vs, order=1, mode='nearest').reshape(rows,cols,num)
Finally, you can use np.max and np.argmax on the last axis of arr_map to get the results m_max and m_dist. So all the code would be:
import numpy as np
from scipy.ndimage import map_coordinates

m = np.array([
    [10, 10, 10, 10, 10, 10],
    [9, 9, 9, 10, 9, 9],
    [9, 8, 9, 10, 8, 9],
    [9, 7, 8, 0, 8, 9],
    [8, 7, 7, 8, 8, 9],
    [5, 6, 7, 7, 6, 7]])

origin_row = 3
origin_col = 3
rows, cols = m.shape

num = 5
lin_col = np.array([np.linspace(i, origin_col, num) for i in range(cols)])
lin_row = np.array([np.linspace(i, origin_row, num) for i in range(rows)])

arr_vs = np.vstack((np.tile(lin_row, cols).ravel(),
                    np.tile(lin_col.ravel(), rows)))
arr_map = map_coordinates(m, arr_vs, order=1, mode='nearest').reshape(rows, cols, num)

m_max = np.max(arr_map, axis=-1)
m_dist = np.argmax(arr_map, axis=-1)
print(m_max)
print(m_dist)
and you get, as expected:
#m_max
array([[10, 10, 10, 10, 10, 10],
[ 9, 9, 10, 10, 9, 9],
[ 9, 9, 9, 10, 8, 9],
[ 9, 8, 8, 0, 8, 9],
[ 8, 8, 7, 8, 8, 9],
[ 7, 7, 8, 8, 8, 8]])
#m_dist
array([[0, 0, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0],
[0, 2, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0],
[0, 2, 0, 0, 0, 0],
[1, 1, 2, 1, 2, 1]])
EDIT: lin_col and lin_row are related, so you can create them faster:
if cols >= rows:
    arr = np.arange(cols)[:, None]
    lin_col = arr + (origin_col-arr)/(num-1.)*np.arange(num)
    lin_row = lin_col[:rows] + np.linspace(0, origin_row - origin_col, num)[None, :]
else:
    arr = np.arange(rows)[:, None]
    lin_row = arr + (origin_row-arr)/(num-1.)*np.arange(num)
    lin_col = lin_row[:cols] + np.linspace(0, origin_col - origin_row, num)[None, :]
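A quick sanity check that this faster construction matches the list-comprehension version (same origin_row, origin_col and num as above):
lin_col_ref = np.array([np.linspace(i, origin_col, num) for i in range(cols)])
lin_row_ref = np.array([np.linspace(i, origin_row, num) for i in range(rows)])
print(np.allclose(lin_col, lin_col_ref), np.allclose(lin_row, lin_row_ref))
# should print: True True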
Here is a sort-of-vectorized approach. It is not very optimized and there may be one or two off-by-one index errors, but it may give you ideas.
Two examples: a monochrome 384x512 test pattern and a "real" 3-channel 768x1024 image. Both are uint8.
This takes half a minute on my machine.
For larger images one would require more RAM than I have (8 GB), or one would have to break the work into smaller chunks.
And the code:
import numpy as np

def rays(img, ctr):
    M, N, *d = img.shape
    aidx = 2*(slice(None),) + (img.ndim-2)*(None,)
    m, n = ctr
    out = np.empty_like(img)
    offsI = np.empty(img.shape, np.uint16)
    offsJ = np.empty(img.shape, np.uint16)
    # split everything into the four quadrants around the origin ctr
    img4, out4, I4, J4 = ((x[m:, n:], x[m:, n::-1], x[m::-1, n:], x[m::-1, n::-1])
                          for x in (img, out, offsI, offsJ))
    for i, o, y, x in zip(img4, out4, I4, J4):
        # two passes per quadrant: below and (after swapping axes) above the diagonal
        for _ in range(2):
            M, N, *d = i.shape
            widths = np.arange(1, M+1, dtype=np.uint16).clip(None, N)
            I = np.arange(M, dtype=np.uint16).repeat(widths)
            J = np.ones_like(I)
            J[0] = 0
            J[widths[:-1].cumsum()] -= widths[:-1]
            J = J.cumsum(dtype=np.uint16)
            ii = np.arange(1, 2*M-1, dtype=np.uint16) // 2
            II = ii.clip(None, I[:, None])
            jj = np.arange(2*M-2, dtype=np.uint32) // 2 * 2 + 1
            jj[0] = 0
            JJ = ((1 + jj) * J[:, None] // (2*(I+1))[:, None]).astype(np.uint16).clip(None, J[:, None])
            idx = i[II, JJ].argmax(axis=1)
            II, JJ = (np.take_along_axis(ZZ[aidx], idx[:, None], 1)[:, 0] for ZZ in (II, JJ))
            y[I, J], x[I, J] = II, JJ
            SH = II, JJ, *np.ogrid[tuple(map(slice, img.shape))][2:]
            o[I, J] = i[SH]
            i, o = i.swapaxes(0, 1), o.swapaxes(0, 1)
            y, x = x.swapaxes(0, 1), y.swapaxes(0, 1)
    return out, offsI, offsJ
from scipy.misc import face

f = face()
fr, *fidx = rays(f, (200, 400))
s = np.uint8((np.arange(384)[:, None] % 41 < 2) & (np.arange(512) % 41 < 2))
s = 255*s + 128*s[::-1, ::-1] + 64*s[::-1] + 32*s[:, ::-1]
sr, *sidx = rays(s, (200, 400))

from PIL import Image
Image.fromarray(f).show()
Image.fromarray(fr).show()
Image.fromarray(s).show()
Image.fromarray(sr).show()
I am trying to get the output of the code, but I get an error at the tf.control_dependencies() call. The error is shown below. The original code came from enter link description here.
Traceback (most recent call last):
File "croptest.py", line 80, in <module>
crop(Image,boxes,batch_inds);
File "croptest.py", line 55, in crop
with tf.control_dependencies([assert_op, images, batch_inds]):
File "/home/ubuntu/Desktop/WK/my_project/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3936, in control_dependencies
return get_default_graph().control_dependencies(control_inputs)
File "/home/ubuntu/Desktop/WK/my_project/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3665, in control_dependencies
c = self.as_graph_element(c)
File "/home/ubuntu/Desktop/WK/my_project/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2708, in as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
File "/home/ubuntu/Desktop/WK/my_project/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2797, in _as_graph_element_locked
% (type(obj).__name__, types_str))
TypeError: Can not convert a list into a Tensor or Operation.
I believe there is no mistake in the code; I am just curious why the control dependencies are not working even though all the inputs are given.
The code I ran is below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np

def crop(images, boxes, batch_inds, stride=1, pooled_height=2, pooled_width=2, scope='ROIAlign'):
    """Cropping areas of features into fixed size
    Params:
    --------
    images: a 4-d Tensor of shape (N, H, W, C)
    boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
    batch_inds:

    Returns:
    --------
    A Tensor of shape (N, pooled_height, pooled_width, C)
    """
    with tf.name_scope(scope):
        boxes = boxes / (stride + 0.0)
        print("boxes again=", boxes)
        boxes = tf.reshape(boxes, [-1, 4])
        print("2=======================================================")
        print("boxes again=", boxes)

        # normalize the boxes and swap x y dimensions
        shape = tf.shape(images)
        boxes = tf.reshape(boxes, [-1, 2])  # to (x, y)
        print("3=======================================================")
        print(boxes)
        xs = boxes[:, 0]
        ys = boxes[:, 1]
        print("4=======================================================")
        print(xs, ys)
        xs = xs / tf.cast(shape[2], tf.float32)
        ys = ys / tf.cast(shape[1], tf.float32)
        print("5=======================================================")
        print("again xs,ys", xs, ys)
        boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
        boxes = tf.reshape(boxes, [-1, 4])  # to (y1, x1, y2, x2)
        print("6=======================================================")
        print("again boxes", boxes)

        # if batch_inds is False:
        #     num_boxes = tf.shape(boxes)[0]
        #     batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
        #     batch_inds = boxes[:, 0] * 0
        #     batch_inds = tf.cast(batch_inds, tf.int32)
        # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
        assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
        print("7=======================================================")
        print("assert_op", assert_op)
        print("8=======================================================")
        with tf.control_dependencies([assert_op, images, batch_inds]):
            return tf.image.crop_and_resize(images, boxes, batch_inds,
                                            [pooled_height, pooled_width],
                                            method='bilinear',
                                            name='Crop')
These are the inputs I set:
Image = [[[[1, 1, 1, 1], [2, 2, 2, 2]], [[3, 3, 3, 3], [4, 4, 4, 4]]]]
print("=======================================================")
box = [[1, 1, 2, 2]]
boxes = tf.constant(box, tf.float32)
batch_inds = [1]
batch_inds = np.zeros((4,), dtype=np.int32)
batch_inds = tf.convert_to_tensor(batch_inds)
print("boxes=", boxes)
print(Image)
print(tf.shape(Image))
crop(Image, boxes, batch_inds)
What is wrong with my inputs, if I don't want to modify the crop() function?
Thank you.
I solved it by adding
Image = tf.convert_to_tensor(Image)
after the line Image = [[[[1, 1, 1,1], [2, 2, 2,2]], [[3,3, 3, 3], [4,4, 4, 4]]]]. The error occurred because tf.control_dependencies() only accepts Tensors or Operations as control inputs, and a plain Python list is neither.
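For reference, a minimal sketch of the fixed input block (the float32 dtype and the length-1 batch_inds are my assumptions; crop_and_resize expects one batch index per box):
Image = [[[[1, 1, 1, 1], [2, 2, 2, 2]], [[3, 3, 3, 3], [4, 4, 4, 4]]]]
# control_dependencies only accepts Tensors/Operations, so convert the list first
Image = tf.convert_to_tensor(Image, dtype=tf.float32)
boxes = tf.constant([[1, 1, 2, 2]], tf.float32)
# one batch index per box (the reshaped boxes yield a single box here)
batch_inds = tf.convert_to_tensor(np.zeros((1,), dtype=np.int32))
crop(Image, boxes, batch_inds)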