I have a custom layer, and I want to print intermediate tensors that are not linked to the tensor returned by the layer's call() method (shown in the code). The code I used is:
class Similarity(Layer):
    """Custom Keras layer that scores each row by its cosine similarity to the other rows.

    ``call`` L2-normalizes the input along axis 2, builds the pairwise
    similarity matrix ``normalized @ normalized^T``, weights the upper
    triangle by 3 and the lower triangle by 2 (the diagonal cancels out:
    3 + 2 - 6 + 1 = 0) and sums along axis 2, keeping that axis with size 1.

    The layer also contains two debugging experiments (marked METHOD1 and
    METHOD2) that try to print a tensor which is *not* part of the returned
    value.
    """

    def __init__(self, num1, num2):
        """Store the two size parameters; ``num1`` is used as the output length."""
        super(Similarity, self).__init__()
        self.num1 = num1
        self.num2 = num2
        # self.total = tf.Variable(initial_value=tf.zeros((16,self.num1, 1)), trainable=False)

    def build(self, input_shape):
        """Mark the layer as built; the layer creates no weights."""
        super(Similarity, self).build((None, self.num1, 1))

    def compute_mask(self, inputs, mask=None):
        """Forward the incoming mask unchanged."""
        # Just pass the received mask from previous layer, to the next layer or
        # manipulate it if this layer changes the shape of the input
        return mask

    def call(self, inputs, mask=None):
        """Return the weighted row-wise similarity sums for ``inputs``.

        Only the final ``z`` is returned; ``rankt`` (METHOD1) and the
        temporary ``z`` (METHOD2) exist purely for printing.
        """
        print(">>", type(inputs), inputs.shape, inputs)
        normalized = tf.nn.l2_normalize(inputs, axis = 2)
        print("norm", normalized)
        # multiply row i with row j using transpose
        # element wise product
        similarity = tf.matmul(normalized, normalized,
                               adjoint_b = True # transpose second matrix
                               )
        print("SIM", similarity)
        z=tf.linalg.band_part(similarity, 0, -1)*3 + tf.linalg.band_part(similarity, -1, 0)*2 - tf.linalg.band_part(similarity,0,0)*6 + tf.linalg.band_part(similarity,0,0)
        # z = K.print_tensor(tf.reduce_sum(z, 2, keepdims=True))
        z = tf.reduce_sum(z, 2, keepdims=True)
        # NOTE(review): tf.argsort sorts along the last axis by default, and
        # that axis has size 1 after keepdims=True, so this ranking is
        # presumably all zeros -- axis=1 may be what was intended; confirm.
        z = tf.argsort(z) # <----------- METHOD2: Reassigned the Z to the tensor I want to print temporarily
        z = K.print_tensor(z)
        print(z)
        # Recompute z from scratch: the METHOD2 value above is discarded.
        z=tf.linalg.band_part(similarity, 0, -1)*3 + tf.linalg.band_part(similarity, -1, 0)*2 - tf.linalg.band_part(similarity,0,0)*6 + tf.linalg.band_part(similarity,0,0)
        z = K.print_tensor(tf.reduce_sum(z, 2, keepdims=True)) #<------------- THIS LINE WORKS/PRINTS AS Z is returned
        # z = tf.reduce_sum(z, 2, keepdims=True)

        #<------------- METHOD1: Want to print RANKT tensor but this DID NOT WORKED
        @tf.function
        def f(z):
            rankt = K.print_tensor(tf.argsort(z))
            # rankt = tf.reshape(rankt, (-1, self.num1))
            # rankt = K.print_tensor(rankt)
            return rankt

        # The result is never used by the returned tensor.
        pt = f(z)
        return z # <--------- The returned tensor

    def compute_output_shape(self, input_shape):
        """Report the output shape ``(None, num1, 1)`` regardless of ``input_shape``."""
        print("IS", (None, self.num1, 1))
        return (None, self.num1, 1)
To be more clear:
In method 1, I used @tf.function to print the rankt tensor, but it didn't work.
Secondly, in method 2, I temporarily reassigned z (the tensor returned by call()) so that it is executed in backprop and I get the printed values. After this, I reassigned z to the original operations.
To summarize, I don't want the value of z; I want to print the value of some variable that depends on z, but I am not able to print any variable other than z.
There is tf.print function for this.
In the eager mode, it returns nothing and just prints the tensors. When used during computation graph building, it returns TF operators that do identity and print the tensor values as a side-effect.
I have searched a lot, but I couldn't find any way to print intermediate tensors. It turns out that we can only print tensors that are linked to the executed tensor (here z). So what I did was print z using K.print_tensor() and then, later on, use the printed values (obviously now in list form) to perform my computation (it was a side computation, not something to be implemented in the layer's logic).
Related
I want to create a custom Keras layer that computes the "predict" of a Self Organizing Map.
Here is my implementation of the layer:
`
class SOMLayer(keras.layers.Layer):
    """Keras layer that classifies an input with a pre-trained Self Organizing Map.

    Two parallel code paths exist. The ``*_lblmap`` methods work on numpy
    arrays and are used at construction time to build the label map
    (``winmap``). The un-suffixed methods work on TensorFlow tensors and are
    used by ``call``.
    """

    def __init__(self, X_train, y_train, w, n=50, input_shape=265):
        """Store the SOM weights and build the position -> label-count map.

        Parameters
        ----------
        X_train : np.array or list
            Training samples used to build the label map.
        y_train : np.array or list
            Label of each training sample.
        w : array-like
            Weights of the pre-trained SOM.
        n : int
            Side length of the initial (n, n) activation map.
        input_shape : int
            Expected number of features per sample.
        """
        super(SOMLayer, self).__init__()
        self.inputs = None
        self.X_train = X_train
        self.y_train = y_train
        self._input_len = input_shape
        # weights of pre-trained SOM, two versions: _weights is a tensor, _weights_nc is a numpy array
        self._weights = tf.convert_to_tensor(w, dtype=tf.float32)
        self._weights_nc = w
        self._activation_map = zeros((n, n))
        # activation distance calc., lblmap version works with numpy arrays, the other one with tf tensors
        self._activation_distance = self._manhattan_distance
        self._activation_distance_lblmap = self._manhattan_distance_lblmap
        self.winmap = self.labels_map(X_train, y_train)
        # Most common label over all map positions, used when the winning
        # position has no entry in winmap.
        # NOTE(review): `sum` is presumably numpy's sum here (the built-in
        # would start from the int 0) -- confirm against the file's imports.
        self.default_class = sum(list(self.winmap.values())).most_common()[0][0]

    @tf.function
    def call(self, inputs):
        """Return a (1, 6) numpy array holding 0.95 at the predicted class and 0.01 elsewhere."""
        ret_arr = np.array(np.full(6, 0.01), ndmin=2)
        win_position = self.winner(inputs)
        # NOTE(review): winmap is keyed by the values returned from
        # winner_lblmap, while this lookup uses a tensor reference; it is
        # unclear whether the two can ever compare equal -- confirm.
        if win_position.ref() in self.winmap:
            ret_arr[0][self.winmap[win_position.ref()].most_common()[0][0]] = 0.95
        else:
            ret_arr[0][self.default_class] = 0.95
        return ret_arr

    def _activate(self, x):
        """Updates matrix activation_map, in this matrix
        the element i,j is the response of the neuron i,j to x."""
        self._activation_map = self._activation_distance(x, self._weights)

    def _activate_lblmap(self, x):
        """Updates matrix activation_map, in this matrix
        the element i,j is the response of the neuron i,j to x."""
        self._activation_map = self._activation_distance_lblmap(x, self._weights_nc)

    def _manhattan_distance(self, x, w):
        """Return the L1 norm of ``x - w`` along the last axis (TensorFlow version)."""
        return tf.linalg.norm(tf.subtract(x, w), ord=1, axis=-1)

    def _manhattan_distance_lblmap(self, x, w):
        """Return the L1 norm of ``x - w`` along the last axis (numpy version)."""
        return linalg.norm(subtract(x, w), ord=1, axis=-1)

    def _check_input_len(self, data):
        """Checks that the data in input is of the correct shape.

        Raises
        ------
        ValueError
            If the first sample does not have ``self._input_len`` features.
        """
        data_len = len(data[0])
        if self._input_len != data_len:
            msg = 'Received %d features, expected %d.' % (data_len,
                                                          self._input_len)
            raise ValueError(msg)

    @tf.function
    def winner(self, x):
        """Computes the coordinates of the winning neuron for the sample x."""
        self._activate(x)
        # NOTE(review): unlike the numpy `.argmin()` in winner_lblmap, which
        # flattens the map first, tf.argmin reduces along axis 0 by default --
        # the two implementations may therefore not return the same shape.
        return tf.unravel_index(tf.argmin(self._activation_map, output_type=tf.int32),
                                tf.shape(self._activation_map))

    def winner_lblmap(self, x):
        """Computes the coordinates of the winning neuron for the sample x."""
        self._activate_lblmap(x)
        return unravel_index(self._activation_map.argmin(),
                             self._activation_map.shape)

    def labels_map(self, data, labels):
        """Returns a dictionary wm where wm[(i,j)] is a Counter
        that contains the number of samples from a given label
        that have been mapped in position i,j.

        Parameters
        ----------
        data : np.array or list
            Data matrix.
        labels : np.array or list
            Labels for each sample in data.

        Raises
        ------
        ValueError
            If data and labels differ in length, or if the samples do not
            have the expected number of features.
        """
        self._check_input_len(data)
        if not len(data) == len(labels):
            raise ValueError('data and labels must have the same length.')
        winmap = defaultdict(list)
        for x, l in zip(data, labels):
            winmap[self.winner_lblmap(x)].append(l)
        # Collapse each position's label list into label counts.
        for position in winmap:
            winmap[position] = Counter(winmap[position])
        return winmap
`
There are two implementations of each of the methods "activate", "manhattan_distance" and "winner".
The implementations with "_lblmap" in the name work with numpy arrays (they are the implementation from the minisom Python library); the other implementations use TensorFlow tensors, because the input of the layer when the model executes is a tensor and I was not able to convert that tensor into a numpy array.
The problem is in the "winner" method, the winner method that works with numpy array return this:
self.winner_lblmap numpy array implementation
(0, 2)
that is the position (like x,y coordinates) of the Best Matching Unit that will determine the class of the input, and its called inside "labels_map" method.
The winner method called inside "call" method works with tensorflow tensors because it will manage the input passed when you use model.predict(...) and this input is a tensor.
The output of this implementation is:
self.winner tensorflow tensors implementation
Tensor("UnravelIndex:0", shape=(2, 50), dtype=int32)
Looking at the shape, it's easy to see that it is not the shape of the desired output; I'm also not able to directly access the data inside this tensor (or previously calculated ones).
I tried to force "eager evaluation" in different ways to visualize the data, but without success.
I also tried to tune the "tf.linalg.norm" parameters, but the shape of the result is (0,).
Is there a way to access the data inside the tensor and obtain the same result as the "winner_lblmap" method that works with numpy arrays?
I have an input
inp = torch.tensor([1.0])
and a neural network
class Model_updater(nn.Module):
    """Small fully connected network: 1 input feature -> 2 -> 3 -> 2 outputs."""

    def __init__(self):
        super().__init__()
        # Layers are created in fc1, fc2, fc3 order.
        self.fc1 = nn.Linear(1, 2)
        self.fc2 = nn.Linear(2, 3)
        self.fc3 = nn.Linear(3, 2)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU activations, then fc3 without activation."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)
# Instantiate the network and hand its parameters to an Adam optimizer.
net_updater = Model_updater()
opt_updater = optim.Adam(net_updater.parameters())
I'm trying to update my input using the neural network's output:
inp = torch.tensor([1.0])
epochs = 3
for i in range(epochs):
    opt_updater.zero_grad()

    # Feed the network a detached copy of the current input.
    inp_copy = inp.detach().clone()
    mu, sigma = net_updater(inp_copy)

    # Reparameterized sample, so `a` carries gradients back to mu and sigma.
    dist1 = Normal(mu, sigma.abs())
    a = dist1.rsample()

    # In-place update: from here on `inp` itself is part of the autograd
    # graph built in this iteration.
    inp += a

    loss = torch.tensor(5.0) - inp
    loss.backward(retain_graph=True)
    opt_updater.step()
But getting the error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [3, 2]], which is output 0 of TBackward, is at version 2; expected version 1
I also tried changing the loss calculations with
loss = torch.tensor(5.0) - inp_copy
But got the error
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I also tried without the retain_graph=True but I get
RuntimeError: Trying to backward through the graph a second time,
but the saved intermediate results have already been freed. Specify retain_graph=True when calling .backward() or autograd.grad() the first time.
Which doesn't really make sense to me, because I don't see where I'm calling backward() twice.
Most likely, this is what you want
inp1 = inp + a # create a separate variable for updated value
inp.data = inp1.data # update the value without touching the graph
loss = torch.tensor(5.0) - inp1 # use updated value which has gradient
I am setting up a custom layer with a custom gradient. The inputs are a single 2-D tensor of shape (?, 2). The outputs are also a single 2-D tensor with shape (?, 2).
I am struggling with understanding how these objects behave. What I've gathered from the documentation is that for a given input, the gradient will have the same shape as the output and that I need to return a list of gradients for each input. I've been assuming that since my inputs look like (?, 2) and my outputs look like (?, 2), then the grad function should return a length-2 list: [input_1_grad, input_2_grad], where both list items are tensors with the shape of the output, (?, 2).
This is not working, which is why I'm hoping someone here could help.
Here is my error (appears to occur at compile time):
ValueError: Num gradients 3 generated for op name:
"custom_layer/IdentityN" op: "IdentityN" input:
"custom_layer_2/concat" input: "custom_layer_1/concat" attr { key:
"T" value {
list {
type: DT_FLOAT
type: DT_FLOAT
} } } attr { key: "_gradient_op_type" value {
s: "CustomGradient-28729" } } do not match num inputs 2
The other wrinkle is that the input to the custom layer is itself also a custom layer (though without a custom gradient). I will provide the code for both layers, in case it's helpful.
Also, note that the network compiles and runs if I don't try to specify a custom gradient. But, since my functions need help differentiating themselves, I need to manually intervene, so having a working custom gradient is critical.
First Custom Layer (no custom gradient):
class custom_layer_1(tensorflow.keras.layers.Layer):
def __init__(self):
super(custom_layer_1, self).__init__()
def build(self, input_shape):
self.term_1 = self.add_weight('term_1', trainable=True)
self.term_2 = self.add_weight('term_2', trainable=True)
def call(self, x):
self.term_1 = formula in terms of x
self.term_2 = another formula in terms of x
return tf.concat([self.term_1, self.term_2], axis=1)
Second Custom Layer (with the custom gradient):
class custom_layer_2(tensorflow.keras.layers.Layer):
    """Second custom layer: applies ``custom_function`` (which has a custom gradient) to its input."""

    ### the inputs
    # x is the concatenation of term_1 and term_2
    def __init__(self):
        super(custom_layer_2, self).__init__()

    def build(self, input_shape):
        # No weights are created; the explicit `pass` keeps the method body
        # valid while the weight definitions stay commented out.
        #self.weight_1 = self.add_weight('weight_1', trainable=True)
        #self.weight_2 = self.add_weight('weight_2', trainable=True)
        pass

    def call(self, x):
        """Return ``custom_function(x)``."""
        return custom_function(x)
The Custom Function:
#tf.custom_gradient
def custom_function(x):
### the inputs
# x is a concatenation of term_1 and term_2
weight_1 = function in terms of x
weight_2 = another function in terms of x
### the gradient
def grad(dy):
# assuming dy has the output shape of (?, 2). could be wrong.
d_weight_1 = K.reshape(dy[:, 0], shape=(K.shape(x)[0], 1))
d_weight_1 = K.reshape(dy[:, 1], shape=(K.shape(x)[0], 1))
term_1 = K.reshape(x[:, 0], shape=(K.shape(x)[0], 1))
term_2 = K.reshape(x[:, 1], shape=(K.shape(x)[0], 1))
d_weight_1_d_term_1 = tf.where(K.equal(term_1, K.zeros_like(term_1)), K.zeros_like(term_1), -term_2 / term_1) * d_weight_1
d_weight_1_d_term_2 = tf.where(K.equal(term_1, K.zeros_like(term_1)), K.zeros_like(term_1), 1 / term_1) * d_weight_1
d_weight_2_d_term_1 = tf.where(K.equal(term_2, K.zeros_like(term_2)), K.zeros_like(term_1), 2 * term_1 / term_2) * d_weight_2
d_weight_2_d_term_2 = tf.where(K.equal(term_2, K.zeros_like(term_2)), K.zeros_like(term_1), -K.square(term_1 / term_2)) * d_weight_2
return tf.concat([d_weight_1_d_term_1, d_weight_1_d_term_2], axis=1), tf.concat([d_weight_2_d_term_1, d_weight_2_d_term_2], axis=1)
return tf.concat([weight_1, weight_2], axis=1), grad
Any help would be much appreciated!
Since the Keras wrapper does not support attention model yet, I'd like to refer to the following custom attention.
https://github.com/datalogue/keras-attention/blob/master/models/custom_recurrents.py
But the problem is, when I run the code above, it returns following error:
ImportError: cannot import name '_time_distributed_dense'
It looks like _time_distributed_dense is no longer supported by Keras versions above 2.0.0.
The only part that uses the _time_distributed_dense function is the part below:
def call(self, x):
    """Cache the input sequence and its dense projection, then defer to the parent ``call``."""
    # Keep the whole sequence so each timestep can "attend" to it.
    self.x_seq = x

    # The dense projection over the time dimension does not depend on any
    # previous step, so it is computed once here to save computation time.
    self._uxpb = _time_distributed_dense(
        self.x_seq,
        self.U_a,
        b=self.b_a,
        input_dim=self.input_dim,
        timesteps=self.timesteps,
        output_dim=self.units,
    )

    return super(AttentionDecoder, self).call(x)
How should I change the _time_distributed_dense(self ... ) part?
I am just reposting An Chen's answer from the GitHub issue (the page or his answer might be deleted in the future).
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply the dense map `y . w + b` to every temporal slice y of x.

    # Arguments
        x: input tensor, indexed as (batch, time, input_dim).
        w: weight matrix.
        b: optional bias vector.
        dropout: optional dropout rate; used only when strictly between
            0 and 1, with the same mask for every temporal slice.
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Any size the caller left out is read from the tensors themselves.
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # Build one dropout mask from the first timestep's shape and repeat
        # it across time, so every timestep drops the same features.
        first_step = K.reshape(x[:, 0, :], (-1, input_dim))
        mask = K.dropout(K.ones_like(first_step), dropout)
        mask_over_time = K.repeat(mask, timesteps)
        x = K.in_train_phase(x * mask_over_time, x, training=training)

    # Merge the batch and time axes so a single product covers all timesteps.
    flat = K.reshape(x, (-1, input_dim))
    projected = K.dot(flat, w)
    if b is not None:
        projected = K.bias_add(projected, b)

    # Restore the 3D layout.
    if K.backend() != 'tensorflow':
        return K.reshape(projected, (-1, timesteps, output_dim))
    result = K.reshape(projected, K.stack([-1, timesteps, output_dim]))
    result.set_shape([None, None, output_dim])
    return result
You could just add this on your Python code.
I have the following function:
def forward_propagation(self, x):
    """Run the recurrence over every element of ``x`` and return ``[o, s]``.

    ``o`` collects the output of each time step and ``s`` the hidden states;
    ``s`` has one extra row, the initial hidden state, which is left at 0.
    """
    # The total number of time steps
    T = len(x)

    # Hidden states (T + 1 rows) and per-step outputs (T rows), zero-filled.
    s = tf.zeros([T+1, self.hidden_dim])
    o = tf.zeros([T, self.word_dim])

    # Placeholders fed with different operands for each of the two formulas.
    a = tf.placeholder(tf.float32)
    b = tf.placeholder(tf.float32)
    c = tf.placeholder(tf.float32)
    s_t = tf.nn.tanh(a + tf.reduce_sum(tf.multiply(b, c)))
    o_t = tf.nn.softmax(tf.reduce_sum(tf.multiply(a, b)))

    with tf.Session() as sess:
        # Materialize both zero tensors so the rows can be filled in below.
        s, o = sess.run([s, o])
        # For each time step...
        for t in range(T):
            # Note that we are indexing U by x[t]. This is the same as multiplying U with a one-hot vector.
            # At t == 0, s[t-1] is s[-1]: the extra last row, which is still 0.
            s[t] = sess.run(s_t, feed_dict={a: self.U[:, x[t]], b: self.W, c: s[t-1]})
            o[t] = sess.run(o_t, feed_dict={a: self.V, b: s[t]})
    return [o, s]
self.U, self.V, and self.W are numpy arrays. I try to get softmax on
o_t = tf.nn.softmax(tf.reduce_sum(tf.multiply(a, b)))
graph, and it gives me error on this line:
o[t] = sess.run(o_t, feed_dict={a: self.V, b: s[t]})
The error is:
InvalidArgumentError (see above for traceback): Expected begin[0] == 0
(got -1) and size[0] == 0 (got 1) when input.dim_size(0) == 0
[[Node: Slice = Slice[Index=DT_INT32, T=DT_INT32,
_device="/job:localhost/replica:0/task:0/cpu:0"](Shape_1, Slice/begin, Slice/size)]]
How am I supposed to get softmax in TensorFlow?
The problem arises because you call tf.reduce_sum on the argument of tf.nn.softmax. As a result, the softmax function fails because a scalar is not a valid input argument. Did you mean to use tf.matmul instead of the combination of tf.reduce_sum and tf.multiply?
Edit: Tensorflow does not provide an equivalent of np.dot out of the box. If you want to compute the dot product of a matrix and a vector, you need to sum over indices explicitly:
# equivalent to np.dot(a, b) if a.ndim == 2 and b.ndim == 1
c = tf.reduce_sum(a * b, axis=1)