how to change torch.scatter_add to tensorflow function - python

I need to port some PyTorch code to TensorFlow.
The PyTorch code, taken from NADST, is:
encoded_context = ft['encoded_context2']
encoded_in_domainslots = ft['encoded_in_domainslots2']
self.pointer_attn(ft['out_states'], encoded_context, encoded_context, context_mask)
pointer_attn = self.pointer_attn.attn.squeeze(1)
p_vocab = F.softmax(vocab_attn, dim = -1)
context_index = context.unsqueeze(1).expand_as(pointer_attn)
p_context_ptr = torch.zeros(p_vocab.size()).cuda()
p_context_ptr.scatter_add_(2, context_index, pointer_attn)
I want to convert the line "p_context_ptr.scatter_add_(2, context_index, pointer_attn)" to its TensorFlow equivalent.
I tried the TensorFlow function "tf.compat.v1.tensor_scatter_nd_add()", but it does not perform the same operation as the torch scatter_add_() function.
I have been working on this for a while without finding a solution. My current code looks like this:
def get_scatter_add(tensor, indices, updates):
    """Attempt to reproduce torch ``scatter_add_`` on the last axis via one-hot updates.

    This is the version from the question that runs out of memory: it
    materialises a dense one-hot tensor with one slot per
    (row, index position, last-axis position) before scattering.

    NOTE(review): the original indentation was lost; the three reshapes are
    assumed to form the body of the ``if`` -- confirm.
    """
    # Collapse all leading axes so each argument becomes 2-D.
    if indices.shape.rank > 2:
        tensor = tf.compat.v1.reshape(tensor, shape=[-1, tensor.shape[-1]])
        indices = tf.compat.v1.reshape(indices, shape=[-1, indices.shape[-1]])
        updates = tf.compat.v1.reshape(updates, shape=[-1, updates.shape[-1]])
    # one_hot appends an axis of size tensor.shape[-1] to `indices`. With the
    # shapes quoted in the question ([1100, 900] indices, last axis 19200)
    # that is 1100 * 900 * 19200 elements, which is where the memory goes.
    one_hot_index = tf.compat.v1.one_hot(indices=indices, depth=tensor.shape[-1])
    tile_update = tf.compat.v1.expand_dims(updates, axis=-1)
    # Broadcasted product: same (huge) shape as the one-hot tensor.
    updates = tf.compat.v1.to_float(one_hot_index) * tf.compat.v1.to_float(tile_update)
    indices = tf.compat.v1.expand_dims(indices, axis=-1)
    # `update` and `res` are computed but never used below.
    update = tensor.shape[indices.shape[-1]:]
    res = indices.shape[:-1] + update
    scatter = tf.compat.v1.tensor_scatter_nd_add(tensor, indices, updates)
    return scatter
However, this runs out of memory. My variable shapes are tensor.shape -> [1100, 19200], update.shape -> [1100, 900], updates.shape -> [1100, 900].
How can I solve this problem?
Thank you for your reply
have nice day!!!

I found a solution myself.
TensorFlow's tensor_scatter_nd_add function behaves differently because the index vector's dimensions are expanded to address the target tensor.
However, there is one case in which it performs the same operation as the torch scatter_add_ function.
That case is:
import tensorflow as tf
# Each row of `indices` holds a single position in the 1-D `tensor`.
indices = tf.constant([[4], [3], [1], [7]])
# updates[i] is added at position indices[i][0].
updates = tf.constant([9, 10, 11, 12])
tensor = tf.ones([8], dtype=tf.int32)
updated = tf.tensor_scatter_nd_add(tensor, indices, updates)
# Starting from all ones, the values become [1, 12, 1, 11, 10, 1, 1, 13].
print(updated)
It only behaves this way when the tensor is one-dimensional and indices has rank 2,
so I reshape my tensors to match the case above, like this:
tensor.shape()->reshape[-1]
update.shape()->reshape[-1]
indices.shape()->reshape[-1, 1]
This matches the case above, but the index values also have to be adjusted. With a pointer generator for the DST task, the last dimension of tensor is the vocabulary size, so after flattening, the indices of the second row must be offset by vocab_size, those of the third row by 2 * vocab_size, and so on.
With that offset, the function performs the same operation as torch scatter_add_.
example:
tensor = [35, 32, vocab_size], indices = [35, 32, 900], update = [35, 32, 900]
Torch case:
tensor.scatter_add_(2, indices, update)
Tensorflow case:
tensor = my_tensorflow_scatter_add(tensor, indices, update)
this same operation case above variable dimension
my_tensorflow_scatter_add function:
def my_tensorflow_scatter_add(tensor, indices, updates):
    """Emulate ``tensor.scatter_add_(-1, indices, updates)`` from PyTorch.

    Everything is flattened to 1-D so that ``tensor_scatter_nd_add`` can be
    used with scalar indices. Each index is shifted by
    ``row_number * last_dim`` so it addresses the right row of the
    flattened tensor.

    Args:
        tensor: Tensor of shape ``[..., vocab_size]`` to add into.
        indices: Integer tensor of shape ``[..., k]`` holding positions
            along the last axis of ``tensor``.
        updates: Tensor of shape ``[..., k]`` with the values to add.

    Returns:
        A tensor with the same shape as ``tensor``.

    Note:
        The leading dimensions of ``tensor``, ``indices`` and ``updates``
        are assumed to match; the row offsets are only correct in that case.
    """
    # Remember the dynamic shape so the result can be restored for any rank.
    original_shape = tf.shape(tensor)
    last_dim = original_shape[-1]

    # One row of indices per leading position.
    indices = tf.compat.v1.reshape(indices, shape=[-1, tf.shape(indices)[-1]])

    # Row r of the flattened tensor starts at offset r * last_dim.
    row_offsets = tf.compat.v1.expand_dims(
        tf.range(0, tf.shape(indices)[0], 1) * last_dim, axis=-1)
    # Cast so that int64 indices can be offset as well.
    indices += tf.cast(row_offsets, indices.dtype)

    # Flatten: 1-D target, one scalar index per update.
    flat_tensor = tf.compat.v1.reshape(tensor, shape=[-1])
    flat_indices = tf.compat.v1.reshape(indices, shape=[-1, 1])
    flat_updates = tf.compat.v1.reshape(updates, shape=[-1])

    # Duplicate indices accumulate, as with torch scatter_add_.
    scatter = tf.compat.v1.tensor_scatter_nd_add(
        flat_tensor, flat_indices, flat_updates)
    return tf.compat.v1.reshape(scatter, shape=original_shape)
I solved my question problem

Alternative solution without flattening all tensors. Assuming the tensor shapes tensor = [35, 32, vocab_size], indices = [35, 32, 900], update = [35, 32, 900] (based on Proper usage of `tf.scatter_nd` in tensorflow-r1.2) :
def scatter_add(tensor, indices, updates):
    """Scatter-add ``updates`` into the last axis of a rank-3 ``tensor``.

    Builds full 3-D coordinates ``(i, j, indices[i, j, k])`` for every update
    and hands them to ``tf.tensor_scatter_nd_add``. Shapes are read
    dynamically, so dimensions that are unknown at graph-construction time
    are supported.

    Args:
        tensor: (seq_len, batch_size, vocab_size)
        indices: (seq_len, batch_size, dim), integer positions in the last
            axis of ``tensor``
        updates: (seq_len, batch_size, dim)

    Returns:
        (seq_len, batch_size, vocab_size)
    """
    indices = tf.convert_to_tensor(indices)
    dyn_shape = tf.shape(indices)
    seq_len, batch_size, dim = dyn_shape[0], dyn_shape[1], dyn_shape[2]
    # Coordinates for the two leading axes, in the dtype of `indices` so
    # that all three components can be stacked together.
    i1, i2 = tf.meshgrid(tf.cast(tf.range(seq_len), indices.dtype),
                         tf.cast(tf.range(batch_size), indices.dtype),
                         indexing="ij")
    # Repeat each leading coordinate once per entry on the last axis.
    i1 = tf.tile(i1[:, :, tf.newaxis], [1, 1, dim])
    i2 = tf.tile(i2[:, :, tf.newaxis], [1, 1, dim])
    # Final indices: (seq_len, batch_size, dim, 3)
    idx = tf.stack([i1, i2, indices], axis=-1)
    # Duplicate coordinates accumulate.
    return tf.tensor_scatter_nd_add(tensor, idx, updates)

Related

How to modify elements of a Keras Tensor object

I am building a Convolution Neural Network in Keras that receives batch of images with dimensions (None, 256, 256, 1) and the output would be batches with size (None, 256, 256, 3). Now after the final layer output I want to add a layer that assigns values to some of the pixels in output layer based on a value condition on inputs. Here is what I tried:
The Function
def SetBoundaries(ins):
    """Zero all three output channels wherever channel 0 of the input is 0.

    ``ins`` is the two-element list passed to the Lambda layer: the network
    input followed by the network output.

    NOTE: this version uses NumPy indexing and item assignment. When it is
    called through the Lambda layer, the question reports
    "Cannot convert a symbolic Tensor ... to a numpy array".
    """
    xi = ins[0]
    xo = ins[1]
    # Positions (sample, row, col) where the input's first channel is zero.
    bnds = np.where(xi[:, :, :, 0] == 0)
    bnds_s, bnds_i, bnds_j = bnds[0], bnds[1], bnds[2]
    # Overwrite each of the three output channels at those positions.
    xo[bnds_s, bnds_i, bnds_j, 0] = 0
    xo[bnds_s, bnds_i, bnds_j, 1] = 0
    xo[bnds_s, bnds_i, bnds_j, 2] = 0
    return xo
Keras model
def conv_res(inputs):
    """Build the network body (elided in the question) and post-process its output."""
    # Keep a handle on the raw input; it is passed to SetBoundaries below.
    x0 = inputs
    ...
    xc = conv_layer(xc, kernel_size=3, stride=1,
                    num_filters=3, name="Final_Conv")
    # apply assignment function
    xc = Lambda(SetBoundaries, name="assign_boundaries")([x0, xc])
    return xc
Finally, the model is built using
def build_model(inputs):
    """Create the Keras model for inputs shaped like ``inputs``.

    Axes 1-3 of ``inputs.shape`` give the per-sample input shape; the batch
    axis is left to Keras.
    """
    height = int(inputs.shape[1])
    width = int(inputs.shape[2])
    channels = int(inputs.shape[3])
    model_input = Input((height, width, channels))
    model_output = UNet.conv_res(model_input)
    return keras.Model(inputs=model_input, outputs=model_output)
However, when I run I get the error:
NotImplementedError: Cannot convert a symbolic Tensor (assign_boundaries/Equal:0) to a numpy array.
Everything works fine without the Lambda function. I understand the issue is assigning value to Tensor object but how can I achieve what I am after?
Thanks
np.where works with NumPy arrays, but the output from your model is a Tensorflow tensor. Try using tf.where, which is the same thing but for tf.Tensors.
I managed to make it work by changing the function to:
def SetBoundaries(ins):
    """Zero the output wherever the (broadcast) input equals 0.

    ``ins`` is ``[network_input, network_output]``. Rather than assigning
    into the tensor, a 0/1 float mask is built from the input and multiplied
    into the output.
    """
    source, prediction = ins[0], ins[1]
    # Expand the input to the output's shape so the mask covers every channel.
    source_full = tf.broadcast_to(source, tf.shape(prediction))
    # 1.0 where the input is non-zero, 0.0 where it is zero.
    keep = K.cast(tf.not_equal(source_full, 0), dtype="float32")
    return layers.Multiply()([keep, prediction])

How do I zip tensors in tensorflow when the dimensions don't match

I have two tensors, one of shape [None, 20, 2], and one of shape [None, 1].
I would like to do an operation on each of the sub-tensors in lockstep to produce a value such that I would end up with a tensor of shape [None, 1].
In python land, I would zip these two, and iterate over the result.
So, just to be clear, I'd like to write a function that takes a [20, 2]-shape tensor and a [1]-shape tensor, and produces a [1]-shape tensor, then apply this function to the [None, 20, 2] and [None, 1] tensors, to produce a [None, 1] tensor.
Hope I articulated that well enough; higher dimensionality makes my head spin sometimes.
This works for me (TensorFlow version 1.4.0)
tf.reset_default_graph()
sess = tf.Session()
# Define placeholders with undefined first dimension.
a = tf.placeholder(dtype=tf.float32, shape=[None, 3, 4])
b = tf.placeholder(dtype=tf.float32, shape=[None, 1])
# Create some input data.
a_input = np.arange(24).reshape(2, 3, 4)
b_input = np.arange(2).reshape(2, 1)
# TensorFlow map function.
def f_tf(x):
    """Return the sum of all elements of ``x[0]`` plus all elements of ``x[1]``."""
    first, second = x[0], x[1]
    return tf.reduce_sum(first) + tf.reduce_sum(second)
# Numpy map function (for validation of results).
def f_numpy(x):
    """Return the sum of all elements of ``x[0]`` plus all elements of ``x[1]``."""
    first, second = x[0], x[1]
    return np.sum(first) + np.sum(second)
# Run TensorFlow function.
# map_fn slices `a` and `b` along their first axis in lockstep and calls
# f_tf on each pair of slices.
s = tf.map_fn(f_tf, [a, b], dtype=tf.float32)
sess.run(s, feed_dict={a: a_input, b: b_input})
array([ 66., 211.], dtype=float32)
# Run Numpy function.
for inp in zip(a_input, b_input):
print(f_numpy(inp))
66
211

Flattening two last dimensions of a tensor in TensorFlow

I'm trying to reshape a tensor from [A, B, C, D] into [A, B, C * D] and feed it into a dynamic_rnn. Assume that I don't know the B, C, and D in advance (they're a result of a convolutional network).
I think in Theano such reshaping would look like this:
x = x.flatten(ndim=3)
It seems that in TensorFlow there's no easy way to do this and so far here's what I came up with:
x_shape = tf.shape(x)
# Keep the first two axes and merge all remaining axes into one.
x = tf.reshape(x, [batch_size, x_shape[1], tf.reduce_prod(x_shape[2:])])
Even when the shape of x is known during graph building (i.e. print(x.get_shape()) prints out absolute values, like [10, 20, 30, 40] after the reshaping get_shape() becomes [10, None, None]. Again, still assume the initial shape isn't known so I can't operate with absolute values.
And when I'm passing x to a dynamic_rnn it fails:
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
Why is reshape unable to handle this case? What is the right way of replicating Theano's flatten(ndim=n) in TensorFlow with tensors of rank 4 and more?
It is not a flaw in reshape, but a limitation of tf.dynamic_rnn.
Your code to flatten the last two dimensions is correct. And, reshape behaves correctly too: if the last two dimensions are unknown when you define the flattening operation, then so is their product, and None is the only appropriate value that can be returned at this time.
The culprit is tf.dynamic_rnn, which expects a fully-defined feature shape during construction, i.e. all dimensions apart from the first (batch size) and the second (time steps) must be known. It is a bit unfortunate perhaps, but the current implementation does not seem to allow RNNs with a variable number of features, à la FCN.
I tried a simple code according to your requirements. Since you are trying to reshape a CNN output, the shape of X is same as the output of CNN in Tensorflow.
HEIGHT = 100
WIDTH = 200
N_CHANNELS = 3
N_HIDDEN = 64
X = tf.placeholder(tf.float32, shape=[None, HEIGHT, WIDTH, N_CHANNELS], name='input')  # output of CNN
# Static shape of each dimension:
# shape[0] = BATCH_SIZE (None), shape[1] = HEIGHT, shape[2] = WIDTH, shape[3] = N_CHANNELS
shape = X.get_shape().as_list()
# Merge the last two axes; -1 lets TensorFlow infer the batch size.
rnn_input = tf.reshape(X, [-1, shape[1], shape[2] * shape[3]])
print(rnn_input.shape)  # prints (?, 100, 600)
# Input for tf.nn.dynamic_rnn should be in the shape of [BATCH_SIZE, N_TIMESTEPS, INPUT_SIZE]
# Therefore, according to the reshape N_TIMESTEPS = 100 and INPUT_SIZE = 600
# create the RNN here
lstm_layers = tf.contrib.rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
outputs, _ = tf.nn.dynamic_rnn(lstm_layers, rnn_input, dtype=tf.float32)
Hope this helps.
I found a solution to this by using .get_shape().
Assuming 'x' is a 4-D Tensor.
This will only work with the Reshape Layer. As you were making changes to the architecture of the model, this should work.
# Reshape takes the target shape WITHOUT the batch axis and is then applied to the tensor.
x = tf.keras.layers.Reshape([x.get_shape()[1], x.get_shape()[2] * x.get_shape()[3]])(x)
Hope this works!
If you use the tf.keras.models.Model or tf.keras.layers.Layer wrapper, the build method provides a nice way to do this.
Here's an example:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, Conv2D, Conv2DTranspose, Attention, Layer, Reshape
class VisualAttention(Layer):
    """Attention over the pixels of an image batch.

    The spatial axes are flattened into one sequence axis, query/value (and
    optionally key) projections are computed with 1-D convolutions, the Keras
    ``Attention`` layer is applied, and the result is reshaped back to an
    image with ``channels_out`` channels. The two Reshape layers are created
    in ``build`` because they need the input height and width.
    """

    def __init__(self, channels_out, key_is_value=True):
        """Create the projection and attention sub-layers.

        Args:
            channels_out: number of filters of every projection and number
                of channels of the output.
            key_is_value: if True the value projection is reused as the key
                projection; otherwise a separate Conv1D is created.
        """
        super(VisualAttention, self).__init__()
        self.channels_out = channels_out
        self.key_is_value = key_is_value
        self.flatten_images = None  # see build method
        self.unflatten_images = None  # see build method
        self.query_conv = Conv1D(filters=channels_out, kernel_size=1, padding='same')
        self.value_conv = Conv1D(filters=channels_out, kernel_size=4, padding='same')
        self.key_conv = self.value_conv if key_is_value else Conv1D(filters=channels_out, kernel_size=4, padding='same')
        self.attention_layer = Attention(use_scale=False, causal=False, dropout=0.)

    def build(self, input_shape):
        """Create the Reshape layers once the input shape (b, h, w, c) is known."""
        b, h, w, c = input_shape
        # (h, w, c) -> (h*w, c): treat every pixel as one sequence element.
        self.flatten_images = Reshape((h*w, c), input_shape=(h, w, c))
        # (h*w, channels_out) -> (h, w, channels_out): back to image layout.
        self.unflatten_images = Reshape((h, w, self.channels_out), input_shape=(h*w, self.channels_out))

    def call(self, x, training=True):
        """Apply attention to ``x`` and return it in image layout."""
        x = self.flatten_images(x)
        q = self.query_conv(x)
        v = self.value_conv(x)
        # Two-element list (query, value) when the key is the value,
        # otherwise a third element from the separate key projection.
        inputs = [q, v] if self.key_is_value else [q, v, self.key_conv(x)]
        output = self.attention_layer(inputs=inputs, training=training)
        return self.unflatten_images(output)
# test
import numpy as np
x = np.arange(8*28*32*3).reshape((8, 28, 32, 3)).astype('float32')
model = VisualAttention(8)
y = model(x)
print(y.shape)

scatter_nd_add() example for sparse addition in TensorFlow

I am having difficulty applying tf.scatter_nd_add() to 2D tensors. The documentation is a bit unclear and does not contain an example for sparse updates, only for full slice updates.
My case is the following:
updates - 2D tensor of shape [None, 6]
indices - 2D tensor of shape [None, 6]
ref - 2D Variable of zeros of shape [None, 6]
It is guaranteed that updates, indices and ref will always have their first dimension equal, but the size of that dimension can be varying. The update I want to perform looks like
for i, j:
k = indices[i][j]
ref[i][k] += updates[i][j]
Note that indices contains duplicates. tf.scatter_nd_add(ref, indices, updates) complains about shape mismatch and I cannot figure out how I need to restructure the tensors in order to performs the update.
I figured it out. Each 2D entry in indices must actually specify the absolute location that will get updated in ref. This means that indices must be 3D and then the non-vectorized update looks like:
for i, j:
r, k = indices[i][j]
ref[r][k] += updates[i][j]
In the above question it just happens that r is always equal to i.
Here is a full Tensorflow implementation with varying shapes. For clarity, in the following example, col_indices corresponds to indices from the original question:
import tensorflow as tf
import numpy as np
updates = tf.placeholder(dtype=tf.float32, shape=[None, 6])
col_indices = tf.placeholder(dtype=tf.int32, shape=[None, 6])
row_indices = tf.cumsum(tf.ones_like(col_indices), axis=0, exclusive=True)
indices = tf.concat([tf.expand_dims(row_indices, axis=-1),
tf.expand_dims(col_indices, axis=-1)], axis=-1)
tmp_var = tf.Variable(0, trainable=False, dtype=tf.float32, validate_shape=False)
ref = tf.assign(tmp_var, tf.zeros_like(updates), validate_shape=False)
# This makes sure that ref is always 0 before scatter_nd_add() runs:
# the scatter op is only executed after the assign op `ref` has run.
with tf.control_dependencies([ref]):
    result = tf.scatter_nd_add(ref, indices, updates)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Create example input data
np_input = np.arange(0, 6, 1, dtype=np.int32)
np_input = np.tile(np_input[None,:], [10, 1])
res = sess.run(result, feed_dict={updates: np_input, col_indices: np_input})
print(res)

Tensorflow: Recurrent Neural Network Batch Training

I am trying to implement RNN in Tensorflow. I am writing my own functions instead of using RNN cells to practice.
The problem is sequence tagging, input size is [32, 48, 900] where 32 is batch size, 48 is time steps and 900 is vocab size which is one-hot encoded vector. Output is [32, 48, 145] where first two dimensions are same as input, but the last dimension is output vocabulary size (one-hot). Basically this is a NLP tagging problem.
I am getting following error:
InvalidArgumentError (see above for traceback): logits and labels must
be same size: logits_size=[48,145] labels_size=[1536,145]
The actual labels_size is [32, 48, 145] but it merges first two dimensions without my control. FYI 32*48 = 1536
If I run my RNN with batch size 1, it works fine as expected. I could not figure out how to solve the issue. I am getting the problem in the last line of the code.
I pasted the related part of the code:
inputs = tf.placeholder(shape=[None, self.seq_length, self.vocab_size], dtype=tf.float32, name="inputs")
targets = tf.placeholder(shape=[None, self.seq_length, self.output_vocab_size], dtype=tf.float32, name="targets")
init_state = tf.placeholder(shape=[1, self.hidden_size], dtype=tf.float32, name="state")
initializer = tf.random_normal_initializer(stddev=0.1)
with tf.variable_scope("RNN") as scope:
    hs_t = init_state
    ys = []  # one logits tensor per time step
    # NOTE: inputs[0] selects only the first example of the batch, so the
    # loop below produces logits for a single sequence regardless of the
    # batch size.
    for t, xs_t in enumerate(tf.split(inputs[0], self.seq_length, axis=0)):
        # Share the same weights across time steps.
        if t > 0: scope.reuse_variables()
        Wxh = tf.get_variable("Wxh", [self.vocab_size, self.hidden_size], initializer=initializer)
        Whh = tf.get_variable("Whh", [self.hidden_size, self.hidden_size], initializer=initializer)
        Why = tf.get_variable("Why", [self.hidden_size, self.output_vocab_size], initializer=initializer)
        bh = tf.get_variable("bh", [self.hidden_size], initializer=initializer)
        by = tf.get_variable("by", [self.output_vocab_size], initializer=initializer)
        # Recurrent update followed by the output projection.
        hs_t = tf.tanh(tf.matmul(xs_t, Wxh) + tf.matmul(hs_t, Whh) + bh)
        ys_t = tf.matmul(hs_t, Why) + by
        ys.append(ys_t)
        # NOTE(review): original indentation was lost; assumed to sit inside the loop.
        hprev = hs_t
output_softmax = tf.nn.softmax(ys) # Get softmax for sampling
#outputs = tf.concat(ys, axis=0)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=ys))
The problem may lie in the size of ys: it should have size [32, 48, 145], but the computed ys only has size [48, 145]. With a batch size of 1 the target size is [1, 48, 145], which is equivalent to [48, 145] once the leading dimension is dropped, so that case works.
To solve the problem, you can add a loop over the batch dimension (instead of using only inputs[0]),
such as:
for i in range(inputs.getshape(0)):
for t, xs_t in enumerate(tf.split(inputs[i], self.seq_length, axis=0)):

Categories