Concatenate along last dimension with custom layer with Tensorflow - python

I'm trying to concatenate a number to the last dimension of a (None, 10, 3) tensor to make it a (None, 10, 4) tensor using a custom layer. It seems impossible, because to concatenate, all the dimensions except for the one being merged on must be equal and we can't initialize a tensor with 'None' as the first dimension.
For example, the code below gives me this error:
ValueError: Shape must be rank 3 but is rank 2 for '{{node position_embedding_concat_37/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](Placeholder, position_embedding_concat_37/concat/values_1, position_embedding_concat_37/concat/axis)' with input shapes: [?,10,3], [10,1], []
class PositionEmbeddingConcat(tf.keras.layers.Layer):
    def __init__(self, sequence_length, **kwargs):
        super(PositionEmbeddingConcat, self).__init__(**kwargs)
        self.positional_embeddings_array = np.arange(sequence_length).reshape(sequence_length, 1)

    def call(self, inputs):
        outp = tf.concat([inputs, self.positional_embeddings_array], axis=2)
        return outp
seq_len = 10
input_layer = Input(shape = (seq_len, 3))
embedding_layer = PositionEmbeddingConcat(sequence_length = seq_len)
embeddings = embedding_layer(input_layer)
dense_layer = Dense(units = 1)
output = dense_layer(Flatten()(embeddings))
modelT = tf.keras.Model(input_layer, output)
Is there another way to do this?

You will have to make sure you respect the batch dimension. Maybe something like this:
outp = tf.concat([inputs, tf.cast(tf.repeat(self.positional_embeddings_array[None, ...], repeats=tf.shape(inputs)[0], axis=0), dtype=tf.float32)], axis = 2)
Also, tf.shape gives you the dynamic shape of a tensor.
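Putting it together, a minimal sketch of the whole layer (assuming the position column should simply be repeated across the batch and cast to the inputs' dtype):

import numpy as np
import tensorflow as tf

class PositionEmbeddingConcat(tf.keras.layers.Layer):
    def __init__(self, sequence_length, **kwargs):
        super().__init__(**kwargs)
        # Column vector of positions 0 .. sequence_length-1, shape (sequence_length, 1)
        self.positional_embeddings_array = np.arange(sequence_length).reshape(sequence_length, 1)

    def call(self, inputs):
        # Add a batch axis and repeat it to the dynamic batch size,
        # then concatenate along the feature (last) axis.
        positions = tf.repeat(
            self.positional_embeddings_array[None, ...],   # (1, sequence_length, 1)
            repeats=tf.shape(inputs)[0],
            axis=0,
        )
        return tf.concat([inputs, tf.cast(positions, inputs.dtype)], axis=2)

With that, embeddings comes out as (None, 10, 4) and the rest of the model builds unchanged.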

Your question is about concatenating tensors of shape [10, 3] and [10, 1], but the sample below also applies a Dense-style kernel with a specific number of units. You can drop that multiplication and use tf.concat() on its own, or change the number of units to whatever you need.
Sample: the position embedding here is not only the concatenate step; the tails are appended along the last dimension and the combined result is then projected by the kernel.
import tensorflow as tf

class MyPositionEmbeddedLayer(tf.keras.layers.Concatenate):
    def __init__(self, units):
        super(MyPositionEmbeddedLayer, self).__init__(units)
        self.num_units = units

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.num_units])

    def call(self, inputs, tails):
        ### area to perform pre-calculation or custom algorithms ###
        #                                                          #
        #                                                          #
        ############################################################
        temp = tf.keras.layers.Concatenate(axis=2)([inputs, tails])
        temp = tf.matmul(temp, self.kernel)
        temp = tf.squeeze(temp)
        return temp
#####################################################
start = 3
limit = 93
delta = 3
sample = tf.range(start, limit, delta)
sample = tf.cast( sample, dtype=tf.float32 )
sample = tf.constant( sample, shape=( 10, 1, 3, 1 ) )
start = 3
limit = 33
delta = 3
tails = tf.range(start, limit, delta)
tails = tf.cast( tails, dtype=tf.float32 )
tails = tf.constant( tails, shape=( 10, 1, 1, 1 ) )
layer = MyPositionEmbeddedLayer(10)
print( layer(sample, tails) )
Output: the concatenated tensor projected through the Dense kernel; neighbouring rows produce closely related values.
...
[[-26.67632 35.44779 23.239683 20.374893 -12.882696
54.963055 -18.531412 -4.589509 -21.722694 -43.44675 ]
[-27.629044 36.713783 24.069672 21.102568 -13.3427925
56.92602 -19.193249 -4.7534204 -22.498507 -44.99842 ]
[-28.58177 37.979774 24.89966 21.830242 -13.802889
58.88899 -19.855083 -4.917331 -23.274317 -46.55009 ]
[ -9.527256 12.6599245 8.299887 7.276747 -4.600963
19.629663 -6.6183615 -1.6391104 -7.7581053 -15.516697 ]]], shape=(10, 4, 10), dtype=float32)

Related

expand a tensor in Keras

My hare-brained idea is to create a custom layer that allows me to programmatically add features to a model's output.
EDIT - My "O" output values (see image below) are ASCII values. I want the "F"eature nodes to be 1 if the corresponding "O" nodes are alphabetic and 0 otherwise. In a previous experiment, the additional information made training much much better.
class Unpack_and_Categorize(keras.layers.Layer):
    def __init__(self, units=32, **kwargs):
        super(Unpack_and_Categorize, self).__init__(units, **kwargs)
        self.units = units
        self.trainable = False

    def build(self, input_shape):
        self.weight = self.add_weight(
            shape=(input_shape[-1], self.units),
            trainable=True,
        )
        self.bias = self.add_weight(
            shape=(self.units,), trainable=True, dtype="float32"
        )

    def call(self, inputs):
        batch_size = inputs.shape[0]
        one_hot_size = self.units
        c = tf.constant([0] * (one_hot_size * batch_size), shape=(batch_size, one_hot_size))
        base_out = tf.tensordot(inputs, self.weight, axes=1) + self.bias
        return tf.concat(base_out, c, shape=(batch_size, 2*one_hot_size))
This image shows what I am trying to accomplish. My custom layer (right side) has 3 values that are densely connected to the previous layer. But now I want to add three more output values that are entirely derived from O1..3. For example, I might set Fx to 1 if Ox is an even number. This would be done in the call method.
So the challenge is that I don't want to hardcode the number of outputs. That is, if the input layer has 10 inputs, then the custom layer will have 20 values. (The challenge that follows is 'will it back-prop, or simply explode?')
Here is an example where we see the "A" is categorized with a 1, while the punctuation and numeric are categorized with a 0.
It's a bit difficult to say exactly what you want to do without seeing all your code, but you need to make sure that all dimensions except the one you want to concatenate along are the same:
import tensorflow as tf

def call(inputs):
    w_init = tf.random_normal_initializer()
    w = tf.Variable(
        initial_value=w_init(shape=(10, 10), dtype="float32"),
        trainable=True,
    )
    batch_size = tf.shape(inputs)[0]
    c = tf.constant([1.0] * (15 * tf.cast(batch_size, tf.float32)), shape=(batch_size, 15))
    print('Inputs: ', inputs.shape)
    print('C: ', c.shape)
    return tf.concat((tf.tensordot(inputs, w, axes=1), c), 1)

batch_size = 5
print('Result: ', call(tf.random.normal((batch_size, 10))).shape)
Inputs: (5, 10)
C: (5, 15)
Result: (5, 25)
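To connect that back to the question's goal (F = 1 when the corresponding O is alphabetic), here is a hedged sketch of how the derived half could be computed inside call instead of concatenating a constant; the rounding step and the ASCII ranges are illustrative assumptions, not code from the question:

import tensorflow as tf

def call_with_derived_features(inputs, weight, bias):
    # Dense half: the "O" values, shape (batch_size, units)
    base_out = tf.tensordot(inputs, weight, axes=1) + bias
    # Derived half: 1.0 where the rounded value falls in an ASCII letter range, else 0.0
    codes = tf.cast(tf.round(base_out), tf.int32)
    is_alpha = tf.logical_or(
        tf.logical_and(codes >= 65, codes <= 90),    # 'A'..'Z'
        tf.logical_and(codes >= 97, codes <= 122),   # 'a'..'z'
    )
    features = tf.cast(is_alpha, base_out.dtype)
    # (batch_size, 2 * units): same batch dimension, so the concat is valid
    return tf.concat([base_out, features], axis=1)

Note that the derived half is a step function of the dense half, so no gradient flows through it; only the dense half is trained, which is one answer to the 'will it back-prop' worry.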

PyTorch: Sizes of tensors must match on 2 input neural network

I am attempting to recreate a 2 input neural network from this article: https://towardsdatascience.com/moving-from-keras-to-pytorch-f0d4fff4ce79
I have copied the network described in the post and adjusted it so that it fits my data. The first input is from GloVe Word embeddings while the other is numerical features about the text data.
class Net(nn.Module):
    def __init__(self, hidden_size, lin_size, embedding_matrix=embedding_weights):
        super(Alex_NeuralNet_Meta, self).__init__()

        # Initialize some parameters for your model
        self.hidden_size = hidden_size
        drp = 0.1

        # Layer 1: Embeddings.
        self.embedding = nn.Embedding(size_of_vocabulary, pretrained_embedding_dim)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False

        # Layer 2: Dropout1D(0.1)
        self.embedding_dropout = nn.Dropout2d(0.1)

        # Layer 3: Bidirectional CuDNNLSTM
        self.lstm = nn.LSTM(pretrained_embedding_dim, hidden_size, bidirectional=True, batch_first=True)

        # Layer 4: Bidirectional CuDNNGRU
        self.gru = nn.GRU(hidden_size*2, hidden_size, bidirectional=True, batch_first=True)

        # Layer 7: A dense layer
        self.linear = nn.Linear(hidden_size*6 + X2_train.shape[1], lin_size)
        self.relu = nn.ReLU()

        # Layer 8: A dropout layer
        self.dropout = nn.Dropout(drp)

        # Layer 9: Output dense layer with one output for our Binary Classification problem.
        self.out = nn.Linear(lin_size, 1)

    def forward(self, x):
        '''
        here x[0] represents the first element of the input that is going to be passed.
        We are going to pass a tuple where first one contains the sequences(x[0])
        and the second one is a additional feature vector(x[1])
        '''
        h_embedding = self.embedding(x[0].long())
        h_embedding = torch.squeeze(self.embedding_dropout(torch.unsqueeze(h_embedding, 0)))
        #print("emb", h_embedding.size())

        h_lstm, _ = self.lstm(h_embedding)
        # print("lst",h_lstm.size())

        h_gru, hh_gru = self.gru(h_lstm)
        hh_gru = hh_gru.view(-1, 2*self.hidden_size)
        print("gru", h_gru.size())
        print("h_gru", hh_gru.size())

        # Layer 5: is defined dynamically as an operation on tensors.
        avg_pool = torch.mean(h_gru, 1)
        max_pool, _ = torch.max(h_gru, 1)
        print("avg_pool", avg_pool.size())
        print("max_pool", max_pool.size())

        # the extra features you want to give to the model
        f = torch.tensor(x[1], dtype=torch.float).cuda()
        print("f", f.size())

        # Layer 6: A concatenation of the last state, maximum pool, average pool and
        # additional features
        conc = torch.cat((hh_gru, avg_pool, max_pool, f), 1)
        #print("conc", conc.size())

        # passing conc through linear and relu ops
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)

        # return the final output
        return out
And during runtime I get an error on the concatenation line:
RuntimeError: Sizes of tensors must match except in dimension 0. Got 33164 and 20 (The offending index is 0)
From the dimensions of the outputs, I can see where the problem lies but I am not sure how I can fix it
The data inputs to the network is:
torch.Size([20, 150])
torch.Size([33164, 40])
The sizes of each layer output is:
gru torch.Size([20, 150, 80])
h_gru torch.Size([20, 80])
avg_pool torch.Size([20, 80])
max_pool torch.Size([20, 80])
f torch.Size([33164, 40])
For the example above the batch size is 20, hidden_size is 40, the number of rows in numerical data features is 33164 and its feature size is 40.
Thanks for any help in advance
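For reference, a minimal sketch of the shape requirement behind the error: torch.cat along dim 1 needs every tensor to share the batch size in dim 0. The numbers mirror the sizes printed above, and slicing f_full is only illustrative; the real fix would be to batch the numeric features together with the sequences so each batch delivers its own 20 rows.

import torch

batch_size, hidden_size = 20, 40
hh_gru   = torch.randn(batch_size, 2 * hidden_size)   # (20, 80)
avg_pool = torch.randn(batch_size, 2 * hidden_size)   # (20, 80)
max_pool = torch.randn(batch_size, 2 * hidden_size)   # (20, 80)

f_full  = torch.randn(33164, 40)      # the whole numeric feature matrix
f_batch = f_full[:batch_size]         # (20, 40): only the rows belonging to this batch

# Works: every tensor has 20 rows. Passing f_full instead reproduces
# "Sizes of tensors must match" because 33164 != 20.
conc = torch.cat((hh_gru, avg_pool, max_pool, f_batch), dim=1)
print(conc.shape)   # torch.Size([20, 280]) = hidden_size*6 + 40, matching the Linear layer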

Dimensionality of tensor from my WaveNet incompatible with PyTorch cross_entropy function

I've been working on a project implementing my own version of WaveNet in Python, as DeepMind presented it back in 2016.
Preprocessing includes mu law encoding, and one hot encoding. The model itself functions well, my problem lies in the loss function torch.nn.functional.cross_entropy used during training, found here: https://pytorch.org/docs/stable/nn.functional.html
Particularly, the relation between my output and my target tensors, namely
input_tensor.shape = tensor([1, 256, 225332]) # [batch_size, sample_size, audio_length]
output.shape = tensor([1, 256, 225332])
According to F.cross_entropy, I must have output = (N, C) and target = input_tensor = (N).
My supervisor told me to do the following:
output = output.T.reshape(-1, 256) = tensor([225332, 256])
target = input_tensor.T.long() = tensor([225332, 256, 1]) # This needs to be 1-dimensional, help?
For anyone interested in the explicit code, below:
NOTE - the receptive field is not padded; just for debugging purposes I have subtracted it instead, although I know this is not the natural way to do it.
>>> output.T.reshape(-1, 256).shape
torch.Size([225332, 256])
>>> input_tensor[:, :, model.input_size - model.output_size:].T.shape
torch.Size([225332, 256, 1])
>>> loss = F.cross_entropy(output.T.reshape(-1, 256), input_tensor[:, :, model.input_size - model.output_size:].T.long().to(device))
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2020.3.3\plugins\python-ce\helpers\pydev\_pydevd_bundle\pydevd_exec2.py", line 3, in Exec
exec(exp, global_vars, local_vars)
File "<input>", line 1, in <module>
File "C:\Users\JaQtae\anaconda3\envs\CortiGit\lib\site-packages\torch\nn\functional.py", line 2693, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "C:\Users\JaQtae\anaconda3\envs\CortiGit\lib\site-packages\torch\nn\functional.py", line 2388, in nll_loss
ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: 1D target tensor expected, multi-target not supported
Somewhat of a novice-in-training with ML and AI, particularly the PyTorch library.
Would appreciate any advice regarding how I should tackle this issue.
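To make the shape contract concrete, here is a small standalone sketch (not the model code) of what F.cross_entropy accepts when the target starts out one-hot encoded; in the question N would be 225332 and C = 256, smaller numbers are used here only to keep it light:

import torch
import torch.nn.functional as F

N, C = 1000, 256                                   # samples, mu-law classes

logits = torch.randn(N, C)                         # model output reshaped to (N, C)

# A one-hot target of shape (N, C) must be collapsed to class indices of shape (N,)
# before F.cross_entropy will accept it.
one_hot_target = F.one_hot(torch.randint(0, C, (N,)), C).float()
target = one_hot_target.argmax(dim=1)              # (N,), dtype long

loss = F.cross_entropy(logits, target)             # (N, C) logits vs (N,) indices
print(loss.item())

As the traceback shows (log_softmax followed by nll_loss), F.cross_entropy normalizes the logits itself, so the Softmax at the end of the model's forward would effectively be applied twice during training.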
The training:
model = Wavenet(layers=3, blocks=2, output_size=32).to(device)
model.apply(initialize)  # Initialize causalconv1d() with xavier_uniform_ weights and bias of 0.
model.train()
optimizer = optim.Adam(model.parameters(), lr=0.0003)

for i, batch in tqdm(enumerate(train_loader)):
    mu_enc_my_x = encode_mu_law(x=batch, mu=256)
    input_tensor = one_hot_encoding(mu_enc_my_x)
    input_tensor = input_tensor.to(device)

    output = model(input_tensor)

    # TODO: Inspect input/output formats, maybe something wrong....
    loss = F.cross_entropy(output.T.reshape(-1, 256), input_tensor[:, :, model.input_size - model.output_size:].long().to(device))  # subtract receptive field instead of pad it, workaround for quick debugging of loss-issue.
    print("\nLoss:", loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 1000 == 0:
        print("\nSaving model")
        torch.save(model.state_dict(), "wavenet.pt")
The purpose is to get my loss function to work properly, so that I can generate sound files. The current ones with my bad loss function obviously return pure noise.
My full model if any help.
"""
Wavenet model
Sources:
https://github.com/kan-bayashi/PytorchWaveNetVocoder/blob/master/wavenet_vocoder/nets/wavenet.py
https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/wavenet.py
https://github.com/Dankrushen/Wavenet-PyTorch/blob/master/wavenet/models.py
https://github.com/vincentherrmann/pytorch-wavenet
"""
from torch import nn
import torch
#TODO: Add local and global conditioning
def initialize(m):
"""
Initialize CNN with Xavier_uniform weight and 0 bias.
"""
if isinstance(m, torch.nn.Conv1d):
nn.init.xavier_uniform_(m.weight)
nn.init.constant_(m.bias, 0.0)
class CausalConv1d(torch.nn.Module):
"""
Causal Convolution for WaveNet
Causality can be introduced with padding as (kernel_size - 1) * dilation (see Keras documentation)
or it can be introduced as follows according to Golbin.
https://github.com/golbin/WaveNet/blob/05545339096c3a1d9909d96fb19da4fbae28d8c6/wavenet/networks.py#L38
Else, look at the following article, several ways to implement it using PyTorch:
https://github.com/pytorch/pytorch/issues/1333
- Jakob
"""
def __init__(self, in_channels, out_channels, kernel_size, dilation = 1, bias = True):
super(CausalConv1d, self).__init__()
# padding=1 for same size(length) between input and output for causal convolution
self.dilation = dilation
self.kernel_size = kernel_size
self.in_channels = in_channels
self.out_channels = out_channels
self.padding = padding = (kernel_size-1) * dilation # kernelsize = 2, -1 * dilation = 1, = 1. - Jakob.
self.conv = torch.nn.Conv1d(in_channels, out_channels,
kernel_size, padding=padding, dilation=dilation,
bias=bias) # Fixed for WaveNet but not sure
def forward(self, x):
output = self.conv(x)
if self.padding != 0:
output = output[:, :, :-self.padding]
return output
class Wavenet(nn.Module):
def __init__(self,
layers=3,
blocks=2,
dilation_channels=32,
residual_block_channels=512,
skip_connection_channels=512,
output_channels=256,
output_size=32,
kernel_size=3
):
super(Wavenet, self).__init__()
self.layers = layers
self.blocks = blocks
self.dilation_channels = dilation_channels
self.residual_block_channels = residual_block_channels
self.skip_connection_channels = skip_connection_channels
self.output_channels = output_channels
self.kernel_size = kernel_size
self.output_size = output_size
# initialize dilation variables
receptive_field = 1
init_dilation = 1
# List of layers and connections
self.dilations = []
self.residual_convs = nn.ModuleList()
self.filter_conv_layers = nn.ModuleList()
self.gate_conv_layers = nn.ModuleList()
self.skip_convs = nn.ModuleList()
# First convolutional layer
self.first_conv = CausalConv1d(in_channels=self.output_channels,
out_channels=residual_block_channels,
kernel_size = 2)
# Building the Modulelists for the residual blocks
for b in range(blocks):
additional_scope = kernel_size - 1
new_dilation = 1
for i in range(layers):
# dilations of this layer
self.dilations.append((new_dilation, init_dilation))
# dilated convolutions
self.filter_conv_layers.append(nn.Conv1d(in_channels=residual_block_channels, out_channels=dilation_channels, kernel_size=kernel_size, dilation=new_dilation))
self.gate_conv_layers.append(nn.Conv1d(in_channels=residual_block_channels, out_channels=dilation_channels, kernel_size=kernel_size, dilation=new_dilation))
# 1x1 convolution for residual connection
self.residual_convs.append(nn.Conv1d(in_channels=dilation_channels, out_channels=residual_block_channels, kernel_size=1))
# 1x1 convolution for skip connection
self.skip_convs.append(nn.Conv1d(in_channels=dilation_channels,
out_channels=skip_connection_channels,
kernel_size=1))
# Update receptive field and dilation
receptive_field += additional_scope
additional_scope *= 2
init_dilation = new_dilation
new_dilation *= 2
# Last two convolutional layers
self.last_conv_1 = nn.Conv1d(in_channels=skip_connection_channels,
out_channels=skip_connection_channels,
kernel_size=1)
self.last_conv_2 = nn.Conv1d(in_channels=skip_connection_channels,
out_channels=output_channels,
kernel_size=1)
#Calculate model receptive field and the required input size for the given output size
self.receptive_field = receptive_field
self.input_size = receptive_field + output_size - 1
def forward(self, input):
# Feed first convolutional layer with input
x = self.first_conv(input)
# Initialize skip connection
skip = 0
# Residual block
for i in range(self.blocks * self.layers):
(dilation, init_dilation) = self.dilations[i]
# Residual connection bypassing dilated convolution block
residual = x
# input to dilated convolution block
filter = self.filter_conv_layers[i](x)
filter = torch.tanh(filter)
gate = self.gate_conv_layers[i](x)
gate = torch.sigmoid(gate)
x = filter * gate
# Feed into 1x1 convolution for skip connection
s = self.skip_convs[i](x)
#Adding skip & Match size with decreasing dimensionality of x
if skip is not 0:
skip = skip[:, :, -s.size(2):]
skip = s + skip # Sum all skip connections
# Feed into 1x1 convolution for residual connection
x = self.residual_convs[i](x)
#Adding Residual & Match size with decreasing dimensionality of x
x = x + residual[:, :, dilation * (self.kernel_size - 1):]
# print(x.shape)
x = torch.relu(skip)
#Last conv layers
x = torch.relu(self.last_conv_1(x))
x = self.last_conv_2(x)
soft = torch.nn.Softmax(dim=1)
x = soft(x)
return x
EDIT: added code snippet of train for clarity, and full model

Why return self.head(x.view(x.size(0), -1)) in the nn.Module for pyTorch reinforcement learning example

I understand that the pole-balancing example requires 2 outputs. Reinforcement Learning (DQN) Tutorial
Here is the output for self.head
print ('x',self.head)
x = Linear(in_features=512, out_features=2, bias=True)
When I run the epochs, below are the outputs:
print (self.head(x.view(x.size(0), -1)))
return self.head(x.view(x.size(0), -1))
tensor([[-0.6945, -0.1930]])
tensor([[-0.0195, -0.1452]])
tensor([[-0.0906, -0.1816]])
tensor([[ 0.0631, -0.9051]])
tensor([[-0.0982, -0.5109]])
...
The size of x is:
x = torch.Size([121, 32, 2, 8])
So I am trying to understand what x.view(x.size(0), -1) is doing.
I understand from the comment in the code that it's returning:
Returns tensor([[left0exp,right0exp]...]).
But how is x, which is torch.Size([121, 32, 2, 8]), being reduced to a tensor of size 2?
Is there an alternative way of writing that makes more sense? What if I had 4 outputs? How would I represent that? Why x.size(0)? Why -1?
So it appears to take self.head with 4 outputs to 2 outputs. Is that correct?
At the bottom is the class I am referring to:
class DQN(nn.Module):
    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1
        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
x.view(x.size(0), -1) is flattening the tensor; this is because the Linear layer only accepts a vector (1d array) per example. To break it down, x.view() reshapes the tensor to the specified shape (more info). x.size(0) returns the 1st dimension of the tensor (which is the batch size; this should remain constant). The -1 in x.view() is a filler, in other words, the dimension we don't know, so PyTorch calculates it automatically. For example, if x = torch.tensor([1,2,3,4]), to reshape the tensor to a 2x2 you could do x.view(2,2), x.view(2,-1) or x.view(-1,2).
The output shape is not a tensor shape of 2, but of (121, 2): 121 is the batch size, and the 2 comes from the Linear layer's output. So to change the output size from 2 to 4, you would have to change the outputs argument in the __init__ function to 4.
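A minimal sketch of that flattening step with the shapes from the question (121 is the batch size; 32 * 2 * 8 = 512 matches the head's in_features):

import torch
import torch.nn as nn

x = torch.randn(121, 32, 2, 8)        # output of the last conv / batch-norm block
flat = x.view(x.size(0), -1)          # keep the batch dim, flatten the rest
print(flat.shape)                     # torch.Size([121, 512])

head = nn.Linear(in_features=512, out_features=2, bias=True)
print(head(flat).shape)               # torch.Size([121, 2]): two action values per example

# With outputs=4 in DQN.__init__, the head would be nn.Linear(512, 4)
# and the result would be torch.Size([121, 4]).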

Finding the sum or mean of a 3d matrix with variable length in tensorflow

I have to do an averaging of a 3d tensor, where the first dimension represents the batch_size, the second dimension represents the max_length of the sentences (time axis) in the batch, and the last dimension represents the embedding dimension. For those who are familiar with LSTMs, it is obtained by tf.nn.embedding_lookup.
For example:
Assume I have 3 sentences
[ [i, love, you,], [i, don't, love, you,], [i, always, love, you, so, much ]]
Here batch_size = 3, max_length = 6 (3rd sentence) and assume embedding dimension = 100. Normally, we will pad the first 2 sentences to match the max_length. Now, I need to average the word embeddings of each sentence. But if I use tf.reduce_sum, it will take those padded vectors into consideration for the first two sentences, which is wrong. Is there an efficient way to do this in tensorflow?
A possible solution consists in passing the lengths of the original sentences (the ones without padding) to the model. In this way we can compute correct average embedding for each sentence.
In the preprocessing phase (when you generate your sentences), keep track of the length of each sentence. Suppose you generate the sentences with a generate_batch function, then:
batch = generate_batch(...)
batch_sentences = batch["sentences"] # [[i, love, you], [i, don't, love, you], ...]
batch_sentence_lengths = batch["sentence_lengths"] # [3, 4, ...]
Now you can feed the sentences and their lengths to the model:
with tf.Session(...) as sess:
    ...
    (loss, ) = sess.run(
        [loss],
        feed_dict={
            sentences: batch_sentences,
            sentence_lengths: batch_sentence_lengths,
            ...
        })
    ...
You can use the length of each sentence in your model now:
...
# sentence_lengths is a sequence of integers: convert it to a sequence of floats
# sentence_lengths_float.shape = sentence_lengths.shape = (batch_size, )
sentence_lengths_float = tf.cast(sentence_lengths, tf.float32)
# Compute the sum of the embeddings for each sentence.
# If sentence_embeddings.shape = (batch_size, max_sentence_length, embedding_size), then sentence_axis = 1
# embeddings_sum_for_each_sentence.shape = (batch_size, embeddings_size)
embeddings_sum_for_each_sentence = tf.reduce_sum(sentence_embeddings, axis=sentence_axis)
# tf.div(a, b) divides each element of the last dimension of a by each element of b as long as the a.shape[-1] = n and b.shape = (1, n). See broadcasting in tf.
# If a is matrix, then tf.div divides each element of a row by the corresponding element in b. But we want a column-wise division, so we need to transpose a first.
# embeddings_avg_for_each_sentence_t.shape = (embedding_size, batch_size)
embeddings_avg_for_each_sentence_t = tf.div(tf.transpose(embeddings_sum_for_each_sentence), sentence_lengths_float)
# Finally we need to transpose the result again.
# embeddings_avg_for_each_sentence.shape = (batch_size, embedding_size)
embeddings_avg_for_each_sentence = tf.transpose(embeddings_avg_for_each_sentence_t)
...
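As a cross-check, here is a shorter, equivalent formulation of the same averaging using a mask: tf.sequence_mask zeroes out the padded positions before the sum. This is a sketch rather than the code above, and the shapes are the ones from the question:

import tensorflow as tf

batch_size, max_len, emb_size = 3, 6, 100
sentence_embeddings = tf.random.normal((batch_size, max_len, emb_size))
sentence_lengths = tf.constant([3, 4, 6])

# (batch_size, max_len, 1) mask: 1.0 for real tokens, 0.0 for padding
mask = tf.cast(tf.sequence_mask(sentence_lengths, maxlen=max_len), tf.float32)[:, :, None]

# Sum only the unpadded embeddings, then divide by the true sentence lengths
summed = tf.reduce_sum(sentence_embeddings * mask, axis=1)                # (batch_size, emb_size)
averaged = summed / tf.cast(sentence_lengths, tf.float32)[:, None]        # (batch_size, emb_size)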
The way to do this is as follows. It is a bit complicated but works fine.
Some functions are obtained from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py . I recommend doing the imports the same as in that code. The main code is as follows:
def _dynamic_average_loop(inputs,
                          initial_state,
                          parallel_iterations,
                          swap_memory,
                          sequence_length=None,
                          dtype=None):
    state = initial_state
    assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

    flat_input = nest.flatten(inputs)
    embedding_dimension = tf.shape(inputs)[2]
    flat_output_size = [embedding_dimension]

    # Construct an initial output
    input_shape = array_ops.shape(flat_input[0])
    time_steps = input_shape[0]
    batch_size = _best_effort_input_batch_size(flat_input)

    inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                             for input_ in flat_input)

    const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

    for shape in inputs_got_shape:
        if not shape[2:].is_fully_defined():
            raise ValueError(
                "Input size (depth of inputs) must be accessible via shape inference,"
                " but saw value None.")
        got_time_steps = shape[0].value
        got_batch_size = shape[1].value
        if const_time_steps != got_time_steps:
            raise ValueError(
                "Time steps is not the same for all the elements in the input in a "
                "batch.")
        if const_batch_size != got_batch_size:
            raise ValueError(
                "Batch_size is not the same for all the elements in the input.")

    # Prepare dynamic conditional copying of state & output
    def _create_zero_arrays(size):
        size = _concat(batch_size, size)
        return array_ops.zeros(
            array_ops.stack(size), _infer_state_dtype(dtype, state))

    flat_zero_output = tuple(_create_zero_arrays(output)
                             for output in flat_output_size)
    zero_output = nest.pack_sequence_as(structure=embedding_dimension,
                                        flat_sequence=flat_zero_output)

    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)
    else:
        max_sequence_length = time_steps

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn") as scope:
        base_name = scope

    def _create_ta(name, element_shape, dtype):
        return tensor_array_ops.TensorArray(dtype=dtype,
                                            size=time_steps,
                                            element_shape=element_shape,
                                            tensor_array_name=base_name + name)

    in_graph_mode = not context.executing_eagerly()
    if in_graph_mode:
        output_ta = tuple(
            _create_ta(
                "output_%d" % i,
                element_shape=(tensor_shape.TensorShape([const_batch_size])
                               .concatenate(
                                   _maybe_tensor_shape_from_tensor(out_size))),
                dtype=_infer_state_dtype(dtype, state))
            for i, out_size in enumerate(flat_output_size))
        input_ta = tuple(
            _create_ta(
                "input_%d" % i,
                element_shape=flat_input_i.shape[1:],
                dtype=flat_input_i.dtype)
            for i, flat_input_i in enumerate(flat_input))
        input_ta = tuple(ta.unstack(input_)
                         for ta, input_ in zip(input_ta, flat_input))
    else:
        output_ta = tuple([0 for _ in range(time_steps.numpy())]
                          for i in range(len(flat_output_size)))
        input_ta = flat_input

    def tf_average(A, B):
        return A + B

    def _time_step(time, output_ta_t, state):
        input_t = tuple(ta.read(time) for ta in input_ta)
        # Restore some shape information
        for input_, shape in zip(input_t, inputs_got_shape):
            input_.set_shape(shape[1:])

        input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
        flat_state = nest.flatten(state)
        flat_zero_output = nest.flatten(zero_output)

        # Vector describing which batch entries are finished.
        copy_cond = time >= sequence_length

        def _copy_one_through(output, new_output):
            # Otherwise propagate the old or the new value.
            with ops.colocate_with(new_output):
                return array_ops.where(copy_cond, output, new_output)

        the_average = tf_average(input_t, state)

        the_average_updated = _copy_one_through(zero_output, the_average)
        the_average_last_state = _copy_one_through(state, the_average)

        for output, flat_output in zip([the_average_updated], flat_zero_output):
            output.set_shape(flat_output.get_shape())

        final_output = nest.pack_sequence_as(structure=zero_output, flat_sequence=[the_average_updated])
        output_ta_t = tuple(ta.write(time, out) for ta, out in zip(output_ta_t, [final_output]))

        return (time + 1, output_ta_t, the_average_last_state)

    if in_graph_mode:
        # Make sure that we run at least 1 step, if necessary, to ensure
        # the TensorArrays pick up the dynamic shape.
        loop_bound = math_ops.minimum(
            time_steps, math_ops.maximum(1, max_sequence_length))
    else:
        # Using max_sequence_length isn't currently supported in the Eager branch.
        loop_bound = time_steps

    _, output_final_ta, final_state = control_flow_ops.while_loop(
        cond=lambda time, *_: time < loop_bound,
        body=_time_step,
        loop_vars=(time, output_ta, state),
        parallel_iterations=parallel_iterations,
        maximum_iterations=time_steps,
        swap_memory=swap_memory)

    final_outputs = tuple(ta.stack() for ta in output_final_ta)

    # Restore some shape information
    for output, output_size in zip(final_outputs, flat_output_size):
        shape = _concat(
            [const_time_steps, const_batch_size], output_size, static=True)
        output.set_shape(shape)

    final_outputs = nest.pack_sequence_as(structure=embedding_dimension,
                                          flat_sequence=final_outputs)

    return final_outputs, final_state
def dynamic_average(inputs, sequence_length=None, initial_state=None,
                    dtype=None, parallel_iterations=None, swap_memory=False,
                    time_major=False, scope=None):
    with vs.variable_scope(scope or "rnn") as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable using the same placement as for the rest of the RNN.
        if _should_cache():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        # By default, time_major==False and inputs are batch-major: shaped
        #   [batch, time, depth]
        # For internal calculations, we transpose to [time, batch, depth]
        flat_input = nest.flatten(inputs)
        embedding_dimension = tf.shape(inputs)[2]

        if not time_major:
            # (B,T,D) => (T,B,D)
            flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
            flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

        parallel_iterations = parallel_iterations or 32

        if sequence_length is not None:
            sequence_length = math_ops.to_int32(sequence_length)
            if sequence_length.get_shape().ndims not in (None, 1):
                raise ValueError(
                    "sequence_length must be a vector of length batch_size, "
                    "but saw shape: %s" % sequence_length.get_shape())
            sequence_length = array_ops.identity(  # Just to find it in the graph.
                sequence_length, name="sequence_length")

        batch_size = _best_effort_input_batch_size(flat_input)
        state = tf.zeros(shape=(batch_size, embedding_dimension))

        def _assert_has_shape(x, shape):
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
                ["Expected shape for Tensor %s is " % x.name,
                 packed_shape, " but saw shape: ", x_shape])

        if not context.executing_eagerly() and sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                    [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(
                    sequence_length, name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_average_loop(
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            sequence_length=sequence_length,
            dtype=dtype)

        if not time_major:
            outputs = nest.map_structure(_transpose_batch_time, outputs)

        return outputs, final_state
This is the main code. So to find the sum of a 3D matrix, of variable length as in RNN, we can test it as follows
tf.reset_default_graph()
the_inputs = np.random.uniform(-1,1,(30,50,111)).astype(np.float32)
the_length = np.random.randint(50, size=30)
the_input_tensor = tf.convert_to_tensor(the_inputs)
the_length_tensor = tf.convert_to_tensor(the_length)
outputs, final_state = dynamic_average(inputs=the_input_tensor,
sequence_length=the_length_tensor)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
outputs_result , final_state_result = sess.run((outputs, final_state))
print("Testing")
for index in range(len(the_inputs)):
    print(the_inputs[index, :, :][:the_length[index]].sum(axis=0) == final_state_result[index])
    print('------------------------------------------------------------------')
