keras Attention: Incompatible shapes: [32,2] vs. [1200,2]

I am trying to add an Attention layer to my model for text classification, but I get an error after adding the layer and then fitting the model.
Here is my code:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential()
for i in range(len(kernel_size)):
    model.add(Conv1D(filters=nb_filter, kernel_size=kernel_size[i], padding='valid', activation='relu',
                     input_shape=(data_batch_size, emb_dim)))
    model.add(MaxPooling1D(pool_size=pool_size))
model.add(Bidirectional(LSTM(units=lstm_out, return_sequences=True), merge_mode='concat',
                        input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units=lstm_out, go_backwards=True)))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Attention(return_sequences=True))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Dropout(DropoutP))
model.add(Dense(cat_output, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Labels take values in {1, 2}; turn them into two-column one-hot targets
Y_tmp = np.zeros([Y_train.shape[0], 2])
Y_tmp[:, 0] = 2 - Y_train
Y_tmp[:, 1] = Y_train - 1
Y_train = Y_tmp

history = model.fit(X_train, Y_train, validation_split=test_size, epochs=nb_epoch, verbose=1,
                    callbacks=[EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)])
And this is the Attention class:
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Layer

class Attention(Layer):
    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(Attention, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)
And this is the error: Incompatible shapes: [32,2] vs. [1200,2]
What am I doing wrong?

There is a problem with:
self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
which should be:
self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
In fact, by giving the bias one entry per timestep you are redefining a Dense layer. To see for yourself, look at the custom Linear layer in the Keras guide "Making new layers and models via subclassing".
A custom attention layer built from Dense layers is actually what you want, and it is more general (a Bahdanau attention layer).
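For reference, here is the corrected build method in context (a sketch; only the bias shape changes, the rest of the class stays as in the question):

def build(self, input_shape):
    # One scoring weight per input feature, plus a single scalar bias
    self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
    self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
    super(Attention, self).build(input_shape)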

Related

tf.GradientTape giving None gradient

I'm trying to write a custom training loop. After creating the model, I added some extra trainable parameters to some layers of my model. I use these extra parameters to update my original parameters on every forward pass. But when I calculate the gradient, it gives None for the extra parameter that I added last. The code is given below:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1, 1)))
model.add(Dense(1, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.layers[1].add_weight(name="x1", shape=(1,),
                           initializer=tf.keras.initializers.Constant(value=1.0), trainable=True)

dataset = tf.data.Dataset.from_tensor_slices((feature, labels))
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
    with tf.GradientTape() as tape:
        for par in model.layers[1].trainable_weights:
            if "x1" in par.name:
                bits = tf.convert_to_tensor(par)  # read the extra parameter
        for par in model.layers[1].trainable_weights:
            if "kernel" in par.name:
                par = bits + 1.0  # intended to update the kernel
        x = model(x_batch_train, training=True)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()(y_batch_train, x)
    val = tape.gradient(loss, model.trainable_weights)
    for v in val:
        print(v)
Here, I added one extra parameter called x1, and it updates the kernel of the Dense layer. But I'm getting a None gradient for the x1 parameter. The output is:
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
tf.Tensor([-0.], shape=(1,), dtype=float32)
None
tf.Tensor([[0. 0.]], shape=(1, 2), dtype=float32)
tf.Tensor([-0.5 0.5], shape=(2,), dtype=float32)
Why is this happening?
The problem is that the changes you are making to the layer's weights have no direct connection to the output of the model in the context of tf.GradientTape and are therefore not tracked. You could solve this with a simple custom layer:
import tensorflow as tf

class DenseLayer(tf.keras.layers.Layer):
    def __init__(self, units=1):
        super(DenseLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight("kernel",
                                 shape=[int(input_shape[-1]), self.units],
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="zeros", trainable=True)
        self.bits = self.add_weight(name="x1",
                                    shape=[int(input_shape[-1]), self.units],
                                    initializer=tf.keras.initializers.ones(),
                                    trainable=True)

    def call(self, inputs):
        return tf.nn.relu(tf.matmul(inputs, (self.w + self.bits + 1.0)) + self.b)

dense_layer = DenseLayer(1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1, 1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
print(model.summary())

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal((50, 1, 1)),
     tf.random.uniform((50,), maxval=2, dtype=tf.int32))).batch(2)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

for i, (x_batch_train, y_batch_train) in enumerate(dataset):
    with tf.GradientTape() as tape:
        y = model(x_batch_train, training=True)
        loss = loss_fn(y_batch_train, y)
    val = tape.gradient(loss, model.trainable_weights)
    for v in val:
        print(v)
    optimizer.apply_gradients(zip(val, model.trainable_variables))
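Because self.bits now enters the forward pass through the matmul inside call, the tape can trace a path from the loss back to it, so tape.gradient returns a real gradient for x1 instead of None.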
Your idea is good. This does not extend the previous answer, but a similar question about custom layers comes up often, and you can also train such a layer with LSTMs simply via model.fit(...). It is not really about the GradientTape.
[ Sample - Dense ]:
# ---------------------------------------------------------
# : Class / Function
# ---------------------------------------------------------
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs, num_add):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.num_add = num_add

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]), self.num_outputs])

    def call(self, inputs):
        temp = tf.add(inputs, self.num_add)
        temp = tf.matmul(temp, self.kernel)
        return temp

# ---------------------------------------------------------
# : Model Initialize
# ---------------------------------------------------------
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(32, 32, 4)),
    tf.keras.layers.Normalization(mean=3., variance=2.),
    tf.keras.layers.Normalization(mean=4., variance=6.),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Reshape((128, 225)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
])
layer = MyDenseLayer(10, 5)
model.add(layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(192, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()

[tensorflow] How do I wrap a sequence of code into a model by subclassing tf.keras.Model?

I have a code sequence as below. I tried to wrap that code by subclassing the TensorFlow model class, but I get the following error. Any help to solve it is appreciated. Thank you in advance.
Code sequence:
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50V2

input_tensor = Input(shape=(720, 540, 2))
base_model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights=None, classes=4)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(4, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
Attempted model class:
class StreoModel(tf.keras.Model):
    def __init__(self):
        super(StreoModel, self).__init__()
        self.dense1 = Dense(4, activation='sigmoid')

    def call(self, inputs):
        input_tensor = Input(shape=(720, 540, 2))
        x = ResNet50V2(input_tensor=input_tensor, include_top=False, weights=None, classes=4)
        x = x.output
        x = GlobalAveragePooling2D()(x)
        predictions = self.dense1(x)
        return predictions
Error log:
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
I think the problem lies in the way you pass data to your ResNet50V2. Try defining a simple subclassed model like this:
class StreoModel(tf.keras.Model):
    def __init__(self):
        super(StreoModel, self).__init__()
        self.resnet_v2 = tf.keras.applications.resnet_v2.ResNet50V2(
            include_top=False, weights=None, classes=4, input_shape=(720, 540, 2))
        self.resnet_v2.trainable = True
        x = self.resnet_v2.output
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
        output = tf.keras.layers.Dense(4, activation='softmax')(x)
        self.model = tf.keras.Model(self.resnet_v2.input, output)
Note that I removed your input layer and added an input_shape to ResNet50V2. According to the docs, you should specify the input_shape if include_top=False. I also changed your output activation function to softmax, since you are dealing with 4 classes.
And then using it:
sm = StreoModel()
sm.model(np.random.random((1, 720, 540, 2)))
# <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.25427648, 0.25267935, 0.23970276, 0.2533414 ]], dtype=float32)>
If you want to define your Model with a call method, then you can do it like this:
class StreoModel(tf.keras.Model):
    def __init__(self):
        super(StreoModel, self).__init__()
        self.dense = tf.keras.layers.Dense(4, activation='softmax')
        self.resnet = tf.keras.applications.resnet_v2.ResNet50V2(
            include_top=False, weights=None, classes=4, input_shape=(720, 540, 2))
        self.pooling = tf.keras.layers.GlobalAveragePooling2D()

    def call(self, inputs):
        x = self.resnet(inputs)
        x = self.pooling(x)
        predictions = self.dense(x)
        return predictions
And use it like this:
sm = StreoModel()
sm(np.random.random((1, 720, 540, 2)))
# <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.25062975, 0.2428435 , 0.25178066, 0.25474608]], dtype=float32)>
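The difference between the two versions: the first builds an inner functional Model in __init__ and is invoked through sm.model, while the second wires the layers together in call, so the subclassed model itself is callable and can be compiled and trained directly with fit/evaluate. The second is the more idiomatic subclassing pattern.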

ValueError: logits and labels must have the same shape ((None, 1) vs ())

I am getting a ValueError: logits and labels must have the same shape ((None, 1) vs ()) when doing a model evaluate. The model trains fine; the problem only appears when I evaluate. I used tf.expand_dims on the training labels, but I am wondering whether this needs to be applied to the test labels as well?
Here is my code below.
import tensorflow as tf
import tensorflow_datasets as tfds

dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(1)

VOCAB_SIZE, EMBED_SIZE, NUM_OOV_BUCKETS = 10000, 128, 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.query_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.value_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.attention_layer = tf.keras.layers.Attention()

    def call(self, inputs):
        query = self.query_layer(inputs)
        value = self.value_layer(inputs)
        attention = self.attention_layer([query, value])
        return tf.keras.layers.concatenate([query, attention])

attention_layer = AttentionLayer()

model1 = tf.keras.models.Sequential([
    tf.keras.Input(shape=(), batch_size=1, dtype=tf.string, name='InputLayer'),
    encoder,
    tf.keras.layers.Embedding(VOCAB_SIZE + NUM_OOV_BUCKETS, EMBED_SIZE, mask_zero=True, name='Embedding_Layer'),
    attention_layer,
    tf.keras.layers.Conv1D(filters=32, kernel_size=4, padding='same', activation='relu', name='Conv1DLayer'),
    tf.keras.layers.MaxPooling1D(pool_size=2, name='MaxPoolLayer'),
    tf.keras.layers.LSTM(64, dropout=0.2, name='DropoutLayer'),
    tf.keras.layers.Dense(250, activation='relu', name='DenseLayer'),
    tf.keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])
model1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

def preprocess_y(x, y):
    return x, tf.expand_dims(y, -1)

history1 = model1.fit(
    train_dataset.map(preprocess_y),
    batch_size=BATCH_SIZE,
    epochs=1)

model1.evaluate(test_dataset)
ValueError: logits and labels must have the same shape ((None, 1) vs ())
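No answer was posted, but a likely fix given the code above: train_dataset is batched and mapped through preprocess_y, while test_dataset is passed to evaluate raw, so its labels still have shape () instead of (None, 1). Applying the same batching and label expansion to the test set should align the shapes (a sketch, untested against this exact pipeline):

test_dataset = test_dataset.batch(BATCH_SIZE).map(preprocess_y)
model1.evaluate(test_dataset)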

Why does my keras custom layer fit well on training data but give bad results on validation?

I am trying to understand how Keras custom layers work, but I am facing a problem with the validation accuracy of my model.
I tried to reproduce a simple convolutional network on the MNIST dataset, but with a custom layer combining the Conv2D operator and BatchNormalization.
First, the data I used:
import numpy as np
import pandas as pd
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.array([x.reshape(28, 28, 1) for x in X_train])
X_test = np.array([x.reshape(28, 28, 1) for x in X_test])
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)
Here is the original implementation, which works well:
from keras.models import Model
from keras.layers import (Input, Conv2D, BatchNormalization, MaxPool2D,
                          Flatten, Dense, Dropout, Layer)
from keras.optimizers import Adam
from keras import backend as K

def get_model():
    input_ = Input(shape=(28, 28, 1))
    x = Conv2D(filters=64, kernel_size=3, activation="relu", input_shape=(28, 28, 1))(input_)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Conv2D(filters=128, kernel_size=3, activation="relu")(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Conv2D(filters=256, kernel_size=3, activation="relu")(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Flatten()(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(10, activation="softmax")(x)
    mod = Model(inputs=input_, outputs=x)
    return mod
optim = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, clipvalue=K.epsilon())
model = get_model()
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=["accuracy"])
model.fit(X_train, y_train, batch_size=128, epochs=3, validation_data=(X_test, y_test))
With this initial model, after 3 epochs, I get a train accuracy of 97% and a validation accuracy of 97%.
And here is my custom layer:
class Conv2DLayer(Layer):
    def __init__(self, filters, kernel_size, dropout_ratio=None, strides=(1, 1),
                 activation="relu", use_bn=True, *args, **kwargs):
        self._filters = filters
        self._kernel_size = kernel_size
        self._dropout_ratio = dropout_ratio
        self._strides = strides
        self.use_bn = use_bn
        self._activation = activation
        self._args = args
        self._kwargs = kwargs
        super(Conv2DLayer, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        self.conv = Conv2D(self._filters,
                           kernel_size=self._kernel_size,
                           activation=self._activation,
                           strides=self._strides,
                           input_shape=input_shape,
                           *self._args,
                           **self._kwargs)
        self.conv.build(input_shape)
        self.out_conv_shape = self.conv.compute_output_shape(input_shape)
        self._trainable_weights = self.conv._trainable_weights
        self._non_trainable_weights = self.conv._non_trainable_weights
        if self.use_bn:
            self.bn = BatchNormalization()
            self.bn.build(self.out_conv_shape)
            self._trainable_weights.extend(self.bn._trainable_weights)
            self._non_trainable_weights.extend(self.bn._non_trainable_weights)
        if self._dropout_ratio is not None:
            self.dropout = Dropout(rate=self._dropout_ratio)
            self.dropout.build(self.out_conv_shape)
            self._trainable_weights.extend(self.dropout._trainable_weights)
            self._non_trainable_weights.extend(self.dropout._non_trainable_weights)
        super(Conv2DLayer, self).build(input_shape)

    def call(self, inputs):
        x = self.conv(inputs)
        if self.use_bn:
            x = self.bn(x)
        if self._dropout_ratio is not None:
            x = self.dropout(x)
        return x

    def compute_output_shape(self, input_shape):
        return self.out_conv_shape
Finally, here is the modified model:
def get_model():
    input_ = Input(shape=(28, 28, 1))
    x = Conv2DLayer(filters=64, kernel_size=3, activation="relu")(input_)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Conv2DLayer(filters=128, kernel_size=3, activation="relu")(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Conv2DLayer(filters=256, kernel_size=3, activation="relu")(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    x = Flatten()(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(64, activation="relu")(x)
    x = Dense(10, activation="softmax")(x)
    mod = Model(inputs=input_, outputs=x)
    return mod
For this model with the custom layer, I managed to get the same train accuracy (97%), but the validation accuracy gets stuck around 50%.
EDIT
Thanks to Matias Valdenegro, I managed to solve the problem by modifying the call method:
def call(self, inputs):
    training = K.learning_phase()
    x = self.conv(inputs)
    if self.use_bn:
        x = self.bn(x, training=training)
    if self._dropout_ratio is not None:
        x = self.dropout(x, training=training)
    return x
with K being the keras.backend module.
Both Dropout and Batch Normalization behave differently during training and testing/inference, and your layer does not have any of that behavior, so it is using those inner layers in training mode during inference, producing incorrect results.
I am not sure, but I think you can fix this by accepting the training parameter in the call function and passing it through to the inner layers, something like:
def call(self, inputs, training=None):
    x = self.conv(inputs)
    if self.use_bn:
        x = self.bn(x, training=training)
    if self._dropout_ratio is not None:
        x = self.dropout(x, training=training)
    return x
This should make the inner layers work differently during train and testing/inference phases.
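For context (a general fact about tf.keras, not part of the original answer): when call declares a training argument, Keras fills it in automatically, so fit runs the inner layers in training mode while evaluate and predict run them in inference mode. You can also force either mode when calling the model directly:

# Explicitly choosing the phase when calling the model as a function (tf.keras 2.x)
y_train_mode = model(X_test[:1], training=True)   # BatchNorm uses batch statistics; Dropout is active
y_infer_mode = model(X_test[:1], training=False)  # BatchNorm uses moving averages; Dropout is a no-op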

How to embed my customised tensorflow layer into keras model

The following is a simple example of what I want to implement:
Error: raise TypeError("inputs must be a sequence"), TypeError: inputs must be a sequence
How can I solve this and make the program work? Any help will be appreciated.
from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten
import numpy as np
from keras.engine.topology import Layer
import tensorflow as tf

class MyLayer(Layer):
    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        super(MyLayer, self).build(input_shape)

    def call(self, x):
        "Some other tf function will be put at here"
        outputs, state = tf.contrib.rnn.static_rnn(tf.contrib.rnn.LSTMBlockCell(32), x, dtype=tf.float32)
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape

def get_model(timesteps, data_dim):
    model = Sequential()
    model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
    model.add(LSTM(32, return_sequences=True))
    model.add(MyLayer())  # this is my layer
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    return model

def run_demo():
    data_dim = 16
    timesteps = 8
    num_classes = 10
    model = get_model(timesteps, data_dim)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    """Generate the training and validation data"""
    x_train = np.random.random((1000, timesteps, data_dim))
    y_train = np.random.random((1000, num_classes))
    x_val = np.random.random((100, timesteps, data_dim))
    y_val = np.random.random((100, num_classes))
    model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

if __name__ == "__main__":
    run_demo()
Sorry, I am not too familiar with recurrent models, but I think the problem is the input format.
The tensor going into the custom layer has shape (?, 8, 32), while tf.nn.static_rnn requires a Python list of 2-D tensors, one per timestep. So you need to unstack the input along the time axis, and the problem will be fixed.
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Flatten, Dense

class MyLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.cell = tf.nn.rnn_cell.BasicRNNCell(32)
        super(MyLayer, self).build(input_shape)

    def call(self, x):
        "Some other tf function will be put at here"
        # static_rnn wants a list of (batch, features) tensors, one per timestep
        rnn_inputs = tf.unstack(x, axis=1)
        outputs, state = tf.nn.static_rnn(self.cell, rnn_inputs, dtype=tf.float32)
        # Re-assemble the per-timestep outputs into a (batch, time, features) tensor
        for i in range(len(outputs)):
            outputs[i] = tf.expand_dims(outputs[i], axis=1)
        outputs = tf.concat(outputs, axis=1)
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape

def get_model(timesteps, data_dim):
    model = Sequential()
    model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
    model.add(LSTM(32, return_sequences=True))
    model.add(MyLayer())  # this is my layer
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    return model

def run_demo():
    data_dim = 16
    timesteps = 8
    num_classes = 10
    model = get_model(timesteps, data_dim)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    """Generate the training and validation data"""
    x_train = np.random.random((1000, timesteps, data_dim))
    y_train = np.random.random((1000, num_classes))
    x_val = np.random.random((100, timesteps, data_dim))
    y_val = np.random.random((100, num_classes))
    model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

if __name__ == "__main__":
    run_demo()
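A side note (not from the original answer): tf.contrib was removed in TensorFlow 2.0, and tf.nn.static_rnn / tf.nn.rnn_cell are gone as well, so the code above runs only on TF 1.x. On TF 2.x, the closest built-in equivalent of this custom layer is a plain recurrent layer:

import tensorflow as tf

# A 32-unit simple RNN that keeps the full output sequence, shape (batch, timesteps, 32),
# matching what MyLayer produces above.
my_layer = tf.keras.layers.SimpleRNN(32, return_sequences=True)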
