I'm trying to write a custom training loop. After creating the model, I have added some extra trainable parameter to some layers of my model. I have used these extra parameters to update my original parameter on every forward pass. But when I'm calculating the gradient, it's giving None for the extra parameter that i have added last. Code is given below:
model = Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(Dense(1, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.layers[1].add_weight(name="x1", shape=(1,), initializer=tf.keras.initializers.Constant(value=1.0),trainable=True)
dataset = tf.data.Dataset.from_tensor_slices((feature, labels))
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
for par in model.layers[1].trainable_weights:
if "x1" in par.name:
bits = tf.convert_to_tensor(par)
for par in model.layers[1].trainable_weights:
if "kernel" in par.name:
par = bits + 1.0
x = model(x_batch_train, training = True)
loss = tf.keras.losses.SparseCategoricalCrossentropy(y_batch_train, x)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
Here, I have added one extra parameter called x1 and it's updating the kernel of Dense layer. But I'm getting None gradient for x1 parameter. The output is:
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
tf.Tensor([-0.], shape=(1,), dtype=float32)
None
tf.Tensor([[0. 0.]], shape=(1, 2), dtype=float32)
tf.Tensor([-0.5 0.5], shape=(2,), dtype=float32)
Why it's happening?
The problem is that the changes you are making to the layer's weights have no direct connection to the output of the model in the context of tf.GradientTape and are therefore not tracked. You could solve this with a simple custom layer:
import tensorflow as tf
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, units=1):
super(DenseLayer, self).__init__()
self.units = units
def build(self, input_shape):
self.w = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.units], trainable=True)
self.b = self.add_weight(shape=(self.units,), initializer="zeros", trainable=True)
self.bits = self.add_weight(name="x1", shape=[int(input_shape[-1]),
self.units], initializer=tf.keras.initializers.ones(), trainable=True)
def call(self, inputs):
return tf.nn.relu(tf.matmul(inputs, (self.w + self.bits + 1.0)) + self.b)
dense_layer = DenseLayer(1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
print(model.summary())
dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1, 1)), tf.random.uniform((50, ), maxval=2, dtype=tf.int32))).batch(2)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
y = model(x_batch_train, training = True)
loss = loss_fn(y_batch_train, y)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
optimizer.apply_gradients(zip(val, model.trainable_variables))
Your idea is good I didn't extend from the last answer but this question is asked once about the custom layer and that you can do it for lstm by training as model.fit( ... )
It is not about the Gradient Tape.
[ Sample - Dense ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Function
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyDenseLayer(tf.keras.layers.Layer):
def __init__(self, num_outputs, num_add):
super(MyDenseLayer, self).__init__()
self.num_outputs = num_outputs
self.num_add = num_add
def build(self, input_shape):
self.kernel = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.num_outputs])
def call(self, inputs):
temp = tf.add( inputs, self.num_add )
temp = tf.matmul(temp, self.kernel)
return temp
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
])
layer = MyDenseLayer(10, 5)
model.add(layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(192, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()
[ Output ]:
Related
I am trying to add Attention layer to my model for text clasiffication.
but I get an error after adding the layer and then fitting the model.
here is my code:
model = Sequential()
for i in range(len(kernel_size)):
model.add(Conv1D(filters=nb_filter, kernel_size=kernel_size[i], padding='valid', activation='relu',
input_shape=(data_batch_size, emb_dim)))
model.add(MaxPooling1D(pool_size=pool_size))
model.add(Bidirectional(LSTM(units=lstm_out, return_sequences=True), merge_mode='concat',
input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Bidirectional(LSTM(units=lstm_out, go_backwards=True)))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Attention(return_sequences=True))
# ------------------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------------------------------
model.add(Dropout(DropoutP))
model.add(Dense(cat_output, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
Y_tmp = np.zeros([Y_train.shape[0], 2])
Y_tmp[:, 0] = 2 - Y_train
Y_tmp[:, 1] = Y_train - 1
Y_train = Y_tmp
history = model.fit(X_train, Y_train, validation_split=test_size, epochs=nb_epoch, verbose=1,
callbacks=[EarlyStopping(monitor='val_accuracy', patience=0, restore_best_weights=True)])
And This is the Attention class:
class Attention(Layer):
def __init__(self, return_sequences=True):
self.return_sequences = return_sequences
super(Attention, self).__init__()
def build(self, input_shape):
self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1), initializer="normal")
self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
super(Attention, self).build(input_shape)
def call(self, x):
e = K.tanh(K.dot(x, self.W) + self.b)
a = K.softmax(e, axis=1)
output = x * a
if self.return_sequences:
return output
return K.sum(output, axis=1)
And this is the error: Incompatible shapes: [32,2] vs. [1200,2]
What am I doing wrong?
There is a problem with :
self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1), initializer="zeros")
which should be :
self.b = self.add_weight(name="att_bias", shape=(1,), initializer="zeros")
In fact, you are redefining the Dense layer. To see for yourself, you can look at the custom Linear layer in layers and models via sub-classing.
The custom attention layer is actually what you want using the Dense layers and is more general (a Bahdanau attention layer).
I am trying to get an Adversarial AutoEncoder going using keras Fit method on a keras.model class
but for some reason it is not working.
Keep in mind that I tried updating encoder and decoder at the same time.
I tried giving the disc loss to the encoder with and without the reconstruction loss
The reconstruction loss stayed the same while encoder disc loss kept increasing as the discriminator's own loss kept dropping.
discriminator = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(1, activation="sigmoid"),
],
name="discriminator",
)
discriminator.summary()
encoder = keras.Sequential(
[
keras.Input(shape=(28, 28, 1)),
layers.Conv2D(24, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(48, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(96, 3, activation="relu", strides=2, padding="same"),
layers.Flatten(),
layers.Dense(4 * 4 * 128, activation="linear"),
layers.Reshape((4, 4, 128)),
],
name="encoder",
)
encoder.summary()
decoder = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(7 * 7 * 64, activation="relu"),
layers.Reshape((7, 7, 64)),
layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(1, 3, activation="sigmoid", strides=1, padding="same"),
],
name="decoder",
)
I am not sure If it is in the model itself of not. I am using MNIST Dataset for this
class AAE(keras.Model):
def __init__(self, encoder, decoder, discriminator):
super(AAE, self).__init__()
self.encoder = encoder
self.decoder = decoder
self.discriminator = discriminator
self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
self.disc_tracker = keras.metrics.Mean(name="disc_loss")
self.discEnc_tracker = keras.metrics.Mean(name="discEnc_loss")
#property
def metrics(self):
return [
self.total_loss_tracker,
self.reconstruction_loss_tracker,
self.disc_tracker,
self.discEnc_tracker,
]
def compile(self, di_optimizer, e_optimizer,de_optimizer, loss_fn):
super(AAE, self).compile()
self.dis_optimizer = di_optimizer
self.e_optimizer = e_optimizer
self.de_optimizer = de_optimizer
self.lossBCE = loss_fn[0]
self.lossMAE = loss_fn[1]
def train_step(self, data):
latent = self.encoder(data)
batch_size = 200
dists = tf.random.normal((batch_size,4,4,128))
y_real = tf.ones((batch_size, 1))
y_fake = tf.zeros((batch_size, 1))
real_dist_mix = tf.concat((dists, latent),axis=0)
y_real_fake_mix = tf.concat((y_real, y_fake),axis=0)
with tf.GradientTape() as tape:
predictions = self.discriminator(real_dist_mix)
d_loss = self.lossBCE(y_real_fake_mix, predictions)
grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
self.dis_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights))
with tf.GradientTape() as Etape, tf.GradientTape() as Dtape:
latent = self.encoder(data)
reconstruction = self.decoder(latent)
reconstruction_loss = self.lossMAE(data, reconstruction)
total_loss = reconstruction_loss
Egrads = Etape.gradient(total_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(Egrads, self.encoder.trainable_weights))
Dgrads = Dtape.gradient(total_loss, self.decoder.trainable_weights)
self.de_optimizer.apply_gradients(zip(Dgrads, self.decoder.trainable_weights))
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
self.total_loss_tracker.update_state(total_loss)
self.reconstruction_loss_tracker.update_state(reconstruction_loss)
self.disc_tracker.update_state(d_loss)
self.discEnc_tracker.update_state(e_loss)
return {
"loss": self.total_loss_tracker.result(),
"reconstruction_loss": self.reconstruction_loss_tracker.result(),
"disc_loss": self.disc_tracker.result(),
"discEnc_loss": self.discEnc_tracker.result(),
}
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
Aae = AAE(encoder, decoder, discriminator)
#vae.compile(optimizer=keras.optimizers.Adam())
Aae.compile(
di_optimizer=keras.optimizers.Adam(learning_rate=0.00001),
e_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
de_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
loss_fn=[tf.keras.losses.BinaryCrossentropy(),tf.keras.losses.MeanAbsoluteError()]
)
h=Aae.fit(mnist_digits, epochs=15, batch_size=200)
I think that the error is here:
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
I would put e_loss = self.lossBCE(y_real, predictions), because the encoder tries to fool the discriminator.
I am getting a ValueError: logits and labels must have the same shape ((None, 1) vs ()) when doing a model evaluate. I get the model to train but when I evaluate is when I have the problem. I used a tf.expand_dims for logits but wondering if this needs to be applied to the labels as well?
here is my code below.
import tensorflow as tf
import tensorflow_datasets as tfds
dataset, info = tfds.load('imdb_reviews', with_info=True,
as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(1)
VOCAB_SIZE, EMBED_SIZE, NUM_OOV_BUCKETS = 10000, 128, 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))
class AttentionLayer(tf.keras.layers.Layer):
def __init__(self, **kwargs):
super(AttentionLayer, self).__init__(**kwargs)
self.query_layer = tf.keras.layers.Conv1D(
filters=100,
kernel_size=4,
padding='same'
)
self.value_layer = tf.keras.layers.Conv1D(
filters=100,
kernel_size=4,
padding='same'
)
self.attention_layer = tf.keras.layers.Attention()
def call(self, inputs):
query = self.query_layer(inputs)
value = self.value_layer(inputs)
attention = self.attention_layer([query, value])
return tf.keras.layers.concatenate([query, attention])
attention_layer = AttentionLayer()
model1 = tf.keras.models.Sequential([
tf.keras.Input(shape=(),batch_size=1, dtype=tf.string, name='InputLayer'),
encoder,
tf.keras.layers.Embedding(VOCAB_SIZE + NUM_OOV_BUCKETS, EMBED_SIZE, mask_zero=True, name='Embedding_Layer'),
attention_layer,
tf.keras.layers.Conv1D(filters=32, kernel_size=4, padding = 'same', activation = 'relu', name='Conv1DLayer'),
tf.keras.layers.MaxPooling1D(pool_size=2, name='MaxPoolLayer'),
tf.keras.layers.LSTM(64, dropout = 0.2, name='DropoutLayer'),
tf.keras.layers.Dense(250, activation = 'relu', name='DenseLayer'),
tf.keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
def preprocess_y(x, y):
return x, tf.expand_dims(y, -1)
history1 = model1.fit(
train_dataset.map(preprocess_y),
batch_size=BATCH_SIZE,
epochs=1)
model1.evaluate(test_dataset)
ValueError: logits and labels must have the same shape ((None, 1) vs ())
I got the following error in training cifar10 data in tensorflow-2. I used this tutorial.
TypeError: Expected float32 passed to parameter 'y' of op 'Equal',
got 'collections' of type 'str' instead. Error: Expected float32, got
'collections' of type 'str' instead.
My code looks like:
class Mymodel(tf.keras.Model):
def __init__(self, class_size):
"""Initialize parameters and build model.
"""
super(Mymodel, self).__init__()
self.class_size =class_size
self.conv1 = tf.keras.layers.Conv2D(32, kernel_size =3, strides =2, activation='relu')
self.conv2 = tf.keras.layers.Conv2D(64, kernel_size =2, strides =2, activation='relu')
self.conv3 = tf.keras.layers.Conv2D(64, kernel_size =2, strides =1, activation='relu')
self.flat = tf.keras.layers.Flatten()
self.d1 = tf.keras.layers.Dense(512, activation='relu')
self.d2 = tf.keras.layers.Dense(128, activation='relu')
self.fd =tf.keras.layers.Dense(self.class_size, activation='softmax')
def call(self, inputs):
x = self.conv1(inputs)
x = self.conv2(x)
x = self.conv3(x)
x = self.flat(x)
x = self.d1(x)
x = self.d2(x)
return self.fd(x)
model = Mymodel(10)
train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0
train_ds = tf.data.Dataset.from_tensor_slices(
(train_images, train_labels)).shuffle(1000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(32)
# define the training and testing objects
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
#tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
loss(loss)
accuracy(labels, predictions)
tf.function
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
loss(t_loss)
accuracy(labels, predictions)
def train():
EPOCHS = 5
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(images, labels)
for test_images, test_labels in test_ds:
test_step(test_images, test_labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch+1,
train_loss.result(),
train_accuracy.result()*100,
test_loss.result(),
test_accuracy.result()*100))
# Reset the metrics for the next epoch
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
train()
It works when I replaced the compile and fit functions.
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size= 200, epochs=6, validation_data=(test_images, test_labels))
Highly appropriated for any help.
set from_logits=True in the loss function.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
It solves the error!
I think you can use input_shape param first.
self.conv1 = tf.keras.layers.Conv2D(32, kernel_size =3, strides =2, activation='relu', input_shape=(w,h,n_channel)
I am trying to understand how Keras custom layers works, but I am facing a problem with the validation accuracy of my model.
I tried to reproduce a simple convolutional network on MNIST dataset but with a custom layer combining the Conv2D operator and the BatchNormalisation.
First, the data I used :
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.array([x.reshape(28, 28, 1) for x in X_train])
X_test = np.array([x.reshape(28, 28, 1) for x in X_test])
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)
Here is the original implementation which works well :
def get_model():
input_ = Input(shape=(28, 28, 1))
x = Conv2D(filters=64, kernel_size=3, activation="relu", input_shape=(28,28,1))(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2D(filters=128, kernel_size=3, activation="relu")(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2D(filters=256, kernel_size=3, activation="relu")(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dense(64, activation="relu")(x)
x = Dense(10, activation="softmax")(x)
mod = Model(inputs=input_, outputs=x)
return mod
optim = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, clipvalue=K.epsilon())
model = get_model()
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=["accuracy"])
model.fit(X_train, y_train, batch_size=128, epochs=3, validation_data=(X_test, y_test))
With this initial model, after 3 epochs, I get a train accuracy of 97% and validation 97%
And here is my custom layer :
class Conv2DLayer(Layer):
def __init__(self, filters, kernel_size, dropout_ratio=None, strides=(1, 1), activation="relu", use_bn=True, *args, **kwargs):
self._filters = filters
self._kernel_size = kernel_size
self._dropout_ratio = dropout_ratio
self._strides = strides
self.use_bn = use_bn
self._activation = activation
self._args = args
self._kwargs = kwargs
super(Conv2DLayer, self).__init__(*args, **kwargs)
def build(self, input_shape):
self.conv = Conv2D(self._filters,
kernel_size=self._kernel_size,
activation=self._activation,
strides=self._strides,
input_shape=input_shape,
*self._args,
**self._kwargs)
self.conv.build(input_shape)
self.out_conv_shape = self.conv.compute_output_shape(input_shape)
self._trainable_weights = self.conv._trainable_weights
self._non_trainable_weights = self.conv._non_trainable_weights
if self.use_bn:
self.bn = BatchNormalization()
self.bn.build(self.out_conv_shape)
self._trainable_weights.extend(self.bn._trainable_weights)
self._non_trainable_weights.extend(self.bn._non_trainable_weights)
if self._dropout_ratio is not None:
self.dropout = Dropout(rate=self._dropout_ratio)
self.dropout.build(self.out_conv_shape)
self._trainable_weights.extend(self.dropout._trainable_weights)
self._non_trainable_weights.extend(self.dropout._non_trainable_weights)
super(Conv2DLayer, self).build(input_shape)
def call(self, inputs):
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x)
if self._dropout_ratio is not None:
x = self.dropout(x)
return x
def compute_output_shape(self, input_shape):
return self.out_conv_shape
Finally, here is the modified model :
def get_model():
input_ = Input(shape=(28, 28, 1))
x = Conv2DLayer(filters=64, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2DLayer(filters=128, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2DLayer(filters=256, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dense(64, activation="relu")(x)
x = Dense(10, activation="softmax")(x)
mod = Model(inputs=input_, outputs=x)
return mod
For this model with custom layer, I managed to get the same train accuracy (97%), but the validation accuracy get stuck around 50%.
EDIT
Thanks to Matias Valdenegro, I achieved to solve the problem by modifying the call method :
def call(self, inputs):
training = K.learning_phase()
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x, training=training)
if self._dropout_ratio is not None:
x = self.dropout(x, training=training)
return x
With K the keras.backend module.
Both Dropout and Batch Normalization behave differently during training and testing/inference, and your layer does not have any of that behavior, so its using those inner layers as training mode during inference, producing incorrect results.
I am not sure but I think you can fix this by passing the training parameter in the call function call through the layers, something like:
def call(self, inputs, training=None):
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x, training=training)
if self._dropout_ratio is not None:
x = self.dropout(x, training=training)
return x
This should make the inner layers work differently during train and testing/inference phases.