Problem with training cifar10 data in Tensorflow-2 - python

I got the following error in training cifar10 data in tensorflow-2. I used this tutorial.
TypeError: Expected float32 passed to parameter 'y' of op 'Equal',
got 'collections' of type 'str' instead. Error: Expected float32, got
'collections' of type 'str' instead.
My code looks like:
class Mymodel(tf.keras.Model):
def __init__(self, class_size):
"""Initialize parameters and build model.
"""
super(Mymodel, self).__init__()
self.class_size =class_size
self.conv1 = tf.keras.layers.Conv2D(32, kernel_size =3, strides =2, activation='relu')
self.conv2 = tf.keras.layers.Conv2D(64, kernel_size =2, strides =2, activation='relu')
self.conv3 = tf.keras.layers.Conv2D(64, kernel_size =2, strides =1, activation='relu')
self.flat = tf.keras.layers.Flatten()
self.d1 = tf.keras.layers.Dense(512, activation='relu')
self.d2 = tf.keras.layers.Dense(128, activation='relu')
self.fd =tf.keras.layers.Dense(self.class_size, activation='softmax')
def call(self, inputs):
x = self.conv1(inputs)
x = self.conv2(x)
x = self.conv3(x)
x = self.flat(x)
x = self.d1(x)
x = self.d2(x)
return self.fd(x)
model = Mymodel(10)
train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0
train_ds = tf.data.Dataset.from_tensor_slices(
(train_images, train_labels)).shuffle(1000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(32)
# define the training and testing objects
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
#tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
loss(loss)
accuracy(labels, predictions)
tf.function
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
loss(t_loss)
accuracy(labels, predictions)
def train():
EPOCHS = 5
for epoch in range(EPOCHS):
for images, labels in train_ds:
train_step(images, labels)
for test_images, test_labels in test_ds:
test_step(test_images, test_labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch+1,
train_loss.result(),
train_accuracy.result()*100,
test_loss.result(),
test_accuracy.result()*100))
# Reset the metrics for the next epoch
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
train()
It works when I replaced the compile and fit functions.
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_images, train_labels, batch_size= 200, epochs=6, validation_data=(test_images, test_labels))
Highly appropriated for any help.

set from_logits=True in the loss function.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
It solves the error!

I think you can use input_shape param first.
self.conv1 = tf.keras.layers.Conv2D(32, kernel_size =3, strides =2, activation='relu', input_shape=(w,h,n_channel)

Related

Dropout not reducing loss in pytorch but works fine with keras

I was comparing loss for two simple MLP models with and without dropout on both TF/Keras and Pytorch frameworks (on Keras imdb dataset). But with PyTorch I am not getting the same results as I hoped for and was wondering perhaps what I am doing incorrectly.
# Keras - IMDB Dataset
model = Sequential()
model.add(Dense(16, activation = "relu", input_shape= (10000,)))
model.add(Dropout(0.5)) # comment out this line for no dropout model
model.add(Dense(16, activation = "relu"))
model.add(Dropout(0.5)) # comment out this line for no dropout model
model.add(Dense(1, activation = "sigmoid"))
model.compile(
optimizer = "rmsprop",
loss = "binary_crossentropy",
metrics = ["accuracy"]
)
history = model.fit(
X_train,
y_train,
epochs = 20,
batch_size = 512,
validation_data = (X_val, y_val)
)
The results I obtained in keras (Left figure without dropout and right with dropout)
# Pytorch - same IMDB dataset from keras
class MLP(nn.Module):
def __init__(self, in_dims, l1, l2, out_dims):
super(MLP, self).__init__()
self.fc1 = nn.Linear(in_dims, l1)
self.fc2 = nn.Linear(l1, l2)
self.fc3 = nn.Linear(l2, out_dims)
self.dropout = nn.Dropout(p=0.5)
def forward(self, X):
out = F.relu(self.fc1(X))
out = self.dropout(out) # comment out this line for no dropout model
out = F.relu(self.fc2(out))
out = self.dropout(out) # comment out this line for no dropout model
out = F.sigmoid(self.fc3(out))
return out
model = MLP(10000, 16, 16, 1)
optimizer = optim.RMSprop(model.parameters(), lr = 0.001)
criterion = nn.BCELoss()
min_val_loss = np.inf
losses = []
val_losses = []
accuracy = []
val_accuracy = []
for e in range(0,20):
running_loss = 0
for i,(X_train, y_train) in enumerate(train_loader):
yhat = model.forward(X_train)
loss = criterion(yhat.flatten(), y_train)
running_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(running_loss / (i+1)) #note its i+1 since i starts from 0
model.eval()
with torch.no_grad():
running_val_loss = 0
for i,(X_val, y_val) in enumerate(val_loader):
yhat_val = model.forward(X_val)
val_loss = criterion(yhat_val.flatten(), y_val)
running_val_loss += val_loss.item()
val_losses.append(running_val_loss / (i + 1))
if val_loss < min_val_loss:
best_params = model.state_dict()
min_val_loss = val_loss
print(f"epochs : {e}, train_loss : {loss}, val_loss : {val_loss}")
Figure on the left is the result from no dropout model which has similar results to the keras model. However the one with dropout doesnot have the same behaviour.

Adversarial Autoencoder is not working and not learning properly

I am trying to get an Adversarial AutoEncoder going using keras Fit method on a keras.model class
but for some reason it is not working.
Keep in mind that I tried updating encoder and decoder at the same time.
I tried giving the disc loss to the encoder with and without the reconstruction loss
The reconstruction loss stayed the same while encoder disc loss kept increasing as the discriminator's own loss kept dropping.
discriminator = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(1, activation="sigmoid"),
],
name="discriminator",
)
discriminator.summary()
encoder = keras.Sequential(
[
keras.Input(shape=(28, 28, 1)),
layers.Conv2D(24, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(48, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(96, 3, activation="relu", strides=2, padding="same"),
layers.Flatten(),
layers.Dense(4 * 4 * 128, activation="linear"),
layers.Reshape((4, 4, 128)),
],
name="encoder",
)
encoder.summary()
decoder = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(7 * 7 * 64, activation="relu"),
layers.Reshape((7, 7, 64)),
layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(1, 3, activation="sigmoid", strides=1, padding="same"),
],
name="decoder",
)
I am not sure If it is in the model itself of not. I am using MNIST Dataset for this
class AAE(keras.Model):
def __init__(self, encoder, decoder, discriminator):
super(AAE, self).__init__()
self.encoder = encoder
self.decoder = decoder
self.discriminator = discriminator
self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
self.disc_tracker = keras.metrics.Mean(name="disc_loss")
self.discEnc_tracker = keras.metrics.Mean(name="discEnc_loss")
#property
def metrics(self):
return [
self.total_loss_tracker,
self.reconstruction_loss_tracker,
self.disc_tracker,
self.discEnc_tracker,
]
def compile(self, di_optimizer, e_optimizer,de_optimizer, loss_fn):
super(AAE, self).compile()
self.dis_optimizer = di_optimizer
self.e_optimizer = e_optimizer
self.de_optimizer = de_optimizer
self.lossBCE = loss_fn[0]
self.lossMAE = loss_fn[1]
def train_step(self, data):
latent = self.encoder(data)
batch_size = 200
dists = tf.random.normal((batch_size,4,4,128))
y_real = tf.ones((batch_size, 1))
y_fake = tf.zeros((batch_size, 1))
real_dist_mix = tf.concat((dists, latent),axis=0)
y_real_fake_mix = tf.concat((y_real, y_fake),axis=0)
with tf.GradientTape() as tape:
predictions = self.discriminator(real_dist_mix)
d_loss = self.lossBCE(y_real_fake_mix, predictions)
grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
self.dis_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights))
with tf.GradientTape() as Etape, tf.GradientTape() as Dtape:
latent = self.encoder(data)
reconstruction = self.decoder(latent)
reconstruction_loss = self.lossMAE(data, reconstruction)
total_loss = reconstruction_loss
Egrads = Etape.gradient(total_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(Egrads, self.encoder.trainable_weights))
Dgrads = Dtape.gradient(total_loss, self.decoder.trainable_weights)
self.de_optimizer.apply_gradients(zip(Dgrads, self.decoder.trainable_weights))
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
self.total_loss_tracker.update_state(total_loss)
self.reconstruction_loss_tracker.update_state(reconstruction_loss)
self.disc_tracker.update_state(d_loss)
self.discEnc_tracker.update_state(e_loss)
return {
"loss": self.total_loss_tracker.result(),
"reconstruction_loss": self.reconstruction_loss_tracker.result(),
"disc_loss": self.disc_tracker.result(),
"discEnc_loss": self.discEnc_tracker.result(),
}
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
Aae = AAE(encoder, decoder, discriminator)
#vae.compile(optimizer=keras.optimizers.Adam())
Aae.compile(
di_optimizer=keras.optimizers.Adam(learning_rate=0.00001),
e_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
de_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
loss_fn=[tf.keras.losses.BinaryCrossentropy(),tf.keras.losses.MeanAbsoluteError()]
)
h=Aae.fit(mnist_digits, epochs=15, batch_size=200)
I think that the error is here:
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
I would put e_loss = self.lossBCE(y_real, predictions), because the encoder tries to fool the discriminator.

tensorflow tensorboard hparams

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
####### load the model and data here
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([32,64,128,256, 512]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.9))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['Nadam','SGD','RMSprop','adam','Adagrad']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.00001,.01))
HP_LeakyReLU=hp.HParam('alpha', hp.RealInterval(0.1, 0.9))
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('raw-img/log/hparam_tuning/').as_default():
hp.hparams_config(
hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER,HP_L2,HP_LeakyReLU],
metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
)
def train_test_model(hparams):
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(hparams[HP_NUM_UNITS], kernel_regularizer=tf.keras.regularizers.l2(0.001)),
tf.keras.layers.LeakyReLU(hparams[HP_LeakyReLU]),
tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
optimizer=hparams[HP_OPTIMIZER],
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=2)
_, accuracy = model.evaluate(x_test, y_test)
return accuracy
def run(run_dir, hparams):
with tf.summary.create_file_writer(run_dir).as_default():
hp.hparams(hparams) # record the values used in this trial
accuracy = train_test_model(hparams)
tf.summary.scalar(METRIC_ACCURACY, accuracy, step=2)
session_num = 0
for num_units in HP_NUM_UNITS.domain.values:
for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
for alpha in (HP_LeakyReLU.domain.min_value, HP_LeakyReLU.domain.max_value):
for optimizer in HP_OPTIMIZER.domain.values:
hparams = {
HP_NUM_UNITS: num_units,
HP_DROPOUT: dropout_rate,
HP_L2: l2,
HP_LeakyReLU:alpha,
HP_OPTIMIZER: optimizer,
}
run_name = "run-%d" % session_num
print('--- Starting trial: %s' % run_name)
print({h.name: hparams[h] for h in hparams})
run('raw-img/log/hparam_tuning/' + run_name, hparams)
session_num += 1
I have tried to use hparams in TF. I have set dropout, l2 and OPTIMIZER.
I need to set value for learning_rate and test it.
What should I do to set learning_rate like dropout and l2 and test it?
I have tried to do this:
model.compile(
optimizer=hparams[HP_OPTIMIZER](lr=0.001),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
but it doesn't work. I want to select learning_rate different value of learning_rate like(dropout,l2)
You want to separate the used optimizer into a separate variable:
if hparams[HP_OPTIMIZER] == "SGD":
optimizer = tf.keras.optimizers.SGD(learning_rate=float(hparams[HP_LR]))
elif hparams[HP_OPTIMIZER] == "adam":
optimizer = tf.keras.optimizers.Adam(learning_rate=float(hparams[HP_LR]))
else:
raise ValueError("unexpected optimizer name: %r" % hparams[HP_OPTIMIZER])
model.compile(
optimizer=optimizer,
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
)
I found the solution here.

ValueError: logits and labels must have the same shape ((None, 1) vs ())

I am getting a ValueError: logits and labels must have the same shape ((None, 1) vs ()) when doing a model evaluate. I get the model to train but when I evaluate is when I have the problem. I used a tf.expand_dims for logits but wondering if this needs to be applied to the labels as well?
here is my code below.
import tensorflow as tf
import tensorflow_datasets as tfds
dataset, info = tfds.load('imdb_reviews', with_info=True,
as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(1)
VOCAB_SIZE, EMBED_SIZE, NUM_OOV_BUCKETS = 10000, 128, 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))
class AttentionLayer(tf.keras.layers.Layer):
def __init__(self, **kwargs):
super(AttentionLayer, self).__init__(**kwargs)
self.query_layer = tf.keras.layers.Conv1D(
filters=100,
kernel_size=4,
padding='same'
)
self.value_layer = tf.keras.layers.Conv1D(
filters=100,
kernel_size=4,
padding='same'
)
self.attention_layer = tf.keras.layers.Attention()
def call(self, inputs):
query = self.query_layer(inputs)
value = self.value_layer(inputs)
attention = self.attention_layer([query, value])
return tf.keras.layers.concatenate([query, attention])
attention_layer = AttentionLayer()
model1 = tf.keras.models.Sequential([
tf.keras.Input(shape=(),batch_size=1, dtype=tf.string, name='InputLayer'),
encoder,
tf.keras.layers.Embedding(VOCAB_SIZE + NUM_OOV_BUCKETS, EMBED_SIZE, mask_zero=True, name='Embedding_Layer'),
attention_layer,
tf.keras.layers.Conv1D(filters=32, kernel_size=4, padding = 'same', activation = 'relu', name='Conv1DLayer'),
tf.keras.layers.MaxPooling1D(pool_size=2, name='MaxPoolLayer'),
tf.keras.layers.LSTM(64, dropout = 0.2, name='DropoutLayer'),
tf.keras.layers.Dense(250, activation = 'relu', name='DenseLayer'),
tf.keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
def preprocess_y(x, y):
return x, tf.expand_dims(y, -1)
history1 = model1.fit(
train_dataset.map(preprocess_y),
batch_size=BATCH_SIZE,
epochs=1)
model1.evaluate(test_dataset)
ValueError: logits and labels must have the same shape ((None, 1) vs ())

Failed to train with tf.keras.applications.MobileNetV2

Environment:
TF2.0
Python 3.5
ubuntu 16.04
Problem:
I try to use the pre-trained mobilenet_V2 but accuracy doesn't increase:
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
The script is copied from the tutorial of the tensorflow 2.0(https://www.tensorflow.org/tutorials/images/transfer_learning?hl=zh-cn)
The only change I made is the dataset which feed into the network. The original code makes binary classification between dogs and cats, and everything works. However, the accuracy never increases while using multi-classes datasets like: "mnist", "tf_flowers". Please note that, I used the correct loss function and metrics.
Naive model and results:
Keras.mobilenetv2:
Here is the code:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, GlobalAveragePooling2D
from tensorflow.keras import Model
keras = tf.keras
import tensorflow_datasets as tfds
# tfds.disable_progress_bar()
IMG_SIZE = 224
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
def format_example(image, label):
if image.shape[-1] == 1:
image = tf.concat([image, image, image], 2)
image = tf.cast(image, tf.float32)
image = (image/127.5) - 1
image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
return image, label
##----functional model----##
class TinyModel():
def __init__(self, num_classes, hiddens=32, input_shape=IMG_SHAPE):
import tensorflow as tf
self.num_classes = num_classes
self.input_shape = input_shape
self.hiddens = hiddens
def build(self):
inputs = Input(shape=self.input_shape)
x = Conv2D(16, 3, activation="relu", strides=2)(inputs)
x = Conv2D(32, 3, activation="relu", strides=2)(x)
x = Conv2D(32, 3, activation="relu", strides=2)(x)
x = Conv2D(16, 3, activation="relu")(x)
x = Flatten()(x)
x = Dense(self.hiddens, activation="relu")(x)
outputs = Dense(self.num_classes, activation="softmax")(x)
model = Model(inputs=inputs, outputs=outputs, name='my_model')
return model
def assemble_model(num_classes, model_name='MobileNetV2'):
import tensorflow as tf
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
weights='imagenet',
include_top=False)
model = tf.keras.Sequential([
base_model,
GlobalAveragePooling2D(),
Dense(num_classes, activation='softmax')
])
model.trainable = True
return model
## ---- dataset preparation -----##
SPLIT_WEIGHTS = (8, 1, 1)
splits = tfds.Split.TRAIN.subsplit(weighted=SPLIT_WEIGHTS)
(raw_train, raw_validation, raw_test), metadata = tfds.load(
'tf_flowers', split=list(splits),
with_info=True, as_supervised=True)
get_label_name = metadata.features['label'].int2str
train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
test = raw_test.map(format_example)
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000
train_ds = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_ds = validation.batch(BATCH_SIZE)
test_ds = test.batch(BATCH_SIZE)
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)
## ----- model config ---- ##
# Create an instance of the model
model = TinyModel(num_classes=5).build() # model 1
# model = assemble_model(num_classes=5) # model 2
model.summary()
## ----- training config -----##
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
## ----- training loop -----##
#tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images)
loss = loss_object(labels, predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
#tf.function
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
EPOCHS = 5
for epoch in range(EPOCHS):
# Reset the metrics at the start of the next epoch
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
for images, labels in train_ds:
train_step(images, labels)
for test_images, test_labels in test_ds:
test_step(test_images, test_labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch+1,
train_loss.result(),
train_accuracy.result()*100,
test_loss.result(),
test_accuracy.result()*100))
----------------------SOLVED-----------------------
Solution:add the argument "training=True" when training the keras.application.. For example
model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,weights="imagenet",include_top=False)
pred = model(inputs, training=True)
The reason might be caused by "batchnorm" layer. Those model which has BN layers works well in keras training loop, "model.fit()", and nothing to takecare. However, they cannot learn anything by costume training loop if you forget to set training=True in model()
The problem is that you set all your parameters to be non-trainable, check this on hte summary of the model, you will see something like this
Change this line, (or just delete it)
base_model.trainable = False
To
base_model.trainable = True
And everything will work fine

Categories