Keras, using a generator for the data (VAE) - python

I'm currently trying to implement a variational autoencoder, but I'm quite stuck: I cannot understand how to use a data generator in Keras. What I have so far is:
import numpy as np
import keras
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, LeakyReLU
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(z_log_var / 2) * epsilon
class factor_vae(keras.Model):
    def __init__(self):
        super(factor_vae, self).__init__()
        self.encoder = self.encoder_factor_vae()
        self.decoder = self.decoder_factor_vae()
        self.classifier = self.MLP_classifier()

    def train_step(self, data):
        data = data[0]
        with tf.GradientTape() as tape:
            z, z_mean, z_log_var = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                keras.losses.mse(data, reconstruction))
            reconstruction_loss *= 4096  # this can be changed
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_mean(kl_loss)
            kl_loss *= -0.5
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }

    def encoder_factor_vae(self):
        x_inp = Input(shape=(64, 64, 1))
        z = layers.Conv2D(filters=32, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(x_inp)
        z = BatchNormalization()(z)
        z = layers.Conv2D(filters=32, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
        z = BatchNormalization()(z)
        z = layers.Conv2D(filters=64, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
        z = BatchNormalization()(z)
        z = layers.Conv2D(filters=64, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
        z = BatchNormalization()(z)
        z = layers.Flatten()(z)
        z = Dense(units=128, activation='relu')(z)
        z = BatchNormalization()(z)
        z_mean = Dense(units=10, activation='relu')(z)  # here I think the sampling should happen
        z_log_var = Dense(units=10, activation='sigmoid')(z)  # should be the sampling from the reparameterization
        z = Sampling()([z_mean, z_log_var])
        encoder = keras.Model(x_inp, [z, z_mean, z_log_var], name="encoder")
        encoder.summary()
        return encoder

    def decoder_factor_vae(self):
        z_inp = Input(shape=(10,))
        x_rec = Dense(units=128, activation='relu')(z_inp)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=1024, activation='relu')(x_rec)  # works up to this point
        x_rec = BatchNormalization()(x_rec)
        x_rec = layers.Reshape((4, 4, 64))(x_rec)
        x_rec = layers.Conv2DTranspose(filters=64, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = layers.Conv2DTranspose(filters=32, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = layers.Conv2DTranspose(filters=32, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = layers.Conv2DTranspose(filters=1, kernel_size=(4, 4), strides=2, padding='same')(x_rec)
        decoder = keras.Model(z_inp, x_rec, name="decoder")  # any batch size can be passed in
        decoder.summary()
        return decoder

    def MLP_classifier(self):
        z_inp = Input(shape=(10,))
        x_rec = Dense(units=1000)(z_inp)  # 1
        x_rec = LeakyReLU(alpha=0.3)(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=1000)(x_rec)  # 2
        x_rec = LeakyReLU(alpha=0.3)(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=1000)(x_rec)  # 3
        x_rec = LeakyReLU(alpha=0.3)(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=1000)(x_rec)  # 4
        x_rec = LeakyReLU(alpha=0.3)(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=1000)(x_rec)  # 5
        x_rec = LeakyReLU(alpha=0.3)(x_rec)
        x_rec = BatchNormalization()(x_rec)
        x_rec = Dense(units=2)(x_rec)  # 6
        classifier = keras.Model(z_inp, x_rec, name="classifier")
        return classifier
def generate_batches(data):
    L = 50
    start = 0
    end = start + L
    y_L_real = np.zeros((L, 2))
    y_L_fake = np.zeros((L, 2))
    y_L_real[:, 0] = 1
    y_L_fake[:, 1] = 1
    # total_y = np.vstack((y_L_real, y_L_fake))
    while True:
        x_L_real = data[start:end]  # the number of values is 2xL
        x_L_fake = np.roll(x_L_real, shift=2, axis=0)
        total_x = np.vstack((x_L_real, x_L_fake))
        start += L
        end += L
        if start >= data.shape[0]:
            start = 0
            end = L
        yield total_x, total_x
data = dsprite()
factor = factor_vae()
xyz = np.load("C:\\Users\\joaki\\OneDrive\\Skrivbord\\images\\dsprites_ndarray_"
"co1sh3sc6or40x32y32_64x64.npz")
test_data = xyz['imgs']
train_steps = 3000
steps_epoch = 300
factor.compile(optimizer=keras.optimizers.Adam(0.001))
train_generator = generate_batches(test_data)
factor.fit_generator(train_generator, steps_per_epoch=steps_epoch, epochs=50)
There is a lot of code, but it works fine as long as I use my entire dataset. As soon as I try to use my implemented "train_generator", however, it breaks down with the error message:
NotImplementedError: When subclassing the Model class, you should implement a call method.
So I know there is something wrong with my implementation of the train_generator, but I don't understand what I've missed. Can someone provide me with more information?

Try reading this issue thread; it seems you should implement a call method in your class when subclassing:
https://github.com/tensorflow/tensorflow/issues/43173

Although all subclasses of keras.Model must implement call, it is missing in several examples of Keras (see here or here). Under certain conditions, the error 'When subclassing the Model class, you should implement a call method.' is thrown.
I encountered this problem when including a DataGenerator (subclassed from keras.utils.Sequence) and solved it by implementing call() like this:
Autoencoder
...
def call(self, inputs, training=None, mask=None):
    z = self.encoder(inputs=inputs, training=training, mask=mask)
    return self.decoder(z)
...
GAN
...
def call(self, inputs, training=None, mask=None):
    batch_size = tf.shape(inputs)[0]
    random_latent_vector = tf.random.normal(shape=(batch_size, self.latent_dim))
    x = self.generator(inputs=random_latent_vector, training=training, mask=mask)
    if len(x.shape) != len(inputs.shape):
        raise Exception(f'Fake signal ({x.shape}) and real signal ({inputs.shape}) do not have same shape dimension')
    return self.critic(inputs=x, training=training, mask=mask)
...
It seems to be a known problem (see here)
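Applied to the question's factor_vae class, a minimal sketch of such a call method could look like the following (assuming, as in the question's code, that the encoder returns [z, z_mean, z_log_var]):

# Hypothetical sketch for the factor_vae class above; forwarding the input
# through the encoder and decoder is enough to satisfy the call() requirement.
def call(self, inputs, training=None, mask=None):
    z, z_mean, z_log_var = self.encoder(inputs, training=training)
    return self.decoder(z, training=training)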

Related

Variational autoencoder using tf.GradientTape

Here is an example of TensorFlow GradientTape provided by Keras for a typical variational autoencoder:
VAE-keras-example
The train_step function is implemented inside the model, and the model is trained with model.fit(). The example performs great, with no problems at all.
However, for another application, I need to implement the train_step function outside of the model definition. Since the target application is also a kind of VAE, I started from the above-mentioned example, applied some modifications, and tried to train the same model structure; the whole code follows. However, I get very weird numbers for the loss values compared to the original code, and after a couple of iterations the losses become nan.
Could you please let me know what the mistake is and why this happens?
Thanks in advance
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np

print(tf.test.is_gpu_available())  # prints True
print(tf.__version__)              # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

latent_dim = 2
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

optimizer = tf.keras.optimizers.Adam(lr=0.001)
objective = tf.keras.losses.SparseCategoricalCrossentropy()

(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
num_samples = x_train.shape[0]
epochs = 1
batch_size = 128

@tf.function
def train_step(data):
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)
        data = tf.expand_dims(data, axis=-1)
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)))
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = (reconstruction_loss + kl_loss)
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss, reconstruction_loss, kl_loss

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(num_samples // batch_size):
            s = step * batch_size
            e = s + batch_size
            x_batch = x_train[s:e, :, :]
            total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")
I think you forgot to normalize your data as shown in the tutorial you are referring to:
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
Otherwise, your code seems to be running fine and the loss is not nan. Here is the code for reference:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow import keras
import numpy as np

print(tf.test.is_gpu_available())  # prints True
print(tf.__version__)              # prints '2.0.0-beta1'

class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

latent_dim = 2
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
x = layers.Dense(7 * 7 * 64, activation="relu")(z)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
model = keras.Model(encoder_inputs, [decoder_outputs, z_mean, z_log_var], name="decoder")
model.summary()

optimizer = tf.keras.optimizers.Adam(lr=0.001)
objective = tf.keras.losses.SparseCategoricalCrossentropy()

(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
num_samples = x_train.shape[0]
epochs = 4
batch_size = 128

@tf.function
def train_step(data):
    with tf.GradientTape() as tape:
        reconstruction, z_mean, z_log_var = model(data, training=True)
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)))
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)) / batch_size
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1)) / batch_size
        total_loss = (reconstruction_loss + kl_loss)
    grads = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return total_loss, reconstruction_loss, kl_loss

with tf.device('gpu:0'):
    for epoch in range(epochs):
        for step in range(num_samples // batch_size):
            s = step * batch_size
            e = s + batch_size
            x_batch = x_train[s:e, :, :, tf.newaxis]
            print(x_batch.shape)
            total_loss, reconstruction_loss, kl_loss = train_step(x_batch)
            print("-----------------")
            print(f"epoch: {epoch} step: {step}")
            print(f"reconstruction_loss: {reconstruction_loss} ")
            print(f"kl_loss: {kl_loss} ")
            print(f"total_loss: {total_loss}")

3D - Variational Autoencoder implementation Error

I'm trying to implement a 3D variational autoencoder following the instructions in the book Generative Deep Learning, and also taking some things from this link; those examples are in 2D, so I have adapted them.
This is the code I'm using:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Input, BatchNormalization, Conv3D, Dense, Flatten, Lambda, Reshape, UpSampling3D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
## Functional API
input_shape = (32, 32, 32, 1)
z_dim = 3000
########### Encoder ############
enc_in = Input(shape = input_shape, name="encoder_input")
enc_conv1 = tf.keras.layers.Conv3D(filters=8, kernel_size=5, strides=(1,1,1), padding='same', activation="ReLU", kernel_initializer="he_uniform", name="conv_1")(enc_in)
max1 = tf.keras.layers.MaxPool3D(pool_size=(2,2,2), strides=(2,2,2), padding='valid', name="max_pool_1")(enc_conv1)
enc_conv2 = tf.keras.layers.Conv3D(filters=16, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="conv_2")(max1)
max2 = tf.keras.layers.MaxPool3D(pool_size=2, name="max_pool_2")(enc_conv2)
enc_fc1 = Dense(units = 512, kernel_initializer = 'he_uniform', activation = 'ReLU')(Flatten()(max2))
mu = Dense(units = z_dim, kernel_initializer = 'he_uniform', activation = None, name="mu")(enc_fc1)
log_variance = Dense(units = z_dim, kernel_initializer = 'he_uniform', activation = None, name="log_variance")(enc_fc1)
def sampling(args):
    mu, log_variance = args
    epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
    return mu + K.exp(0.5 * log_variance) * epsilon
z = Lambda(sampling, output_shape = (z_dim, ), name="z")([mu, log_variance])
encoder = Model(enc_in, [mu, log_variance, z], name="encoder")
########## Decoder ############
dec_in = Input(shape = (z_dim, ), name="decoder_input")
dec_fc1 = Dense(units = 512, kernel_initializer = 'he_uniform', activation = 'ReLU')(dec_in)
dec_unflatten = Reshape(target_shape = (8,8,8,1))(dec_fc1)
dec_conv1 = tf.keras.layers.Conv3D(filters=32, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="deconv_1")(dec_unflatten)
ups1 = tf.keras.layers.UpSampling3D(size=(2, 2, 2), name="ups_1")(dec_conv1)
dec_conv2 = tf.keras.layers.Conv3D(filters=16, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="deconv_2")(ups1)
ups2 = tf.keras.layers.UpSampling3D(size=(2, 2, 2), name="ups_2")(dec_conv2)
dec_conv4 = Conv3D(filters = 1, kernel_size = (3, 3, 3), padding = 'same', activation="ReLU", kernel_initializer = 'he_uniform', name='decorder_output')(ups2)
decoder = Model(dec_in, dec_conv4, name="decoder")
model_input = enc_in
model_output = decoder(z)
v_autoencoder = Model(model_input, model_output)
######################
def vae_r_loss(T1_input, model_output):
    r_loss = K.mean(K.square(T1_input - model_output), axis=[1, 2, 3])
    return r_loss_factor * r_loss

def vae_kl_loss(T1_input, model_output):
    kl_loss = -0.5 * K.sum(1 + log_variance - K.square(mu) - K.exp(log_variance), axis=1)
    return kl_loss

def vae_loss(T1_input, model_output):
    r_loss = vae_r_loss(T1_input, model_output)
    kl_loss = vae_kl_loss(T1_input, model_output)
    return r_loss + kl_loss
learning_rate=0.001
r_loss_factor = 500
optimizer = keras.optimizers.Adam(lr=learning_rate)
v_autoencoder.compile(optimizer=optimizer, loss = vae_loss, metrics = [vae_r_loss, vae_kl_loss])
v_autoencoder.summary()
But when I try to train the model:
history = v_autoencoder.fit(T1_input, T1_input, epochs=10, validation_split=0.2, shuffle=True)
I get this error:
TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_4'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as `tf.cond`, `tf.function`, gradient tapes, or `tf.map_fn`. Keras Functional model construction only supports TF API calls that *do* support dispatching, such as `tf.math.add` or `tf.reshape`. Other APIs cannot be called directly on symbolic Keras inputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer `call` and calling that layer on this symbolic input/output.
The encoder and the decoder models seem to be disconnected, and the KerasTensors are not able to flow correctly. Also, the encoder returns three tensors [mu, variance, z], whereas the decoder only requires z as an input.
Replace these lines,
model_input = enc_in
model_output = decoder(z)
v_autoencoder = Model(model_input, model_output)
with,
vae_input = Input(shape = input_shape, name="encoder_input")
mu_ , variance_ , z_ = encoder( vae_input )
vae_output = decoder( z_ )
v_autoencoder = Model( vae_input , [vae_output , mu_ , variance_] )
So, basically, the model will now output mu_ and variance_, which are the outputs of the encoder and are required for vae_kl_loss. Earlier, you were using these KerasTensors directly in vae_kl_loss, and hence you got the error.
Remove the axis argument from vae_kl_loss,
def vae_kl_loss(mu, log_variance):
    kl_loss = -0.5 * K.sum(1 + log_variance - K.square(mu) - K.exp(log_variance))
    return kl_loss
Pass mu and variance to vae_kl_loss,
def vae_loss(T1_input, model_output):
    mu = model_output[1]
    variance = model_output[2]
    vae_output = model_output[0]
    r_loss = vae_r_loss(T1_input, vae_output)
    kl_loss = vae_kl_loss(mu, variance)
    return r_loss + kl_loss
Modified code:
import tensorflow as tf
from tensorflow.keras.layers import Input, BatchNormalization, Conv3D, Dense, Flatten, Lambda, Reshape, UpSampling3D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
## Functional API
input_shape = (32, 32, 32, 1)
z_dim = 3000
########### Encoder ############
enc_in = Input(shape = input_shape, name="encoder_input")
enc_conv1 = tf.keras.layers.Conv3D(filters=8, kernel_size=5, strides=(1,1,1), padding='same', activation="ReLU", kernel_initializer="he_uniform", name="conv_1")(enc_in)
max1 = tf.keras.layers.MaxPool3D(pool_size=(2,2,2), strides=(2,2,2), padding='valid', name="max_pool_1")(enc_conv1)
enc_conv2 = tf.keras.layers.Conv3D(filters=16, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="conv_2")(max1)
max2 = tf.keras.layers.MaxPool3D(pool_size=2, name="max_pool_2")(enc_conv2)
enc_fc1 = Dense(units = 512, kernel_initializer = 'he_uniform', activation = 'ReLU')(Flatten()(max2))
mu = Dense(units = z_dim, kernel_initializer = 'he_uniform', activation = None, name="mu")(enc_fc1)
log_variance = Dense(units = z_dim, kernel_initializer = 'he_uniform', activation = None, name="log_variance")(enc_fc1)
def sampling(args):
    mu, log_variance = args
    epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
    return mu + K.exp(0.5 * log_variance) * epsilon
z = Lambda(sampling, output_shape = (z_dim, ), name="z")([mu, log_variance])
encoder = Model(enc_in, [mu, log_variance, z], name="encoder")
########## Decoder ############
dec_in = Input(shape = (z_dim, ), name="decoder_input")
dec_fc1 = Dense(units = 512, kernel_initializer = 'he_uniform', activation = 'ReLU')(dec_in)
dec_unflatten = Reshape(target_shape = (8,8,8,1))(dec_fc1)
dec_conv1 = tf.keras.layers.Conv3D(filters=32, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="deconv_1")(dec_unflatten)
ups1 = tf.keras.layers.UpSampling3D(size=(2, 2, 2), name="ups_1")(dec_conv1)
dec_conv2 = tf.keras.layers.Conv3D(filters=16, kernel_size=3, padding='same', activation="ReLU", kernel_initializer="he_uniform", name="deconv_2")(ups1)
ups2 = tf.keras.layers.UpSampling3D(size=(2, 2, 2), name="ups_2")(dec_conv2)
dec_conv4 = Conv3D(filters = 1, kernel_size = (3, 3, 3), padding = 'same', activation="ReLU", kernel_initializer = 'he_uniform', name='decorder_output')(ups2)
decoder = Model(dec_in, dec_conv4, name="decoder")
vae_input = Input(shape = input_shape, name="encoder_input")
mu_ , variance_ , z_ = encoder( vae_input )
vae_output = decoder( z_ )
v_autoencoder = Model( vae_input , [vae_output , mu_ , variance_] )
######################
def vae_r_loss(T1_input, model_output):
    r_loss = K.mean(K.square(T1_input - model_output), axis=[1, 2, 3])
    return r_loss_factor * r_loss

def vae_kl_loss(mu, log_variance):
    kl_loss = -0.5 * K.sum(1 + log_variance - K.square(mu) - K.exp(log_variance))
    return kl_loss

def vae_loss(T1_input, model_output):
    mu = model_output[1]
    variance = model_output[2]
    vae_output = model_output[0]
    r_loss = vae_r_loss(T1_input, vae_output)
    kl_loss = vae_kl_loss(mu, variance)
    return r_loss + kl_loss
learning_rate=0.001
r_loss_factor = 500
optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
v_autoencoder.compile(optimizer=optimizer, loss = vae_loss, metrics = [vae_r_loss, vae_kl_loss])
v_autoencoder.summary()
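A different way to handle the same restriction, mentioned here only as an alternative sketch and not as part of the fix above, is to attach the KL term with model.add_loss, so that the intermediate tensors never appear inside a compiled loss function (variable names follow the code above):

# Sketch: build one connected graph from the encoder input, then add the
# KL divergence as a model-level loss; compile only handles reconstruction.
vae_out = decoder(z)                      # z, mu, log_variance all come from enc_in
vae = Model(enc_in, vae_out)
kl = -0.5 * K.sum(1 + log_variance - K.square(mu) - K.exp(log_variance), axis=-1)
vae.add_loss(K.mean(kl))
vae.compile(optimizer=optimizer, loss='mse')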

Differences between model results

I've been experimenting with VAEs for weeks now, trying to get results similar to those of a tutorial's code, but without success. The only difference between my code and the tutorial's is that the tutorial compiles the model (demonstration below) and runs fit on it, while I run each layer manually. I set up the weight initializations with numpy arrays, just as with the epsilons, saved them into .npy files, and ran the two models. The results were different (the tutorial's model gives much smoother results).
As far as I understand neural networks, given the same training data in the same order, the same layers, the same initial weights, the same optimizer, and the same hyperparameters, the results should be the same every time.
I've made three simple examples: MNIST data with two dense layers of sizes 16 and 10. Weight initialization:
import numpy as np
w1 = np.random.uniform(-1, 1, size=(784, 16))
w2 = np.random.uniform(-1, 1, size=(16, 10))
np.save('w1', w1)
np.save('w2', w2)
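For completeness, fixing the global seeds is another way to pin down any remaining randomness between runs; this is a generic sketch in addition to the saved weight arrays, not something from the original post:

import numpy as np
import tensorflow as tf

# Fix the seeds so that shuffling, dropout and random sampling
# behave the same way on every run.
np.random.seed(0)
tf.random.set_seed(0)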
First version (manually executing layers):
import numpy as np
import tensorflow as tf
import math

w1 = np.load('w1.npy')
w2 = np.load('w2.npy')

class Model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flat = tf.keras.layers.Flatten()
        self.w1 = tf.keras.layers.Dense(
            units=16,
            activation='relu',
            use_bias=False,
            kernel_initializer=tf.keras.initializers.constant(w1)
        )
        self.w2 = tf.keras.layers.Dense(
            units=10,
            activation='sigmoid',
            use_bias=False,
            kernel_initializer=tf.keras.initializers.constant(w2)
        )
        self.optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def call(self, x, y):
        with tf.GradientTape() as tape:
            x = self.flat(x)
            a = self.w1(x)
            y_hat = self.w2(a)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y, y_hat)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return loss

def train(x, y, epochs, batch_size):
    batch_quantity = math.floor(x.shape[0] / batch_size)
    for e in range(epochs):
        for b in range(batch_quantity):
            train_x = x[b:b + batch_size]
            train_y = y[b:b + batch_size]
            loss = model(train_x, train_y)
            print(
                'Epoch:', e,
                'Batch:', b,
                'Loss:', loss.numpy().sum(),
            )

model = Model()
(x, y), _ = tf.keras.datasets.mnist.load_data()
epochs = 30
batch_size = 100
train(x, y, epochs, batch_size)
This one gives the exact same results no matter how many times you run it. As expected.
Second version (saving layers into a Model - manually calculating gradients, optimization - compile, and fit):
import numpy as np
import tensorflow as tf

w1 = np.load('w1.npy')
w2 = np.load('w2.npy')

input = tf.keras.Input(shape=(28, 28))
x = tf.keras.layers.Flatten()(input)
a = tf.keras.layers.Dense(
    units=16,
    activation='relu',
    use_bias=False,
    kernel_initializer=tf.keras.initializers.constant(w1)
)(x)
y_hat = tf.keras.layers.Dense(
    units=10,
    activation='sigmoid',
    use_bias=False,
    kernel_initializer=tf.keras.initializers.constant(w2)
)(a)
model = tf.keras.Model(input, y_hat, name='model')
model.summary()

class Model(tf.keras.Model):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def train_step(self, data):
        x, y = data[0]
        with tf.GradientTape() as tape:
            y_hat = self.model(x)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y, y_hat)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {
            'loss': tf.reduce_sum(loss)
        }

model = Model(model)
train_data, _ = tf.keras.datasets.mnist.load_data()
epochs = 30
batch_size = 100
model.compile(optimizer=tf.keras.optimizers.Adam())
model.fit(train_data, epochs=epochs, batch_size=batch_size, shuffle=False)
Third version (saving layers into a Model compile, and fit):
import numpy as np
import tensorflow as tf
w1 = np.load('w1.npy')
w2 = np.load('w2.npy')
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(
        units=16,
        activation='relu',
        use_bias=False,
        kernel_initializer=tf.keras.initializers.constant(w1)
    ),
    tf.keras.layers.Dense(
        units=10,
        activation='sigmoid',
        use_bias=False,
        kernel_initializer=tf.keras.initializers.constant(w2)
    )
])
(x, y), _ = tf.keras.datasets.mnist.load_data()
batch_size = 100
epochs = 30
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.SUM),
    optimizer=tf.keras.optimizers.Adam()
)
model.fit(
    x, y, batch_size=batch_size, epochs=epochs, shuffle=False
)
The second and third versions give almost exactly the same results, but different from the first. That I can't understand, as the optimizer, the loss function, the loss reduction, the initial weights, and the training data are all exactly the same.
If you're curious, the VAE examples and their differences can be tested as well.
Initialize weights:
import numpy as np
epsilon = np.random.normal(size=(100, 2))
wc1 = np.random.uniform(-0.022, 0.022, size=(3,3,1,32))
wc2 = np.random.uniform(-0.022, 0.022, size=(3,3,32,64))
wd1 = np.random.uniform(-0.022, 0.022, size=(3136,16))
wm = np.random.uniform(-0.022, 0.022, size=(16,2))
ws = np.random.uniform(-0.022, 0.022, size=(16,2))
wd2 = np.random.uniform(-0.022, 0.022, size=(2,3136))
wct1 = np.random.uniform(-0.022, 0.022, size=(3,3,64,64))
wct2 = np.random.uniform(-0.022, 0.022, size=(3,3,32,64))
wct3 = np.random.uniform(-0.022, 0.022, size=(3,3,1,32))
np.save('epsilon', epsilon)
np.save('wc1', wc1)
np.save('wc2', wc2)
np.save('wd1', wd1)
np.save('wm', wm)
np.save('ws', ws)
np.save('wd2', wd2)
np.save('wct1', wct1)
np.save('wct2', wct2)
np.save('wct3', wct3)
Tutorial's VAE:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
wc1 = np.load('wc1.npy')
wc2 = np.load('wc2.npy')
wd1 = np.load('wd1.npy')
wm = np.load('wm.npy')
ws = np.load('ws.npy')
wd2 = np.load('wd2.npy')
wct1 = np.load('wct1.npy')
wct2 = np.load('wct2.npy')
wct3 = np.load('wct3.npy')
epsilon = np.load('epsilon.npy')
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        # batch = tf.shape(z_mean)[0]
        # dim = tf.shape(z_mean)[1]
        # epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
latent_dim = 2
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc1))(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc2))(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd1))(x)
z_mean = layers.Dense(latent_dim, name="z_mean", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wm))(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var", use_bias=False, kernel_initializer=tf.keras.initializers.constant(ws))(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd2))(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct1))(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct2))(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct3))(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def train_step(self, data):
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = encoder(data)
            reconstruction = decoder(z)
            reconstruction_loss = tf.reduce_mean(
                keras.losses.binary_crossentropy(data, reconstruction)
            )
            reconstruction_loss *= 28 * 28
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_mean(kl_loss)
            kl_loss *= -0.5
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            'mean': z_mean,
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = x_train
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
epochs=30
batch_size=100
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(mnist_digits, epochs=epochs, batch_size=batch_size)
import matplotlib.pyplot as plt
def plot_latent(encoder, decoder):
    # display an n*n 2D manifold of digits
    n = 30
    digit_size = 28
    scale = 2.0
    figsize = 15
    figure = np.zeros((digit_size * n, digit_size * n))
    # linearly spaced coordinates corresponding to the 2D plot
    # of digit classes in the latent space
    grid_x = np.linspace(-scale, scale, n)
    grid_y = np.linspace(-scale, scale, n)[::-1]
    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z_sample = np.array([[xi, yi]])
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[
                i * digit_size : (i + 1) * digit_size,
                j * digit_size : (j + 1) * digit_size,
            ] = digit
    plt.figure(figsize=(figsize, figsize))
    start_range = digit_size // 2
    end_range = n * digit_size + start_range + 1
    pixel_range = np.arange(start_range, end_range, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap="Greys_r")
    plt.savefig('trg.png')
    plt.close()

plot_latent(encoder, decoder)
def plot_label_clusters(encoder, decoder, data, labels):
    # display a 2D plot of the digit classes in the latent space
    z_mean, _, _ = encoder.predict(data)
    plt.figure(figsize=(12, 10))
    plt.scatter(z_mean[:, 0], z_mean[:, 1], c=labels)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.savefig('wer.png')
    plt.close()
(x_train, y_train), _ = keras.datasets.mnist.load_data()
x_train = np.expand_dims(x_train, -1).astype("float32") / 255
plot_label_clusters(encoder, decoder, x_train, y_train)
My VAE:
import math
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
batch_size = 100
epochs = 30
(x, y), _ = tf.keras.datasets.mnist.load_data()
x = tf.expand_dims(x.astype("float32") / 255, 3)
wc1 = np.load('wc1.npy')
wc2 = np.load('wc2.npy')
wd1 = np.load('wd1.npy')
wm = np.load('wm.npy')
ws = np.load('ws.npy')
wd2 = np.load('wd2.npy')
wct1 = np.load('wct1.npy')
wct2 = np.load('wct2.npy')
wct3 = np.load('wct3.npy')
epsilon = np.load('epsilon.npy')
class VAE(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder = [
            tf.keras.layers.Conv2D(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc1)),
            tf.keras.layers.Conv2D(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(16, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd1)),
        ]
        self.wm = tf.keras.layers.Dense(2, use_bias=False, kernel_initializer=tf.keras.initializers.constant(wm))
        self.wv = tf.keras.layers.Dense(2, use_bias=False, kernel_initializer=tf.keras.initializers.constant(ws))
        self.decoder = [
            tf.keras.layers.Dense(7 * 7 * 64, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd2)),
            tf.keras.layers.Reshape((7, 7, 64)),
            tf.keras.layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct1)),
            tf.keras.layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct2)),
            tf.keras.layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct3)),
        ]
        self.optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def call(self, x):
        with tf.GradientTape() as tape:
            z = x
            for layer in self.encoder:
                z = layer(z)
            mean = self.wm(z)
            stdev = self.wv(z)
            # epsilon = tf.random.normal(mean.shape)
            z = mean + tf.exp(0.5 * stdev) * epsilon
            y_pred = z
            for layer in self.decoder:
                y_pred = layer(y_pred)
            reconstruction_loss = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(x, y_pred)
            )
            reconstruction_loss *= 28 * 28
            kl_loss = 1 + stdev - tf.square(mean) - tf.exp(stdev)
            kl_loss = tf.reduce_mean(kl_loss)
            kl_loss *= -0.5
            loss = reconstruction_loss + kl_loss
        gradients = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
        return loss, reconstruction_loss, kl_loss

model = VAE()

def train(x):
    batch_quantity = math.floor(x.shape[0] / batch_size)
    for i in range(epochs):
        for bi in range(batch_quantity):
            train_x = x[bi:bi + batch_size]
            loss, reconstruction_loss, kl_loss = model(train_x)
            print(
                'Epoch:', i,
                'Batch:', bi,
                'Loss:', loss.numpy(),
                'Reconstruction:', reconstruction_loss.numpy(),
                'KL:', kl_loss.numpy()
            )

train(x)
# display a 2D plot of the digit classes in the latent space
z = x
for layer in model.encoder:
z = layer(z)
mean = model.wm(z)
plt.figure(figsize=(12, 10))
plt.scatter(mean[:, 0], mean[:, 1], c=y)
plt.colorbar()
plt.xlabel("z[0]")
plt.ylabel("z[1]")
plt.savefig('def.png')
plt.close()
n = 30
d = 2.0
s = 28
grid_x = np.linspace(-d, d, n)
grid_y = np.linspace(d, -d, n)
image_width = s*n
image_height = image_width
image = np.zeros((image_height, image_width))
for row, x in enumerate(grid_x):
    for col, y in enumerate(grid_y):
        z = np.array([[x, y]])
        for layer in model.decoder:
            z = layer(z)
        digit = tf.squeeze(z)
        image[row * s: (row + 1) * s,
              col * s: (col + 1) * s] = digit.numpy()
plt.figure(figsize=(15, 15))
plt.imshow(image, cmap='Greys_r')
plt.axis('Off')
plt.savefig('asd.png')
plt.close()

ValueError in model subclassing with tensorflow 2

I'm trying to implement a WideResNet using Model subclassing in Keras. I cannot understand what's wrong with my code:
class ResidualBlock(layers.Layer):
    def __init__(self, filters, kernel_size, dropout, dropout_percentage, strides=1, **kwargs):
        super(ResidualBlock, self).__init__(**kwargs)
        self.conv_1 = layers.Conv2D(filters, (1, 1), strides=strides)
        self.bn_1 = layers.BatchNormalization()
        self.rel_1 = layers.ReLU()
        self.conv_2 = layers.Conv2D(filters, kernel_size, padding="same", strides=strides)
        self.dropout = layers.Dropout(dropout_percentage)
        self.bn_2 = layers.BatchNormalization()
        self.rel_2 = layers.ReLU()
        self.conv_3 = layers.Conv2D(filters, kernel_size, padding="same")
        self.add = layers.Add()
        self.dropout = dropout
        self.strides = strides

    def call(self, inputs):
        x = inputs
        if self.strides > 1:
            x = self.conv_1(x)
        res_x = self.bn_1(x)
        res_x = self.rel_1(x)
        res_x = self.conv_2(x)
        if self.dropout:
            res_x = self.dropout(x)
        res_x = self.bn_2(x)
        res_x = self.rel_2(x)
        res_x = self.conv_3(x)
        inputs = self.add([x, res_x])
        return inputs

class WideResidualNetwork(models.Model):
    def __init__(self, input_shape, n_classes, d, k, kernel_size=(3, 3), dropout=False, dropout_percentage=0.3, strides=1, **kwargs):
        super(WideResidualNetwork, self).__init__(**kwargs)
        if (d - 4) % 6 != 0:
            raise ValueError('Please choose a correct depth!')
        self.rel_1 = layers.ReLU()
        self.conv_1 = layers.Conv2D(16, (3, 3), padding='same')
        self.conv_2 = layers.Conv2D(16 * k, (1, 1))
        self.dense = layers.Dense(n_classes)
        self.dropout = dropout
        self.dropout_percentage = dropout_percentage
        self.N = int((d - 4) / 6)
        self.k = k
        self.d = d
        self.kernel_size = kernel_size

    def build(self, input_shape):
        self.bn_1 = layers.BatchNormalization(input_shape=input_shape)

    def call(self, inputs):
        x = self.bn_1(inputs)
        x = self.rel_1(x)
        x = self.conv_1(x)
        x = self.conv_2(x)
        for _ in range(self.N):
            x = ResidualBlock(16 * self.k, self.kernel_size, self.dropout, self.dropout_percentage)(x)
        x = ResidualBlock(32 * self.k, self.kernel_size, self.dropout, self.dropout_percentage, strides=2)(x)
        for _ in range(self.N - 1):
            x = ResidualBlock(32 * self.k, self.kernel_size, self.dropout, self.dropout_percentage)(x)
        x = ResidualBlock(64 * self.k, self.kernel_size, self.dropout, self.dropout_percentage, strides=2)(x)
        for _ in range(self.N - 1):
            x = ResidualBlock(64 * self.k, self.kernel_size, self.dropout, self.dropout_percentage)(x)
        x = layers.GlobalAveragePooling2D()(x)
        x = self.dense(x)
        x = layers.Activation("softmax")(x)
        return x
When I try to fit the model in this way:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
model = WideResidualNetwork(x_train[0].shape, 10, 28, 1)
x_train, x_test = x_train/255. , x_test/255.
model = WideResidualNetwork(x_train[0].shape, 10, 28, 1)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
epochs = 40
batch_size = 64
validation_split = 0.2
h = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split)
I got the following error:
...
<ipython-input-26-61c1bdb3546c>:31 call *
x = ResidualBlock(16*self.k, self.kernel_size, self.dropout, self.dropout_percentage)(x)
<ipython-input-9-3fea1e77cb6e>:23 call *
res_x = self.bn_1(x)
...
ValueError: tf.function-decorated function tried to create variables on non-first call.
So I don't understand where the problem is. I also tried to move the initialization into build, but without results; the error persists. Probably I have some gaps in my knowledge.
Thank you in advance
You are initializing the ResidualBlock, GlobalAveragePooling2D, and Activation layers inside the call method. Try moving them into __init__, as you did for the other layers, and it shouldn't give you that error.
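A minimal, self-contained sketch of that pattern (not the full WideResNet from the question, just an illustration of creating every sub-layer once in __init__ and only applying it in call):

import tensorflow as tf
from tensorflow.keras import layers, models

class TinyResNet(models.Model):
    def __init__(self, n_blocks=2, filters=16, n_classes=10, **kwargs):
        super().__init__(**kwargs)
        # All layers, including the repeated blocks, are created here once.
        self.stem = layers.Conv2D(filters, (3, 3), padding="same")
        self.blocks = [layers.Conv2D(filters, (3, 3), padding="same", activation="relu")
                       for _ in range(n_blocks)]
        self.pool = layers.GlobalAveragePooling2D()
        self.head = layers.Dense(n_classes)
        self.softmax = layers.Activation("softmax")

    def call(self, inputs):
        # call only applies the already-created layers.
        x = self.stem(inputs)
        for block in self.blocks:
            x = block(x) + x  # simple residual connection
        x = self.pool(x)
        x = self.head(x)
        return self.softmax(x)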

Error: When subclassing the `Model` class, you should implement a `call` method. on tensorflow custom model

I am trying to train my custom model on the CIFAR-10 dataset.
My model's code is below:
class cifar10Model(keras.Model):
    def __init__(self):
        super(cifar10Model, self).__init__()
        self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
        self.pool1 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.1)
        self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
        self.pool2 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm2 = keras.layers.BatchNormalization()
        self.dropout2 = keras.layers.Dropout(0.2)
        self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
        self.pool3 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm3 = keras.layers.BatchNormalization()
        self.dropout3 = keras.layers.Dropout(0.3)
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(128, activation='relu')
        self.dense2 = keras.layers.Dense(10)

        def call(self, x):
            x = self.conv1(x)
            x = self.pool1(x)
            x = self.batch_norm1(X)
            x = self.dropout1(x)
            x = self.conv2(x)
            x = self.pool2(x)
            x = self.batch_norm2(X)
            x = self.dropout2(x)
            x = self.conv3(x)
            x = self.pool3(x)
            x = self.batch_norm3(x)
            x = self.dropout3(x)
            x = self.flatten(x)
            x = self.dense1(x)
            return self.dense2(x)

model = cifar10Model()
When I run this code, it gives me no error.
Then I defined my training loop:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

@tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

epochs = 10
for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
    for images, labels in train_dataset:
        train_step(images, labels)
    for images, labels in test_dataset:
        test_step(images, labels)
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
When I run this code, I get the following error:
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I am currently running my code on Google Colab.
My colab link is https://colab.research.google.com/drive/1sOlbRpPRdyOCJI0zRFfIA-Trj1vrIbWY?usp=sharing
My tensorflow version on colab is 2.2.0
Also, when I tried to predict labels from the untrained model with this code:
print(model(train_images))
it gives me the same error.
The error says that I have not implemented the call method on the model, but I have defined the call method.
I also tried changing the call method to the __call__ method, but it still gives me the same error.
Thanks in advance.
The problem is with indentation. You've defined the call method inside __init__. Try defining it outside the __init__ method, as follows:
class cifar10Model(keras.Model):
    def __init__(self):
        super(cifar10Model, self).__init__()
        self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
        self.pool1 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.1)
        self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
        self.pool2 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm2 = keras.layers.BatchNormalization()
        self.dropout2 = keras.layers.Dropout(0.2)
        self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
        self.pool3 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm3 = keras.layers.BatchNormalization()
        self.dropout3 = keras.layers.Dropout(0.3)
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(128, activation='relu')
        self.dense2 = keras.layers.Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

model = cifar10Model()
Hope this helps.
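As a side note, the training loop in the question assumes train_dataset and test_dataset already exist; a minimal sketch of how they might be built from CIFAR-10 (illustrative only, the batch size is arbitrary):

import tensorflow as tf
from tensorflow import keras

(train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

# Batched tf.data pipelines consumed by train_step / test_step above.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)).shuffle(10000).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_images, test_labels)).batch(32)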
