Related
I have a pytorch-lightning model that has a dense layer like so:
def __init__(...)
...
self.dense = nn.Linear(channels[-1], 64, bias=True)
...
for my project, I need to get the activation values of this layer as a list
I have tried this code which I found on the pytorch discussion forum:
activation = {}
def get_activation(name):
def hook(model, input, output):
activation[name] = output.detach()
return hook
test_img = cv.imread(f'digimage/100.jpg')
test_img = cv.resize(test_img, (128, 128))
test_img = np.moveaxis(test_img, 2, 0)
modelftr = load_feature_model(**model_dict)
num_ftrs = modelftr.fc.in_features
modelftr.fc = torch.nn.Linear(num_ftrs, 228)
modelftr.load_state_dict(torch.load('...'))
modelftr.dense.register_forward_hook(get_activation('dense'))
with torch.no_grad():
modelatt.to('cpu')
modelatt.eval()
test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
output = modelcat(test_img)
print(activation['dense'])
But this gives a keyerror:
8 test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
9 output = modelcat(test_img)
---> 10 print(activation['dense'])
KeyError: 'dense'
Update:
This is my full model code.
As you can see there is a linear layer named dense
class FAtNet(pl.LightningModule):
def __init__(self, image_size, in_channels, num_blocks, channels,
num_classes=20, block_types=['C', 'C', 'T', 'T'], lr=0.0001, loss_function=nn.CrossEntropyLoss()):
super().__init__()
self.lr = lr
self.loss_function = loss_function
ih, iw = image_size
block = {'C': MBConv, 'T': Transformer}
self.s0 = self._make_layer(
conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
self.s1 = self._make_layer(
block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
self.s2 = self._make_layer(
block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
self.s3 = self._make_layer(
block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
self.s4 = self._make_layer(
block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))
self.pool = nn.AvgPool2d(ih // 32, 1)
self.dense = nn.Linear(channels[-1], 64, bias=True)
self.fc = nn.Linear(64, num_classes, bias=False)
def forward(self, x):
x = self.s0(x)
x = self.s1(x)
x = self.s2(x)
x = self.s3(x)
x = self.s4(x)
x = self.pool(x).view(-1, x.shape[1])
x = self.dense(x)
x = self.fc(x)
return x
def _make_layer(self, block, inp, oup, depth, image_size):
layers = nn.ModuleList([])
for i in range(depth):
if i == 0:
layers.append(block(inp, oup, image_size, downsample=True))
else:
layers.append(block(oup, oup, image_size))
return nn.Sequential(*layers)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=self.lr)
def training_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('train_loss', loss)
return loss
def test_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('test_loss', loss)
return loss
### custom prediction function ###
def predict(self, dm):
X_test = dm.X_test
self.eval()
X_test = torch.tensor(X_test).float()
self.to(device='cuda')
pred = []
with torch.no_grad():
for data in X_test:
output = self(data)
pred.append(output)
pred = pred[0].detach()
pred = pred.cpu()
self.to(device='cpu')
self.train()
return pred
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
It seems like you model does not have 'dense' layer, only 'fc'.
Try:
modelftr.fc.register_forward_hook(get_activation('fc'))
I've been experimenting with VAEs for weeks now, trying to get similar results as in a tutorial code, but being unsuccessful in it. The only difference between mine and the tutorial's code is that it compiles the model (demonstration will be below), and runs fit on it, but I run each layer manually. What I've done is set up weight initialization with numpy arrays, just as with epsilons, saved into .npy files, and ran the two models. The results were different (the tutorial's model gives way more smooth results)
As of my understanding of neural networks so far, given the same training data in the same order, with the same layers, with same initial weights, same optimizer, and hyperparameters, the results should be the same all the time.
I've made 3 simple examples. MNIST data with two dense layers with sizes 16, and 10. Weight initialization:
import numpy as np
w1 = np.random.uniform(-1, 1, size=(784, 16))
w2 = np.random.uniform(-1, 1, size=(16, 10))
np.save('w1', w1)
np.save('w2', w2)
First version (manually executing layers):
import numpy as np
import tensorflow as tf
import math
w1 = np.load('w1.npy')
w2 = np.load('w2.npy')
class Model(tf.keras.Model):
def __init__(self):
super().__init__()
self.flat = tf.keras.layers.Flatten()
self.w1 = tf.keras.layers.Dense(
units=16,
activation='relu',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w1)
)
self.w2 = tf.keras.layers.Dense(
units=10,
activation='sigmoid',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w2)
)
self.optimizer = tf.keras.optimizers.Adam()
#tf.function
def call(self, x, y):
with tf.GradientTape() as tape:
x = self.flat(x)
a = self.w1(x)
y_hat = self.w2(a)
loss = tf.keras.losses.sparse_categorical_crossentropy(y, y_hat)
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
return loss
def train(x, y, epochs, batch_size):
batch_quantity = math.floor(x.shape[0] / batch_size)
for e in range(epochs):
for b in range(batch_quantity):
train_x = x[b:b + batch_size]
train_y = y[b:b + batch_size]
loss = model(train_x, train_y)
print(
'Epoch:', e,
'Batch:', b,
'Loss:', loss.numpy().sum(),
)
model = Model()
(x, y), _ = tf.keras.datasets.mnist.load_data()
epochs = 30
batch_size = 100
train(x, y, epochs, batch_size)
This one gives the exact same results no matter how many times you run it. As expected.
Second version (saving layers into a Model - manually calculating gradients, optimization - compile, and fit):
import numpy as np
import tensorflow as tf
w1 = np.load('w1.npy')
w2 = np.load('w2.npy')
input = tf.keras.Input(shape=(28, 28))
x = tf.keras.layers.Flatten()(input)
a = tf.keras.layers.Dense(
units=16,
activation='relu',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w1)
)(x)
y_hat = tf.keras.layers.Dense(
units=10,
activation='sigmoid',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w2)
)(a)
model = tf.keras.Model(input, y_hat, name='model')
model.summary()
class Model(tf.keras.Model):
def __init__(self, model):
super().__init__()
self.model = model
def train_step(self, data):
x, y = data[0]
with tf.GradientTape() as tape:
y_hat = self.model(x)
loss = tf.keras.losses.sparse_categorical_crossentropy(y, y_hat)
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
return {
'loss': tf.reduce_sum(loss)
}
model = Model(model)
train_data, _ = tf.keras.datasets.mnist.load_data()
epochs = 30
batch_size = 100
model.compile(optimizer=tf.keras.optimizers.Adam())
model.fit(train_data, epochs=epochs, batch_size=batch_size shuffle=False)
Third version (saving layers into a Model compile, and fit):
import numpy as np
import tensorflow as tf
w1 = np.load('w1.npy')
w2 = np.load('w2.npy')
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(
units=16,
activation='relu',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w1)
),
tf.keras.layers.Dense(
units=10,
activation='sigmoid',
use_bias=False,
kernel_initializer=tf.keras.initializers.constant(w2)
)
])
(x, y), _ = tf.keras.datasets.mnist.load_data()
batch_size = 100
epochs = 30
model.compile(
loss=tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.SUM),
optimizer=tf.keras.optimizers.Adam()
)
model.fit(
x, y, batch_size=batch_size, epochs=epochs, shuffle=False
)
The second, and third versions are giving almost the exact same results, but different than the first. That I can't understand, as the optimizer, the loss function, the loss reduction, initial weights, training data, everything is exactly the same.
If you're curious, the VAE examples, and their differences can be tested.
Initialize weights:
import numpy as np
epsilon = np.random.normal(size=(100, 2))
wc1 = np.random.uniform(-0.022, 0.022, size=(3,3,1,32))
wc2 = np.random.uniform(-0.022, 0.022, size=(3,3,32,64))
wd1 = np.random.uniform(-0.022, 0.022, size=(3136,16))
wm = np.random.uniform(-0.022, 0.022, size=(16,2))
ws = np.random.uniform(-0.022, 0.022, size=(16,2))
wd2 = np.random.uniform(-0.022, 0.022, size=(2,3136))
wct1 = np.random.uniform(-0.022, 0.022, size=(3,3,64,64))
wct2 = np.random.uniform(-0.022, 0.022, size=(3,3,32,64))
wct3 = np.random.uniform(-0.022, 0.022, size=(3,3,1,32))
np.save('epsilon', epsilon)
np.save('wc1', wc1)
np.save('wc2', wc2)
np.save('wd1', wd1)
np.save('wm', wm)
np.save('ws', ws)
np.save('wd2', wd2)
np.save('wct1', wct1)
np.save('wct2', wct2)
np.save('wct3', wct3)
Tutorial's VAE:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
wc1 = np.load('wc1.npy')
wc2 = np.load('wc2.npy')
wd1 = np.load('wd1.npy')
wm = np.load('wm.npy')
ws = np.load('ws.npy')
wd2 = np.load('wd2.npy')
wct1 = np.load('wct1.npy')
wct2 = np.load('wct2.npy')
wct3 = np.load('wct3.npy')
epsilon = np.load('epsilon.npy')
class Sampling(layers.Layer):
"""Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
def call(self, inputs):
z_mean, z_log_var = inputs
# batch = tf.shape(z_mean)[0]
# dim = tf.shape(z_mean)[1]
# epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
latent_dim = 2
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc1))(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc2))(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd1))(x)
z_mean = layers.Dense(latent_dim, name="z_mean", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wm))(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var", use_bias=False, kernel_initializer=tf.keras.initializers.constant(ws))(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd2))(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct1))(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct2))(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct3))(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
class VAE(keras.Model):
def __init__(self, encoder, decoder, **kwargs):
super(VAE, self).__init__(**kwargs)
self.encoder = encoder
self.decoder = decoder
def train_step(self, data):
if isinstance(data, tuple):
data = data[0]
with tf.GradientTape() as tape:
z_mean, z_log_var, z = encoder(data)
reconstruction = decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.binary_crossentropy(data, reconstruction)
)
reconstruction_loss *= 28 * 28
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + kl_loss
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
'mean': z_mean,
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = x_train
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
epochs=30
batch_size=100
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(mnist_digits, epochs=epochs, batch_size=batch_size)
import matplotlib.pyplot as plt
def plot_latent(encoder, decoder):
# display a n*n 2D manifold of digits
n = 30
digit_size = 28
scale = 2.0
figsize = 15
figure = np.zeros((digit_size * n, digit_size * n))
# linearly spaced coordinates corresponding to the 2D plot
# of digit classes in the latent space
grid_x = np.linspace(-scale, scale, n)
grid_y = np.linspace(-scale, scale, n)[::-1]
for i, yi in enumerate(grid_y):
for j, xi in enumerate(grid_x):
z_sample = np.array([[xi, yi]])
x_decoded = decoder.predict(z_sample)
digit = x_decoded[0].reshape(digit_size, digit_size)
figure[
i * digit_size : (i + 1) * digit_size,
j * digit_size : (j + 1) * digit_size,
] = digit
plt.figure(figsize=(figsize, figsize))
start_range = digit_size // 2
end_range = n * digit_size + start_range + 1
pixel_range = np.arange(start_range, end_range, digit_size)
sample_range_x = np.round(grid_x, 1)
sample_range_y = np.round(grid_y, 1)
plt.xticks(pixel_range, sample_range_x)
plt.yticks(pixel_range, sample_range_y)
plt.xlabel("z[0]")
plt.ylabel("z[1]")
plt.imshow(figure, cmap="Greys_r")
plt.savefig('trg.png')
plt.close()
plot_latent(encoder, decoder)
def plot_label_clusters(encoder, decoder, data, labels):
# display a 2D plot of the digit classes in the latent space
z_mean, _, _ = encoder.predict(data)
plt.figure(figsize=(12, 10))
plt.scatter(z_mean[:, 0], z_mean[:, 1], c=labels)
plt.colorbar()
plt.xlabel("z[0]")
plt.ylabel("z[1]")
plt.savefig('wer.png')
plt.close()
(x_train, y_train), _ = keras.datasets.mnist.load_data()
x_train = np.expand_dims(x_train, -1).astype("float32") / 255
plot_label_clusters(encoder, decoder, x_train, y_train)
My VAE:
import math
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
batch_size = 100
epochs = 30
(x, y), _ = tf.keras.datasets.mnist.load_data()
x = tf.expand_dims(x.astype("float32") / 255, 3)
wc1 = np.load('wc1.npy')
wc2 = np.load('wc2.npy')
wd1 = np.load('wd1.npy')
wm = np.load('wm.npy')
ws = np.load('ws.npy')
wd2 = np.load('wd2.npy')
wct1 = np.load('wct1.npy')
wct2 = np.load('wct2.npy')
wct3 = np.load('wct3.npy')
epsilon = np.load('epsilon.npy')
class VAE(tf.keras.Model):
def __init__(self):
super().__init__()
self.encoder = [
tf.keras.layers.Conv2D(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc1)),
tf.keras.layers.Conv2D(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wc2)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(16, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd1)),
]
self.wm = tf.keras.layers.Dense(2, use_bias=False, kernel_initializer=tf.keras.initializers.constant(wm))
self.wv = tf.keras.layers.Dense(2, use_bias=False, kernel_initializer=tf.keras.initializers.constant(ws))
self.decoder = [
tf.keras.layers.Dense(7 * 7 * 64, activation="relu", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wd2)),
tf.keras.layers.Reshape((7, 7, 64)),
tf.keras.layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct1)),
tf.keras.layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct2)),
tf.keras.layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same", use_bias=False, kernel_initializer=tf.keras.initializers.constant(wct3)),
]
self.optimizer = tf.keras.optimizers.Adam()
#tf.function
def call(self, x):
with tf.GradientTape() as tape:
z = x
for layer in self.encoder:
z = layer(z)
mean = self.wm(z)
stdev = self.wv(z)
# epsilon = tf.random.normal(mean.shape)
z = mean + tf.exp(0.5 * stdev) * epsilon
y_pred = z
for layer in self.decoder:
y_pred = layer(y_pred)
reconstruction_loss = tf.reduce_mean(
tf.keras.losses.binary_crossentropy(x, y_pred)
)
reconstruction_loss *= 28 * 28
kl_loss = 1 + stdev - tf.square(mean) - tf.exp(stdev)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
loss = reconstruction_loss + kl_loss
gradients = tape.gradient(loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
return loss, reconstruction_loss, kl_loss
model = VAE()
def train(x):
batch_quantity = math.floor(x.shape[0] / batch_size)
for i in range(epochs):
for bi in range(batch_quantity):
train_x = x[bi:bi + batch_size]
loss, reconstruction_loss, kl_loss = model(train_x)
print(
'Epoch:', i,
'Batch:', bi,
'Loss:', loss.numpy(),
'Reconstruction:', reconstruction_loss.numpy(),
'KL:', kl_loss.numpy()
)
train(x)
# display a 2D plot of the digit classes in the latent space
z = x
for layer in model.encoder:
z = layer(z)
mean = model.wm(z)
plt.figure(figsize=(12, 10))
plt.scatter(mean[:, 0], mean[:, 1], c=y)
plt.colorbar()
plt.xlabel("z[0]")
plt.ylabel("z[1]")
plt.savefig('def.png')
plt.close()
n = 30
d = 2.0
s = 28
grid_x = np.linspace(-d, d, n)
grid_y = np.linspace(d, -d, n)
image_width = s*n
image_height = image_width
image = np.zeros((image_height, image_width))
for row, x in enumerate(grid_x):
for col, y in enumerate(grid_y):
z = np.array([[x, y]])
for layer in model.decoder:
z = layer(z)
digit = tf.squeeze(z)
image[row * s: (row + 1) * s,
col * s: (col + 1) * s] = digit.numpy()
plt.figure(figsize=(15, 15))
plt.imshow(image, cmap='Greys_r')
plt.axis('Off')
plt.savefig('asd.png')
plt.close()
I'm currently trying to implement a variational autoencoder but I'm quite stuck, I cannot understand how to use a datagenerator in Keras. What I have so far is:
import keras
import tensorflow as tf
from tensorflow.keras import layers
class Sampling(layers.Layer):
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(z_log_var / 2) * epsilon
class factor_vae(keras.Model):
def __init__(self):
super(factor_vae, self).__init__()
self.encoder = self.encoder_factor_vae()
self.decoder = self.decoder_factor_vae()
self.classifier = self.MLP_classifier()
def train_step(self, data):
data = data[0]
with tf.GradientTape() as tape:
z, z_mean, z_log_var = self.encoder(data)
reconstruction = self.decoder(z)
reconstruction_loss = tf.reduce_mean(
keras.losses.mse(data, reconstruction))
reconstruction_loss *= 4096 #denna kan ändras
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_mean(kl_loss)
kl_loss *= -0.5
total_loss = reconstruction_loss + (kl_loss)
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
}
def encoder_factor_vae(self):
x_inp = Input(shape=(64, 64, 1))
z = layers.Conv2D(filters=32, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(x_inp)
z = BatchNormalization()(z)
z = layers.Conv2D(filters=32, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
z = BatchNormalization()(z)
z = layers.Conv2D(filters=64, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
z = BatchNormalization()(z)
z = layers.Conv2D(filters=64, kernel_size=(4, 4), activation="relu", strides=2, padding="same")(z)
z = BatchNormalization()(z)
z = layers.Flatten()(z)
z = Dense(units=128, activation='relu')(z)
z = BatchNormalization()(z)
z_mean = Dense(units=10, activation='relu')(z) # här tror jag samplingen ska ske
z_log_var = Dense(units=10, activation='sigmoid')(z) # bör vara sampling från reparameterizationen
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(x_inp, [z, z_mean, z_log_var], name="encoder")
encoder.summary()
return encoder
def decoder_factor_vae(self):
z_inp = Input(shape=(10,))
x_rec = Dense(units=128, activation='relu')(z_inp)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=1024, activation='relu')(x_rec) #hit fungerar
x_rec = BatchNormalization()(x_rec)
x_rec = layers.Reshape((4, 4, 64))(x_rec)
x_rec = layers.Conv2DTranspose(filters=64, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(
x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = layers.Conv2DTranspose(filters=32, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(
x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = layers.Conv2DTranspose(filters=32, kernel_size=(4, 4), activation='relu', strides=2, padding='same')(
x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = layers.Conv2DTranspose(filters=1, kernel_size=(4, 4), strides=2, padding='same')(
x_rec)
decoder = keras.Model(z_inp, x_rec, name="decoder") # går att skicka in vilken batchsize som helst
decoder.summary()
return decoder
def MLP_classifier(self):
z_inp = Input(shape=(10,))
x_rec = Dense(units=1000)(z_inp) #1
x_rec = LeakyReLU(alpha=0.3)(x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=1000)(x_rec) #2
x_rec = LeakyReLU(alpha=0.3)(x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=1000)(x_rec) # 3
x_rec = LeakyReLU(alpha=0.3)(x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=1000)(x_rec) # 4
x_rec = LeakyReLU(alpha=0.3)(x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=1000)(x_rec) # 5
x_rec = LeakyReLU(alpha=0.3)(x_rec)
x_rec = BatchNormalization()(x_rec)
x_rec = Dense(units=2)(x_rec) # 6
classifier = keras.Model(z_inp, x_rec, name="clasifier")
return classifier
def generate_batches(data):
L = 50
start = 0
end = start + L
y_L_real = np.zeros((L, 2))
y_L_fake = np.zeros((L, 2))
y_L_real[:, 0] = 1
y_L_fake[:, 1] = 1
#total_y = np.vstack((y_L_real, y_L_fake))
while True:
x_L_real = data[start:end] #antalet värden är 2xL
x_L_fake = np.roll(x_L_real, shift=2, axis=0)
total_x = np.vstack((x_L_real, x_L_fake))
start += L
end += L
if start >= data.shape[0]:
start = 0
end = L
yield total_x, total_x
data = dsprite()
factor = factor_vae()
xyz = np.load("C:\\Users\\joaki\\OneDrive\\Skrivbord\\images\\dsprites_ndarray_"
"co1sh3sc6or40x32y32_64x64.npz")
test_data = xyz['imgs']
train_steps = 3000
steps_epoch = 300
factor.compile(optimizer=keras.optimizers.Adam(0.001))
train_generator = generate_batches(test_data)
factor.fit_generator(train_generator, steps_per_epoch=steps_epoch, epochs=50)
There is a lot of code, but it does work fine as long as I used my entire dataset, but as soon as I try to use my implemented "train_generator" it breaks down and I get the error message:
NotImplementedError: When subclassing the Model class, you should implement a call method. So I know there is something wrong with my implementation of the train_generator, but I dont understand what I've missed, can someone provide me more information?
Try reading this forum page, seems that you should call method in your class when subclassing:
https://github.com/tensorflow/tensorflow/issues/43173
Although all subclasses of keras.Model must implement call, it is missing in several examples of Keras (see here or here). Under certain conditions, the error 'When subclassing the Model class, you should implement a call method.' is thrown.
I encounter this problem when including a DataGenerator (subclassed from keras.utils.Sequence) and solved it by implementing call() like this:
Autoencoder
...
def call(self, inputs, training=None, mask=None):
z = self.encoder(inputs=inputs, training=training, mask=mask)
return self.decoder(z)
...
GAN
...
def call(self, inputs, training=None, mask=None):
batch_size = tf.shape(inputs)[0]
random_latent_vector = tf.random.normal(shape=(batch_size, self.latent_dim))
x = self.generator(inputs=random_latent_vector, training=training, mask=mask)
if len(x.shape) != len(inputs.shape):
raise Exception(f'Fake signal ({x.shape}) and real signal ({inputs.shape}) do not have same shape dimension')
return self.critic(inputs=x, training=training, mask=mask)
...
It seems to be a known problem (see here)
I am trying to train my custom model on Cifar 10 dataset.
My model's code is below: -
class cifar10Model(keras.Model):
def __init__(self):
super(cifar10Model, self).__init__()
self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
self.pool1 = keras.layers.MaxPool2D((3, 3))
self.batch_norm1 = keras.layers.BatchNormalization()
self.dropout1 = keras.layers.Dropout(0.1)
self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
self.pool2 = keras.layers.MaxPool2D((3, 3))
self.batch_norm2 = keras.layers.BatchNormalization()
self.dropout2 = keras.layers.Dropout(0.2)
self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
self.pool3 = keras.layers.MaxPool2D((3, 3))
self.batch_norm3 = keras.layers.BatchNormalization()
self.dropout3 = keras.layers.Dropout(0.3)
self.flatten = keras.layers.Flatten()
self.dense1 = keras.layers.Dense(128, activation='relu')
self.dense2 = keras.layers.Dense(10)
def call(self, x):
x = self.conv1(x)
x = self.pool1(x)
x = self.batch_norm1(X)
x = self.dropout1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.batch_norm2(X)
x = self.dropout2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.batch_norm3(x)
x = self.dropout3(x)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
model = cifar10Model()
When i run this code this gives me no error.
Then i defined my training loop
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
#tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images, training=True)
loss = loss_object(labels, predictions)
grad = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grad, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
#tf.function
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
epochs = 10
for epoch in range(epochs):
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
for images, labels in train_dataset:
train_step(images, labels)
for images, labels in test_dataset:
test_step(images, labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
When i run this code, i get the following error
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I am currently running my code on google colab.
My colab link is https://colab.research.google.com/drive/1sOlbRpPRdyOCJI0zRFfIA-Trj1vrIbWY?usp=sharing
My tensorflow version on colab is 2.2.0
Also, when i tried to predict labels from untrained model by this code :-
print(model(train_images))
This also gives me the same error.
The error is saying that i have not implemented the call method on model.
but, i have defined the call method.
I also tried by changing the call method to __call__ method.
But still, it gives me the same error.
Thanks in advance :-
The problem is with indentation. You've defined call method inside __init__. Try defining it outside the __init__ method as follows:
class cifar10Model(keras.Model):
def __init__(self):
super(cifar10Model, self).__init__()
self.conv1 = keras.layers.Conv3D(32, 3, activation='relu', input_shape=(32, 32, 3))
self.pool1 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm1 = keras.layers.BatchNormalization()
self.dropout1 = keras.layers.Dropout(0.1)
self.conv2 = keras.layers.Conv3D(64, 3, activation='relu')
self.pool2 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm2 = keras.layers.BatchNormalization()
self.dropout2 = keras.layers.Dropout(0.2)
self.conv3 = keras.layers.Conv3D(128, 3, activation='relu')
self.pool3 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm3 = keras.layers.BatchNormalization()
self.dropout3 = keras.layers.Dropout(0.3)
self.flatten = keras.layers.Flatten()
self.dense1 = keras.layers.Dense(128, activation='relu')
self.dense2 = keras.layers.Dense(10)
def call(self, x):
x = self.conv1(x)
x = self.pool1(x)
x = self.batch_norm1(X)
x = self.dropout1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.batch_norm2(X)
x = self.dropout2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.batch_norm3(X)
x = self.dropout3(x)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
model = cifar10Model()
Hope this helps.
I am trying to understand how Keras custom layers works, but I am facing a problem with the validation accuracy of my model.
I tried to reproduce a simple convolutional network on MNIST dataset but with a custom layer combining the Conv2D operator and the BatchNormalisation.
First, the data I used :
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.array([x.reshape(28, 28, 1) for x in X_train])
X_test = np.array([x.reshape(28, 28, 1) for x in X_test])
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)
Here is the original implementation which works well :
def get_model():
input_ = Input(shape=(28, 28, 1))
x = Conv2D(filters=64, kernel_size=3, activation="relu", input_shape=(28,28,1))(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2D(filters=128, kernel_size=3, activation="relu")(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2D(filters=256, kernel_size=3, activation="relu")(input_)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2,2))(x)
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dense(64, activation="relu")(x)
x = Dense(10, activation="softmax")(x)
mod = Model(inputs=input_, outputs=x)
return mod
optim = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, clipvalue=K.epsilon())
model = get_model()
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=["accuracy"])
model.fit(X_train, y_train, batch_size=128, epochs=3, validation_data=(X_test, y_test))
With this initial model, after 3 epochs, I get a train accuracy of 97% and validation 97%
And here is my custom layer :
class Conv2DLayer(Layer):
def __init__(self, filters, kernel_size, dropout_ratio=None, strides=(1, 1), activation="relu", use_bn=True, *args, **kwargs):
self._filters = filters
self._kernel_size = kernel_size
self._dropout_ratio = dropout_ratio
self._strides = strides
self.use_bn = use_bn
self._activation = activation
self._args = args
self._kwargs = kwargs
super(Conv2DLayer, self).__init__(*args, **kwargs)
def build(self, input_shape):
self.conv = Conv2D(self._filters,
kernel_size=self._kernel_size,
activation=self._activation,
strides=self._strides,
input_shape=input_shape,
*self._args,
**self._kwargs)
self.conv.build(input_shape)
self.out_conv_shape = self.conv.compute_output_shape(input_shape)
self._trainable_weights = self.conv._trainable_weights
self._non_trainable_weights = self.conv._non_trainable_weights
if self.use_bn:
self.bn = BatchNormalization()
self.bn.build(self.out_conv_shape)
self._trainable_weights.extend(self.bn._trainable_weights)
self._non_trainable_weights.extend(self.bn._non_trainable_weights)
if self._dropout_ratio is not None:
self.dropout = Dropout(rate=self._dropout_ratio)
self.dropout.build(self.out_conv_shape)
self._trainable_weights.extend(self.dropout._trainable_weights)
self._non_trainable_weights.extend(self.dropout._non_trainable_weights)
super(Conv2DLayer, self).build(input_shape)
def call(self, inputs):
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x)
if self._dropout_ratio is not None:
x = self.dropout(x)
return x
def compute_output_shape(self, input_shape):
return self.out_conv_shape
Finally, here is the modified model :
def get_model():
input_ = Input(shape=(28, 28, 1))
x = Conv2DLayer(filters=64, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2DLayer(filters=128, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Conv2DLayer(filters=256, kernel_size=3, activation="relu")(input_)
x = MaxPool2D(pool_size=(2,2))(x)
x = Flatten()(x)
x = Dense(128, activation="relu")(x)
x = Dense(64, activation="relu")(x)
x = Dense(10, activation="softmax")(x)
mod = Model(inputs=input_, outputs=x)
return mod
For this model with custom layer, I managed to get the same train accuracy (97%), but the validation accuracy get stuck around 50%.
EDIT
Thanks to Matias Valdenegro, I achieved to solve the problem by modifying the call method :
def call(self, inputs):
training = K.learning_phase()
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x, training=training)
if self._dropout_ratio is not None:
x = self.dropout(x, training=training)
return x
With K the keras.backend module.
Both Dropout and Batch Normalization behave differently during training and testing/inference, and your layer does not have any of that behavior, so its using those inner layers as training mode during inference, producing incorrect results.
I am not sure but I think you can fix this by passing the training parameter in the call function call through the layers, something like:
def call(self, inputs, training=None):
x = self.conv(inputs)
if self.use_bn:
x = self.bn(x, training=training)
if self._dropout_ratio is not None:
x = self.dropout(x, training=training)
return x
This should make the inner layers work differently during train and testing/inference phases.