I am working on generalizing the inputs to the sample variational autoencoder in the Keras repository, but seem to have made some elementary mistakes. In particular, only certain batch sizes work for the model below:
from keras.layers import Lambda, Input, Dense, Reshape
from keras.models import Model
from keras.losses import mse
from keras import backend as K
import numpy as np
# reparameterization trick
# instead of sampling from Q(z|X), sample epsilon = N(0,I)
# z = z_mean + sqrt(var) * epsilon
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
# network parameters
original_dim = 45
input_shape = (original_dim, )
intermediate_dim = 512
latent_dim = 2
# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Reshape((original_dim,))(inputs)
x = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(intermediate_dim, activation='relu')(latent_inputs)
x = Dense(original_dim, activation='sigmoid')(x)
outputs = Reshape(input_shape)(x)
decoder = Model(latent_inputs, outputs, name='decoder')
# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')
vae.add_loss(mse(inputs, outputs))
vae.compile(optimizer='adam')
x_train = np.random.rand(1000, 45)
vae.fit(x_train, epochs=100, batch_size=10) # works, while 23 fails
Can anyone help me understand why some batch sizes fail (e.g. 23)? I'd be grateful for any insights others can offer on this question.
You currently get unequal batch sizes whenever len(data) % batch_size != 0: the final batch of each epoch is smaller than the rest (1000 % 23 = 11, while 1000 % 10 = 0, which is why 10 works and 23 fails). You can solve your problem by changing your code to:
x_train = np.random.rand(1000, 45)
batch_size = 23
vae.fit(x_train, epochs=100, steps_per_epoch=len(x_train) // batch_size)  # len(x_train), not x_train.size, counts samples
This results in all batches having the same size; see the documentation of fit for details on its arguments.
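Alternatively, if you would rather keep passing batch_size to fit(), a minimal sketch (assuming the same vae and x_train as above) is to trim the training set to the largest multiple of the batch size, so no smaller final batch is ever produced:
x_train = np.random.rand(1000, 45)
batch_size = 23
n = (len(x_train) // batch_size) * batch_size  # 989, the largest multiple of 23
vae.fit(x_train[:n], epochs=100, batch_size=batch_size)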
I'm having trouble understanding why this is throwing an error. This code is pulled directly from the PyTorch documentation for an NN classifier for the Fashion-MNIST dataset. However, when I try to flip this to the MNIST handwritten digits dataset, it comes up with the following error:
RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1
This occurs when using the loss function during the training loop. Can anyone help me understand why this is happening? Thanks!
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import torchvision.models as models
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
print(f"Using {device} device")
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
def save_checkpoint(state, filename="checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)
model = NeuralNetwork().to(device)
learning_rate = 1e-3
batch_size = 64
epochs = 10
# Initialize the loss function
loss_fn = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimiser)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
torch.nn.MSELoss is an implementation of mean squared error. You can't measure the difference between two tensors if they're different sizes (MSELoss does not allow for broadcasting). So if you're using MSELoss, the predictions and the targets must have the same shape. In your case, pred is a tensor of shape [64, 10], while y is a tensor of shape [64].
The reason y is of shape [64] rather than [64, 10] is that most classification dataset implementations represent targets as integer labels rather than one-hot encoded vectors. In theory, you could convert these integer label targets to one-hot encoded targets.
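For illustration, a minimal sketch of that conversion with dummy tensors (torch.nn.functional.one_hot does the work):
import torch
import torch.nn.functional as F

y = torch.randint(0, 10, (64,))                  # integer labels, shape [64]
y_onehot = F.one_hot(y, num_classes=10).float()  # shape [64, 10], matches pred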
But in reality, since this is a classification problem, you should probably be using something like nn.CrossEntropyLoss rather than nn.MSELoss. The former is a conventional classification loss function, and it allows the targets to be integer labels rather than one-hot labels (so just swapping out MSELoss for CrossEntropyLoss should solve your problem). MSELoss is better suited for regression tasks and such.
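A minimal sketch of that swap with dummy tensors; CrossEntropyLoss expects raw logits, so the model itself needs no changes:
import torch
from torch import nn

loss_fn = nn.CrossEntropyLoss()

pred = torch.randn(64, 10)       # logits of shape [batch, num_classes]
y = torch.randint(0, 10, (64,))  # integer class labels of shape [batch]
loss = loss_fn(pred, y)          # works without reshaping or one-hot encoding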
I'm trying to use MobileNetV2 (pretrained on ImageNet) with transfer learning for multiclass classification (6 classes), but I am getting the following error. Can anyone please help?
ValueError: `logits` and `labels` must have the same shape, received ((None, 6) vs (None, 1)).
I borrowed this code from Andrew Ng's CNN course, which I took a while back, but the original code was for binary classification. I tried to modify it for multiclass classification but got this error. Here's my code:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import tensorflow.keras.layers as tfl
import datetime
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
training_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Training/Actin"
validation_directory = "/content/drive/MyDrive/Microscopy Data/04112028_multiclass_maiden/Validation/Actin"
train_dataset = image_dataset_from_directory(
    training_directory,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    seed=42)
validation_dataset = image_dataset_from_directory(
    validation_directory,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    seed=42)
Output:
Found 600 files belonging to 6 classes.
Found 600 files belonging to 6 classes.
Code Continued...
class_names = train_dataset.class_names
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=True,
    weights='imagenet')
def huvec_model(image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
    ''' Define a tf.keras model for binary classification out of the MobileNetV2 model
    Arguments:
        image_shape -- Image width and height
        data_augmentation -- data augmentation function
    Returns:
        tf.keras.model
    '''
    input_shape = image_shape + (3,)
    # Freeze the base model by making it non trainable
    # base_model.trainable = None
    # create the input layer (Same as the imageNetv2 input size)
    # inputs = tf.keras.Input(shape=None)
    # apply data augmentation to the inputs
    # x = None
    # data preprocessing using the same weights the model was trained on
    # x = preprocess_input(None)
    # set training to False to avoid keeping track of statistics in the batch norm layer
    # x = base_model(None, training=None)
    # Add the new Binary classification layers
    # use global avg pooling to summarize the info in each channel
    # x = None()(x)
    # include dropout with probability of 0.2 to avoid overfitting
    # x = None(None)(x)
    # create a prediction layer with one neuron (as a classifier only needs one)
    # prediction_layer = None
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=IMG_SHAPE,
        include_top=False,
        weights='imagenet')
    base_model.trainable = False
    inputs = tf.keras.Input(shape=input_shape)
    x = data_augmentation(inputs)
    x = preprocess_input(x)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tfl.Dropout(.2)(x)
    prediction_layer = tf.keras.layers.Dense(units=len(class_names), activation='softmax')
    # YOUR CODE ENDS HERE
    outputs = prediction_layer(x)
    model = tf.keras.Model(inputs, outputs)
    return model
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
# Freeze all the layers before the `fine_tune_at` layer
# for layer in base_model.layers[:fine_tune_at]:
# layer.trainable = None
# Define a BinaryCrossentropy loss function. Use from_logits=True
# loss_function=None
# Define an Adam optimizer with a learning rate of 0.1 * base_learning_rate
# optimizer = None
# Use accuracy as evaluation metric
# metrics=None
base_learning_rate = 0.01
# YOUR CODE STARTS HERE
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
loss_function = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
metrics = ['accuracy']
# YOUR CODE ENDS HERE
model2.compile(loss=loss_function,
               optimizer=optimizer,
               metrics=metrics)
initial_epochs = 5
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=initial_epochs)
Looks like you have yet to one-hot encode your labels: instead of having the number i (between 0 and 5, inclusive) as the label of an image that belongs to the i-th class, which is of shape (None, 1), provide an array of all 0's except for a 1 at index i, which is of shape (None, 6). Then labels has the same shape as logits.
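A minimal sketch of that conversion, assuming the datasets defined above and the 6 classes reported by image_dataset_from_directory:
NUM_CLASSES = 6  # from "Found 600 files belonging to 6 classes."

train_dataset = train_dataset.map(
    lambda x, y: (x, tf.one_hot(y, depth=NUM_CLASSES)))
validation_dataset = validation_dataset.map(
    lambda x, y: (x, tf.one_hot(y, depth=NUM_CLASSES)))
With one-hot targets of shape (None, 6), a categorical loss such as tf.keras.losses.CategoricalCrossentropy is the usual companion, rather than BinaryCrossentropy.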
Put simply, you need to match the logits to the labels, or else remove the softmax (the output distribution) at the end of the model.
Your code is almost correct; I changed a bit around the undefined data_augmentation so that it works.
The model will produce an output either way, but the loss calculation depends on what that output is expected to be: try mean squared error and you will see errors, while categorical cross-entropy provides different behavior.
Some people report that binary cross-entropy boosts accuracy, but that applies to sequences of binary outcomes (see the ALE game examples, e.g. Street Fighter), not to a setup like this one.
[ Sample ]:
import os
from os.path import exists
import tensorflow as tf
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
BATCH_SIZE = 16
IMG_SIZE = (160, 160)
PATH = 'F:\\datasets\\downloads\\sample\\cats_dogs\\training'
training_directory = os.path.join(PATH, 'train')
validation_directory = os.path.join(PATH, 'validation')
train_dataset = tf.keras.utils.image_dataset_from_directory(
    training_directory,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    seed=42)
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    validation_directory,
    shuffle=True,
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    seed=42)
class_names = train_dataset.class_names
print( "class_names: " + str( class_names ) )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def huvec_model(image_shape=IMG_SIZE,
                data_augmentation=tf.keras.Sequential([
                    tf.keras.layers.RandomFlip('horizontal'),
                    tf.keras.layers.RandomRotation(0.2),
                ])):
    # def huvec_model (image_shape=IMG_SIZE, data_augmentation=data_augmenter()):
    ''' Define a tf.keras model for binary classification out of the MobileNetV2 model
    Arguments:
        image_shape -- Image width and height
        data_augmentation -- data augmentation function
    Returns:
        tf.keras.model
    '''
    input_shape = image_shape + (3,)
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=IMG_SHAPE,
        include_top=False,
        weights='imagenet')
    base_model.trainable = False
    inputs = tf.keras.Input(shape=input_shape)
    x = data_augmentation(inputs)
    x = preprocess_input(x)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(.2)(x)
    prediction_layer = tf.keras.layers.Dense(units=len(class_names), activation='softmax')
    outputs = prediction_layer(x)
    model = tf.keras.Model(inputs, outputs)
    return model
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    include_top=True,
    weights='imagenet')
base_model.summary()
model2 = huvec_model(IMG_SIZE)
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))
# Fine-tune from this layer onwards
fine_tune_at = 120
base_learning_rate = 0.01
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1*base_learning_rate)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model2.compile(optimizer=optimizer, loss=lossfn, metrics=[ 'accuracy' ])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model2.fit(train_dataset, validation_data=validation_dataset, epochs=5)
input('...')
[ Output ]
...
Found the bug:
I had to redefine the loss in the model compile step as loss='sparse_categorical_crossentropy', where it was initially defined as loss=tf.keras.losses.BinaryCrossentropy(from_logits=True).
Refer to the SO thread Changing Keras Model from Binary Classification to Multi-classification for more details.
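In other words, a sketch of the fixed compile call (the string form defaults to from_logits=False, which matches the softmax in the prediction layer above):
model2.compile(loss='sparse_categorical_crossentropy',
               optimizer=optimizer,
               metrics=['accuracy'])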
I'm trying to incorporate a RelativePositionEmbedding layer into a transformer example. The embedding layer can be found in the build_model method below:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from official.nlp.modeling.layers import position_embedding
def readucr(filename):
    data = np.loadtxt(filename, delimiter="\t")
    y = data[:, 0]
    x = data[:, 1:]
    return x, y.astype(int)
root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"
x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv")
x_test, y_test = readucr(root_url + "FordA_TEST.tsv")
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
n_classes = len(np.unique(y_train))
idx = np.random.permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]
y_train[y_train == -1] = 0
y_test[y_test == -1] = 0
# Build model
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs
    # Feed Forward Part
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0
):
    inputs = keras.Input(shape=input_shape)
    x = inputs  # => shape is (None, 500, 1)
    x = position_embedding.RelativePositionEmbedding(hidden_size=500)(x)  # Now (500, 500)
    # Add batch dimension back. But how to accept batch size greater than 1?
    x = layers.Lambda(lambda x: tf.expand_dims(x, axis=0))(x)  # Now (1, 500, 500)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)
input_shape = x_train.shape[1:]
model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25
)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["sparse_categorical_accuracy"]
)
callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    keras.callbacks.TensorBoard(log_dir="./logs")
]
model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64,
    callbacks=callbacks
)
model.evaluate(x_test, y_test, verbose=1)
The fit call above blows up because I've specified a batch_size of 64. Everything works fine when setting batch_size to 1, because the expand_dims operation only adds a size-1 batch dimension, as opposed to an Input layer, which adds None for an arbitrary batch size.
So how can I add a batch dimension greater than 1 "back in"? Is there another way I should be using the RelativePositionEmbedding layer so that it does not interfere with batch sizes?
I've also tried the Reshape layer, without success.
I thought this question would solve my issue, but it only adds a leading 1 dimension, like the Lambda layer I incorporated, rather than None, which I think would resolve the issue.
I do not think you can pass the output of the RelativePositionEmbedding directly to another layer. If you take a look here, the authors are adding the output of this layer to the original input. Your code will work if you change your model like this:
# ....
# Your code
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0
):
    inputs = keras.Input(shape=input_shape)
    x = inputs  # => shape is (None, 500, 1)
    pos_encoding = position_embedding.RelativePositionEmbedding(hidden_size=500)(x)  # Now (500, 500)
    x = inputs + pos_encoding
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)
# ....
# Your code
45/45 [==============================] - 54s 1s/step - loss: 1.0281 - sparse_categorical_accuracy: 0.5111 - val_loss: 0.7387 - val_sparse_categorical_accuracy: 0.5645
42/42 [==============================] - 8s 187ms/step - loss: 0.7440 - sparse_categorical_accuracy: 0.5424
[0.7440475225448608, 0.5424242615699768]
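For what it's worth, the addition works for any batch size because of ordinary TensorFlow broadcasting; a minimal sketch with dummy tensors:
import tensorflow as tf

x = tf.zeros((64, 500, 1))  # a batch of inputs
pos = tf.zeros((500, 500))  # stand-in for the RelativePositionEmbedding output
print((x + pos).shape)      # (64, 500, 500): broadcast over batch and channels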
I'm trying to create a variational autoencoder, which means I need a custom loss function. The problem is that inside the loss function I have two different losses: an MSE term and a divergence term. The MSE is a Tensor while the divergence is a KerasTensor (because of sigma and mu, which I get out of the encoder). And I get errors like this:
TypeError: Cannot convert a symbolic Keras input/output to a numpy
array. This error may indicate that you're trying to pass a symbolic
value to a NumPy call, which is not supported. Or, you may be trying
to pass Keras symbolic inputs/outputs to a TF API that does not
register dispatching, preventing Keras from automatically converting
the API call to a lambda layer in the Functional Model.
So here is my architecture:
import tensorflow.keras as keras
from keras.layers import Input, Dense, Flatten, Reshape
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from keras.models import Model
import tensorflow as tf
import keras.backend as K
encoded_dim = 2
class Sampling(keras.layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
img = Input((28,28,1), name='img')
x = Conv2D(32, (3,3), padding='same', activation='relu')(img)
x = MaxPooling2D()(x)
x = Conv2D(64, (3,3), padding='same', activation='relu')(x)
x = MaxPooling2D()(x)
x = Flatten()(x)
x = Dense(16, activation='relu')(x)
mu = Dense(encoded_dim, name='mu')(x)
sigma = Dense(encoded_dim, name='sigma')(x)
z = Sampling()([mu,sigma])
# print(mu)
xx = Input((encoded_dim,))
x = Dense(7*7*64, activation='relu')(xx)
x = Reshape((7,7,64))(x)
x = Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
out = Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
encoder = Model(img,z, name='encoder')
decoder = Model(xx,out,name='decoder')
autoencoder = Model(img,decoder(encoder(img)),name='autoencoder')
And the loss function:
def vae_loss(x, y):
    loss = tf.reduce_mean(K.square(x - y))
    kl_loss = -0.5 * tf.reduce_mean(1 + sigma - tf.square(mu) - tf.exp(sigma))
    print(type(loss))
    print(type(kl_loss))
    return loss + kl_loss
autoencoder.compile(optimizer='adam',
                    loss=vae_loss)
autoencoder.fit(train, train,
                epochs=1,
                batch_size=60,
                shuffle=True,
                verbose=2)
Types of loss and kl_loss:
class 'tensorflow.python.framework.ops.Tensor'
class 'tensorflow.python.keras.engine.keras_tensor.KerasTensor'
You need to pass mu and sigma to your loss function; vae_loss now accepts 4 inputs:
def vae_loss(x, y, mu, sigma):
    loss = tf.reduce_mean(K.square(x - y))
    kl_loss = -0.5 * tf.reduce_mean(1 + sigma - tf.square(mu) - tf.exp(sigma))
    return loss + kl_loss
You can use it inside your model simply via autoencoder.add_loss.
It's also important that the encoder returns not only z but also mu and sigma:
z, mu, sigma = encoder(img)
out = decoder(z)
autoencoder = Model(img, out, name='autoencoder')
autoencoder.add_loss(vae_loss(img, out, mu, sigma)) # <======= add_loss
autoencoder.compile(loss=None, optimizer='adam')
Here is a running notebook: https://colab.research.google.com/drive/1r5lMZ2Dc_Lq4KJDqiirXla1qfDVmdwxW?usp=sharing
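One detail worth spelling out: for the unpacking above to work, the encoder from the question (originally Model(img, z)) must be redefined to return all three tensors, roughly:
encoder = Model(img, [z, mu, sigma], name='encoder')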
I am a beginner in Keras programming. I just want to update the model weights manually in Keras, so as to get a deep understanding of gradient descent. However, when I tried it, the model either cannot converge or the loss even explodes. My steps are listed as follows:
First, I use a Keras sequential model to fit a quadratic function y = 2*x*x - 7*x + 11.
Below is the code using the sequential model:
model = Sequential()
model.add(Dense(64, input_dim = 1, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
[Figure: training loss]
[Figure: fitted curve vs. the original]
Then, I use the following code to update the weights manually:
class MyModel(keras.Model):
    def __init__(self):
        super().__init__()
        self.layer1 = Dense(64, input_shape=(1,))
        self.layer2 = Dense(32)
        self.layer3 = Dense(1)

    def forward(self, x):
        y = keras.activations.relu(self.layer1(x))
        y = keras.activations.relu(self.layer2(y))
        y = self.layer3(y)
        return y
def loss_fun(y_pred, y):
    return keras_backend.mean(keras.losses.mean_squared_error(y, y_pred))

def compute_loss(model, x, y, loss_fun=loss_fun):
    logits = model.forward(x)
    mse = loss_fun(y, logits)
    return mse, logits

def compute_gradients(model, x, y, loss_fun=loss_fun):
    with tf.GradientTape() as tape:
        loss, _ = compute_loss(model, x, y, loss_fun)
    return tape.gradient(loss, model.trainable_variables), loss

def apply_gradients(optimizer, gradients, variables):
    optimizer.apply_gradients(zip(gradients, variables))

def train_batch(x, y, model, optimizer):
    '''
    one step batch training
    '''
    gradients, loss = compute_gradients(model, x, y)
    apply_gradients(optimizer, gradients, model.trainable_variables)
    return loss
model2 = MyModel()
epochs = 200
optimizer = keras.optimizers.Adam(learning_rate=0.01)  # from what I found, 0.01 is the Keras default learning rate
loss = []
x_train = np.expand_dims(x_train, axis = 0)
y_train = np.expand_dims(y_train, axis = 0)
for i in range(epochs):
    l = train_batch(x_train, y_train, model2, optimizer)
    loss.append(l)
    if i % 10 == 0:
        print(f'current loss = {l}')
The loss then looks like this:
[Figure: loss curve]
I also tried another way to manually update the weights:
epochs = 200
lr = 0.01
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
loss = []
x_train = np.expand_dims(x_train, axis = 0)
y_train = np.expand_dims(y_train, axis = 0)
x_train = tf.cast(x_train, tf.float32)
y_train = tf.cast(y_train, tf.float32)
for i in range(epochs):
    y_pred = model5.forward(x_train)
    l = k.mean(keras.losses.mean_squared_error(y_train, y_pred))
    gradients = k.gradients(l, model5.trainable_weights)
    new_weights = model5.get_weights() - 0.001 * np.array(gradients)
    model5.set_weights(new_weights)
    if i % 10 == 0:
        loss.append(l)
        print(f'{i}th loss is: {l}')
In this case, the loss explodes like this:
[Figure: exploding loss]
Where is the problem?
I have figured out where the problem is.
When getting the model through the following code:
model = MyModel()
The trainable variables in the model are empty.
When I try to print them using this:
print(model.trainable_variables)
it outputs
[]
I tried to make the weights trainable manually with the following code:
for layers in model.layers:
    layers.trainable = True
But it still doesn't work at all.
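A sketch of one likely explanation, assuming standard Keras behavior: in a subclassed model, layer weights are created lazily, so trainable_variables stays empty until the layers have actually been called on data; setting layers.trainable = True does not build them.
import numpy as np

model = MyModel()
print(model.trainable_variables)  # []: nothing has been built yet

# Calling the layers once (here via the custom forward method) creates the
# weights; after that, trainable_variables is populated.
_ = model.forward(np.zeros((1, 1), dtype='float32'))
print(len(model.trainable_variables))  # 6: a kernel and a bias per Dense layer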