Joining the metrics of every output in Keras (multi-output model) - python

I am working on a multi-output model in Keras. I've implemented two custom metrics, auroc and auprc, which are passed to the compile method of the Keras model:
def auc(y_true, y_pred, curve='PR'):
    score, up_opt = tf.compat.v1.metrics.auc(y_true, y_pred, curve=curve, summation_method="careful_interpolation")
    K.get_session().run(tf.local_variables_initializer())
    with tf.control_dependencies([up_opt]):
        score = tf.identity(score)
    return score

def auprc(y_true, y_pred):
    return auc(y_true, y_pred, curve='PR')

def auroc(y_true, y_pred):
    return auc(y_true, y_pred, curve='ROC')

mlp_model.compile(loss=...,
                  optimizer=...,
                  metrics=[auprc, auroc])
With this approach I obtain auprc/auroc values for every output, but to optimize my hyperparameters with a Bayesian optimizer I need a single metric (e.g. the average or the sum of auprc over all outputs). I can't figure out how to combine my metrics into a single one.
EDIT: here an example of desired results
Currently, for every epoch the following metrics are printed:
out1_auprc: 0.0267 - out2_auprc: 0.0277 - out3_auprc: 0.0294
where out1, out2, out3 are my neural network outputs. I would like to obtain something like:
average_auprc: 0.0279 - out1_auprc: 0.0267 - out2_auprc: 0.0277 - out3_auprc: 0.0294
I am using Keras Tuner for Bayesian Optimization.
Any help is appreciated, thank you.

I worked around the problem by creating a custom callback:
class MergeMetrics(Callback):

    def __init__(self, **kargs):
        super(MergeMetrics, self).__init__(**kargs)

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        logs['merge_metrics'] = 0.5 * logs["y1_mse"] + 0.5 * logs["y2_mse"]
I use this callback to merge the two metrics coming from two different outputs. I use a simple problem as an example, but you can easily integrate the callback into your own problem and extend it to a validation set (the same pattern works on the val_* keys of logs).
Here is the dummy example:
X = np.random.uniform(0, 1, (1000, 10))
y1 = np.random.uniform(0, 1, 1000)
y2 = np.random.uniform(0, 1, 1000)

inp = Input((10,))
x = Dense(32, activation='relu')(inp)
out1 = Dense(1, name='y1')(x)
out2 = Dense(1, name='y2')(x)

m = Model(inp, [out1, out2])
m.compile('adam', 'mae', metrics=['mse'])

checkpoint = MergeMetrics()
m.fit(X, [y1, y2], epochs=10, callbacks=[checkpoint])
The printed output:
loss: ..... y1_mse: 0.0863 - y2_mse: 0.0875 - merge_metrics: 0.0869
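Since the question mentions Keras Tuner, here is a minimal sketch of how the merged metric written into logs could serve as the tuning objective. It assumes a hypothetical build_model function; the Objective name must match the key the callback writes, and the direction would be "max" for an averaged AUPRC:

import keras_tuner as kt

tuner = kt.BayesianOptimization(
    build_model,  # hypothetical model-building function
    objective=kt.Objective("merge_metrics", direction="min"),  # "max" for averaged AUPRC
    max_trials=20)

tuner.search(X, [y1, y2], epochs=10, callbacks=[MergeMetrics()])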

Related

Model does not train properly when explicitly applying the gradients

I’m trying to constrain the weights of my model by explicitly applying the gradients; however, this is not working and I can’t figure out why.
I’m defining the model with the following function:
def init_model(num_hidden_layers=2, num_neurons_per_layer=64):
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(2,)))
    for _ in range(num_hidden_layers):
        model.add(tf.keras.layers.Dense(num_neurons_per_layer,
                                        activation=tf.keras.layers.LeakyReLU(),
                                        kernel_initializer="glorot_uniform"))
    model.add(tf.keras.layers.Dense(1, kernel_initializer="glorot_uniform"))
    return model
When using the fit method, the loss function decreases and the model fits the data:
Nepochs = 1500
lr = 0.001

def my_loss(u_true, u_pred):
    return tf.math.reduce_mean(tf.math.square(u_true - u_pred))

model_0 = init_model(num_hidden_layers=2, num_neurons_per_layer=64)
optim_0 = tf.keras.optimizers.Adam(learning_rate=lr)
model_0.compile(loss=my_loss, optimizer=optim_0)
model_0.summary()

history_0 = model_0.fit(X_train, u_train,
                        validation_data=(X_test.numpy(), u_test.numpy()),
                        epochs=Nepochs, batch_size=X_train.shape[0])
When I explicitly specify and apply the gradient, the loss function stagnates and the output does not fit the data (it is uniform everywhere):
Nepochs = 1500
lr = 0.001

def compute_loss(model, X_data, u_data):
    u_pred = model(X_data)
    loss = tf.math.reduce_mean(tf.math.square(u_data - u_pred))
    return loss

@tf.function
def training(model, optim, X_train, u_train, X_test=None, u_test=None):
    if X_test is not None:
        validation_loss = compute_loss(model, X_test, u_test)
    else:
        validation_loss = None
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(model.trainable_variables)
        loss = compute_loss(model, X_train, u_train)
    grad_theta = tape.gradient(loss, model.trainable_variables)
    optim.apply_gradients(zip(grad_theta, model.trainable_variables))
    return loss, validation_loss

model_G = init_model(num_hidden_layers=2, num_neurons_per_layer=64)
optim_G = tf.keras.optimizers.Adam(learning_rate=lr)
model_G.summary()

hist = {'val_loss': [], 'loss': []}
for i in range(Nepochs + 1):
    loss, val_loss = training(model_G, optim_G, X_train, u_train, X_test, u_test)
    hist['loss'].append(loss.numpy())
    if val_loss is not None:
        hist['val_loss'].append(val_loss.numpy())
        print('It {:05d}: loss = {:10.8e}, validation loss = {:10.8e}'.format(i, loss, val_loss))
    else:
        print('It {:05d}: loss = {:10.8e}'.format(i, loss))
Why do the two versions provide different results?
Thanks for the help.
Cesare
Finally, I found that expanding the dimension of the targets as follows:
u_train = tf.expand_dims(u_train,axis=-1)
u_test = tf.expand_dims(u_test,axis=-1)
the model trains properly and the loss functions are correctly evaluated.
u_train and u_test previously had shape (N,), i.e. only the number of entries; by expanding the dimension, the shape becomes (N, 1).
With fit the code works with both shapes; when explicitly applying the gradients, it only works with targets of shape (N, 1).
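A likely explanation (my reading; the original answer doesn't spell it out) is broadcasting: the model outputs shape (N, 1), so with targets of shape (N,) the subtraction u_data - u_pred broadcasts to an (N, N) matrix and reduce_mean averages over all N² entries, giving the wrong loss. Keras's built-in fit machinery matches target and prediction shapes for you, while the manual training loop does not. A quick check:

import tensorflow as tf

u_true = tf.zeros((4,))    # targets of shape (N,)
u_pred = tf.zeros((4, 1))  # model output of shape (N, 1)
print((u_true - u_pred).shape)  # (4, 4): broadcast to a matrix, not elementwise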

Custom Dynamic Loss function: No gradients provided for any variable

I am using an RGB dataset for my x_train, and the loss is calculated in a dynamic loss function that gets the distances of pairs and compares them against the ideal distances dist_train. Here is the model:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d1 = Dense(3, activation='relu')
        self.flatten = Flatten()
        self.d2 = Dense(3, activation='relu')
        self.d3 = Dense(2)

    def call(self, x):
        x = self.d1(x)
        x = self.flatten(x)
        x = self.d2(x)
        return self.d3(x)

# Create an instance of the model
model = MyModel()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')

@tf.function
def train_step(rgb):
    with tf.GradientTape() as tape:
        predictions = model(rgb, training=True)
        loss = tf_function(predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
Here is the loss function and the tf.function wrapping it:
def mahal_loss(output):
    mahal = sp.spatial.distance.pdist(output, metric='mahalanobis')
    mahal = sp.spatial.distance.squareform(mahal, force='no', checks=True)
    new_distance = []
    mahal = np.ma.masked_array(mahal, mask=mahal==0)
    for i in range(len(mahal)):
        pw_dist = mahal[i, indices_train[i]]
        new_distance.append(pw_dist)
    mahal_loss = np.mean((dist_train - new_distance)**2)
    return mahal_loss

@tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])
def tf_function(pred):
    y = tf.numpy_function(mahal_loss, [pred], tf.float32)
    return y
Running the model:
for epoch in range(EPOCHS):
train_loss.reset_states()
test_loss.reset_states()
for i in x_train:
train_step(i)
print(
f'Epoch {epoch + 1}, '
f'Loss: {train_loss.result()}, '
f'Test Loss: {test_loss.result()}, '
)
I believe the problem lies in the dynamic loss function: to get the results I expect, I need to calculate the Mahalanobis distance of each pair inside the loss function and select the ones I compare against the correct distances. The error I get is the following:
<ipython-input-23-0e975da5cbc2>:15 train_step  *
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
C:\Anaconda3\envs\colour_env\lib\site-packages\keras\optimizer_v2\optimizer_v2.py:622 apply_gradients  **
    grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
C:\Anaconda3\envs\colour_env\lib\site-packages\keras\optimizer_v2\utils.py:72 filter_empty_gradients
    raise ValueError("No gradients provided for any variable: %s." %

ValueError: No gradients provided for any variable: ['my_model/dense/kernel:0', 'my_model/dense/bias:0', 'my_model/dense_1/kernel:0', 'my_model/dense_1/bias:0', 'my_model/dense_2/kernel:0', 'my_model/dense_2/bias:0'].
The problem is the use of tf.numpy_function.
Specifically, everything that happens inside the with tf.GradientTape() as tape block has to be differentiable. Because the conversion between tf.Tensor and numpy array is not differentiable, tf.numpy_function cannot be used for loss computation:

    Since the function takes numpy arrays, you cannot take gradients through a numpy_function. If you require something that is differentiable, please consider using tf.py_function.

(Source: the tf.numpy_function entry in the official documentation.)
So either wrap the loss computation in tf.py_function, as this accepts tf.Tensors, or consider implementing it in TensorFlow. Here is an example for that.
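In the spirit of the second option, here is a minimal sketch of a pairwise-distance loss built only from differentiable TF ops. It substitutes Euclidean for Mahalanobis distance for brevity and assumes, as in the question, a per-row partner index array indices_train and target distances dist_train:

import tensorflow as tf

def pairwise_dist_loss(predictions, indices_train, dist_train):
    # All-pairs differences, shape (N, N, D)
    diff = tf.expand_dims(predictions, 1) - tf.expand_dims(predictions, 0)
    # Pairwise Euclidean distances, shape (N, N); epsilon avoids NaN gradients at zero
    dist = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1) + 1e-9)
    # For each row i, select dist[i, indices_train[i]]
    selected = tf.gather(dist, indices_train, batch_dims=1)
    return tf.reduce_mean(tf.square(dist_train - selected))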

Tensorflow training - print multiple losses for one output

I would like to print all the different losses I have for one output separately.
At the moment it looks like:
1/1 [==============================] - 1s 1s/sample - loss: 4.2632
The goal is to have a history like:
1/1 [==============================] - 1s 1s/sample - loss1: 2.1, loss2: 2.1632
I have one output layer out1 and two loss functions loss1 and loss2.
def loss1(y_true, y_pred):
    ...
    return ...

def loss2(y_true, y_pred):
    ...
    return ...
When I call

model.compile(...)

I can either choose a single loss function,

model.compile(loss=lambda y_true, y_pred: loss1(y_true, y_pred) + loss2(y_true, y_pred))

or define a loss for each output in a dictionary,

model.compile(loss={'out1': loss1, 'out2': loss2})

Since I have only one output, the second option isn't available to me.
Does anyone know how to print the losses separately when having only one output?
Just use the metrics argument:
model.compile(optimizer='adam', loss='mae', metrics=['mse'])
You will still need to choose one loss to minimize.
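Applied to the question's setup, a minimal sketch: minimize the sum of the two losses, and pass each one as a metric as well so it is logged separately (custom loss functions are accepted as metrics in Keras):

def combined_loss(y_true, y_pred):
    return loss1(y_true, y_pred) + loss2(y_true, y_pred)

model.compile(optimizer='adam', loss=combined_loss, metrics=[loss1, loss2])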
One workaround is to artificially create two identical outputs and then combine them with loss weights equal to 1. For the sake of concreteness, here is an example:
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.losses import mse, mae

if __name__ == '__main__':
    train_x = np.random.rand(10000, 200)
    train_y = np.random.rand(10000, 1)

    x_input = Input(shape=(200,))
    x = Dense(64)(x_input)
    x = Dense(64)(x)
    x = Dense(1)(x)
    x1 = Lambda(lambda x: x, name='out1')(x)
    x2 = Lambda(lambda x: x, name='out2')(x)

    model = Model(inputs=x_input, outputs=[x1, x2])
    model.compile(optimizer='adam',
                  loss={'out1': mse, 'out2': mae},
                  loss_weights={'out1': 1, 'out2': 1})
    model.fit(train_x, [train_y, train_y], epochs=10)
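With this setup Keras logs the per-output losses separately under the output names, alongside the weighted total, e.g. loss: ... - out1_loss: ... - out2_loss: ..., which gives exactly the separate printout the question asks for.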

Knowledge Distillation loss with Tensorflow 2 + Keras

I am trying to implement a very simple keras model that uses Knowledge Distillation [1] from another model.
Roughly, I need to replace the original loss L(y_true, y_pred) by L(y_true, y_pred) + L(y_teacher_pred, y_pred), where y_teacher_pred is the prediction of another model.
I've tried to do
def create_student_model_with_distillation(teacher_model):
    inp = tf.keras.layers.Input(shape=(21,))
    model = tf.keras.models.Sequential()
    model.add(inp)
    model.add(...)
    model.add(tf.keras.layers.Dense(units=1))

    teacher_pred = teacher_model(inp)

    def my_loss(y_true, y_pred):
        loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
        loss += tf.keras.losses.mean_squared_error(teacher_pred, y_pred)
        return loss

    model.compile(loss=my_loss, optimizer='adam')
    return model
However, when I try to call fit on my model, I am getting
TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
How can I solve this issue?
Refs
[1] https://arxiv.org/abs/1503.02531
Actually, this blog post answers your question: keras blog.
In short: use the new TF2 API and run the teacher's forward pass before the tf.GradientTape() block:
def train_step(self, data):
    # Unpack data
    x, y = data

    # Forward pass of teacher
    teacher_predictions = self.teacher(x, training=False)

    with tf.GradientTape() as tape:
        # Forward pass of student
        student_predictions = self.student(x, training=True)

        # Compute losses
        student_loss = self.student_loss_fn(y, student_predictions)
        distillation_loss = self.distillation_loss_fn(
            tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
            tf.nn.softmax(student_predictions / self.temperature, axis=1),
        )
        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
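For completeness, the same example then computes and applies gradients of the combined loss with respect to the student's weights only; roughly (a sketch continuing inside train_step, following the blog post's structure):

    # Compute gradients of the combined loss w.r.t. the student only
    trainable_vars = self.student.trainable_variables
    gradients = tape.gradient(loss, trainable_vars)

    # Update the student's weights
    self.optimizer.apply_gradients(zip(gradients, trainable_vars))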

Keras/Tensorflow: Combined Loss function for single output

I have only one output for my model, but I would like to combine two different loss functions:
def get_model():
    # create the model here
    model = Model(inputs=image, outputs=output)

    alpha = 0.2
    model.compile(loss=[mse, gse],
                  loss_weights=[1 - alpha, alpha],
                  ...)
but it complains that I need to have two outputs because I defined two losses:
ValueError: When passing a list as loss, it should have one entry per model outputs.
The model has 1 outputs, but you passed loss=[<function mse at 0x0000024D7E1FB378>, <function gse at 0x0000024D7E1FB510>]
Can I write my final loss function without having to create another loss function (since that would prevent me from changing alpha outside the loss function)?
How do I do something like (1-alpha)*mse + alpha*gse?
Update:
Both my loss functions have the same signature as any built-in Keras loss function: they take y_true and y_pred and return a tensor for the loss (which can be reduced to a scalar using K.mean()). I believe how these loss functions are defined shouldn't affect the answer, as long as they return valid losses.
def gse(y_true, y_pred):
    # some tensor operation on y_pred and y_true
    return K.mean(K.square(y_pred - y_true), axis=-1)
Specify a custom function for the loss:
model = Model(inputs=image, outputs=output)

alpha = 0.2
model.compile(
    loss=lambda y_true, y_pred: (1 - alpha) * mse(y_true, y_pred) + alpha * gse(y_true, y_pred),
    ...)
Or, if you don't want an ugly lambda, make it into an actual function:

def my_loss(y_true, y_pred):
    return (1 - alpha) * mse(y_true, y_pred) + alpha * gse(y_true, y_pred)

model = Model(inputs=image, outputs=output)

alpha = 0.2
model.compile(loss=my_loss, ...)

EDIT:
If your alpha is not some global constant, you can have a "loss function factory":

def make_my_loss(alpha):
    def my_loss(y_true, y_pred):
        return (1 - alpha) * mse(y_true, y_pred) + alpha * gse(y_true, y_pred)
    return my_loss

model = Model(inputs=image, outputs=output)

alpha = 0.2
my_loss = make_my_loss(alpha)
model.compile(loss=my_loss, ...)
Yes, define your own custom loss function and pass it to the loss argument upon compiling:

def custom_loss(y_true, y_pred):
    return (1 - alpha) * K.mean(K.square(y_true - y_pred)) + alpha * gse(y_true, y_pred)

(Not sure what you mean by gse; the snippet assumes it has the usual (y_true, y_pred) signature.) It can be helpful to have a look at how the vanilla losses are implemented in Keras: https://github.com/keras-team/keras/blob/master/keras/losses.py
The loss function should be a single function; you are giving your model a list of two functions. Try:

def mse(y_true, y_pred):
    return K.mean(K.square(y_pred - y_true), axis=-1)

def combined(y_true, y_pred):
    return mse(y_true, y_pred) * (1 - alpha) + gse(y_true, y_pred) * alpha

model.compile(loss=combined, ...)
While this answer doesn't directly address the original question, I thought of writing it because the same error occurs when trying to load a Keras model that has a custom loss using keras.models.load_model, and it hasn't been properly answered anywhere. Specifically, following the VAE example code in the Keras GitHub repository, this error occurs when loading the VAE model after it has been saved with model.save.
The solution is to save only the weights using vae.save_weights('file.h5') instead of saving the full model. However, you have to build and compile the model again before loading the weights with vae.load_weights('file.h5').
Following is an example implementation.
class VAE():

    def build_model(self):  # latent_dim and intermediate_dim can be passed as arguments

        def sampling(args):
            """Reparameterization trick by sampling from an isotropic unit Gaussian.

            # Arguments
                args (tensor): mean and log of variance of Q(z|X)

            # Returns
                z (tensor): sampled latent vector
            """
            z_mean, z_log_var = args
            batch = K.shape(z_mean)[0]
            dim = K.int_shape(z_mean)[1]
            # by default, random_normal has mean = 0 and std = 1.0
            epsilon = K.random_normal(shape=(batch, dim))
            return z_mean + K.exp(0.5 * z_log_var) * epsilon

        # original_dim = self.no_features
        # intermediate_dim = 256
        latent_dim = 8

        inputs = Input(shape=(self.no_features,))
        x = Dense(256, activation='relu')(inputs)
        x = Dense(128, activation='relu')(x)
        x = Dense(64, activation='relu')(x)
        z_mean = Dense(latent_dim, name='z_mean')(x)
        z_log_var = Dense(latent_dim, name='z_log_var')(x)

        # use reparameterization trick to push the sampling out as input
        # note that "output_shape" isn't necessary with the TensorFlow backend
        z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

        # instantiate encoder model
        encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

        # build decoder model
        latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
        x = Dense(32, activation='relu')(latent_inputs)
        x = Dense(48, activation='relu')(x)
        x = Dense(64, activation='relu')(x)
        outputs = Dense(self.no_features, activation='linear')(x)

        # instantiate decoder model
        decoder = Model(latent_inputs, outputs, name='decoder')

        # instantiate VAE model
        outputs = decoder(encoder(inputs)[2])
        VAE = Model(inputs, outputs, name='vae_mlp')

        reconstruction_loss = mse(inputs, outputs)
        reconstruction_loss *= self.no_features
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        vae_loss = K.mean(reconstruction_loss + kl_loss)

        VAE.add_loss(vae_loss)
        VAE.compile(optimizer='adam')
        return VAE
Now,
vae_cls = VAE()
vae = vae_cls.build_model()
# vae.fit()
vae.save_weights('file.h5')
Load model and predict (if in a different script, you need to import the VAE class),
vae_cls = VAE()
vae = vae_cls.build_model()
vae.load_weights('file.h5')
# vae.predict()
Finally, the difference: [ref]
Keras model.save saves:
- Model weights
- Model architecture
- Model compilation details (loss function(s) and metrics)
- Model optimizer and regularizer states
Keras model.save_weights saves only the model weights, and Keras model.to_json() saves only the model architecture.
Hope this helps someone experimenting with variational autoencoders.
Combining MAE and RMSE:
import tensorflow as tf
from tensorflow import keras

def loss_fn_mae_rmse(y_true, y_pred, alpha=0.8):
    mae = keras.losses.MeanAbsoluteError()
    mse = keras.losses.MeanSquaredError()
    return alpha * mae(y_true, y_pred) + (1 - alpha) * tf.sqrt(mse(y_true, y_pred))

model = keras.Model(inputs=..., outputs=...)
opt = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=opt, loss=loss_fn_mae_rmse, metrics=['mae'])
Likewise, if you want to load this model after it has been trained and saved to disk:
model = keras.models.load_model('path/to/model.h5', custom_objects={'loss_fn_mae_rmse': loss_fn_mae_rmse})
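Alternatively (a general Keras option, not from the original answer), load with compile=False and compile again manually, which sidesteps deserializing the custom loss:

model = keras.models.load_model('path/to/model.h5', compile=False)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4),
              loss=loss_fn_mae_rmse,
              metrics=['mae'])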
