Access layer attribute in custom loss function in Keras - python

I want to write a custom loss function in Keras which depends on an attribute of a (custom) layer in the network.
The idea is the following:
I have a custom layer which modifies the input in each epoch based on a random variable
The output labels should be modified based on the same variable
Some example code to make it more clear:
import numpy as np
from keras import losses, layers, models
# Custom layer from the question: shifts its input by a random scalar and
# stores that scalar on the instance so that a loss function can read it.
class MyLayer(layers.Layer):
    def call(self, x):
        # The scalar is drawn by NumPy, i.e. in Python, whenever this line runs.
        # NOTE(review): if Keras traces call() into a graph this presumably runs
        # only once and `a` becomes a constant -- confirm for the Keras version used.
        a = np.random.rand()
        self.a = a # <-- does this work as expected?
        return x+a
def my_loss(layer):
    """Build a loss function that shifts the targets by ``layer.a``.

    Args:
        layer: object exposing an attribute ``a``; it is read each time the
            returned loss function is called.

    Returns:
        A ``loss(y_true, y_pred)`` callable suitable for ``model.compile``.
    """
    def modified_loss(y_true, y_pred):
        a = layer.a
        y_true = y_true + a
        return losses.mse(y_true, y_pred)

    # Hand the closure back to the caller; without this line the factory
    # evaluates to None and model.compile() receives no loss function.
    return modified_loss
# Build and compile the example model.
# Input needs a shape; (4,) is only an example feature size, the original
# snippet omitted it.
input_layer = layers.Input(shape=(4,))
# A layer is first constructed and then *called* on its input tensor;
# passing the tensor to the constructor does not connect it to the graph.
my_layer = MyLayer(name="my_layer")(input_layer)
output_layer = layers.Dense(4)(my_layer)
model = models.Model(inputs=input_layer, outputs=output_layer)
# The loss receives the layer object (looked up by name), not its output tensor.
model.compile('adam', my_loss(model.get_layer("my_layer")))
I expect that a is changing for every batch and that the same a is used in the layer and loss function.
Right now, it is not working the way I intended. It seems like the a in the loss function is never updated (and maybe not even in the layer).
How do I change the attribute/value of a in the layer at every call and access it in the loss function?

Not quite sure I am following the purpose on this (and I am bothered by the call to np inside the call() of your custom layer - could you not use the tf.random functions instead?) but you can certainly access the a property inside your loss function.
Perhaps something like:
class MyLayer(layers.Layer):
    """Layer that adds a random scalar to its input and remembers it as ``self.a``."""

    def call(self, x):
        # FIXME --> use tf.random
        self.a = np.random.rand()
        return x + self.a
# Input needs a shape; (4,) is only an example feature size, the original
# snippet omitted it.
input_layer = layers.Input(shape=(4,))
# Keep a handle on the layer *object* so the loss below can read its `a`
# attribute, then call the layer on the input to obtain its output tensor.
my_layer = MyLayer(name="my_layer")
shifted_input = my_layer(input_layer)
output_layer = layers.Dense(4)(shifted_input)
model = models.Model(inputs=input_layer, outputs=output_layer)


def my_loss(y_true, y_pred):
    """Mean squared error against targets shifted by the layer's current ``a``."""
    y_true = y_true + my_layer.a
    return losses.mse(y_true, y_pred)


model.compile('adam', loss=my_loss)

Related

Keras - Adding loss to intermediate layer while ignoring the last layer

I've created the following Keras custom model:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
    """Dense softmax layer followed by an argmax over the last axis."""

    def __init__(self, num_classes):
        super().__init__()
        self.dense_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.lambda_layer = tf.keras.layers.Lambda(
            lambda scores: tf.math.argmax(scores, axis=-1)
        )

    def call(self, inputs):
        """Return the argmax of the dense layer's output for ``inputs``."""
        return self.lambda_layer(self.dense_layer(inputs))

    def build_graph(self, raw_shape):
        """Wrap ``call`` in a functional model.

        A convenient way to get the model summary and plot when using the
        subclassing API.
        """
        x = tf.keras.layers.Input(shape=raw_shape)
        return tf.keras.Model(inputs=[x], outputs=self.call(x))
The task is multi-class classification.
Model consists of a dense layer with softmax activation and a lambda layer as a post-processing unit that converts the dense output vector to a single value (predicted class).
The train targets are a one-hot encoded matrix like so:
[
[0,0,0,0,1]
[0,0,1,0,0]
[0,0,0,1,0]
[0,0,0,0,1]
]
It would be nice if I could define a categorical_crossentropy loss over the dense layer and ignore the lambda layer while still maintaining the functionality and outputting a single value when I call model.predict(x).
Please note
My workspace environment doesn't allow me to use a custom training loop, as suggested in @alonetogether's excellent answer.
You can try using a custom training loop, which is pretty straightforward IMO:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
    """Classifier whose forward pass is softmax Dense -> argmax Lambda."""

    def __init__(self, num_classes):
        super().__init__()
        self.dense_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.lambda_layer = tf.keras.layers.Lambda(
            lambda probs: tf.math.argmax(probs, axis=-1)
        )

    def call(self, inputs):
        """Apply the dense layer, then the argmax post-processing layer."""
        dense_out = self.dense_layer(inputs)
        return self.lambda_layer(dense_out)

    def build_graph(self, raw_shape):
        """Build a functional ``tf.keras.Model`` around ``call``.

        A convenient way to get the model summary and plot when using the
        subclassing API.
        """
        x = tf.keras.layers.Input(shape=raw_shape)
        return tf.keras.Model(inputs=[x], outputs=self.call(x))
# Random toy data: 50 one-hot labels and 50 scalar features, in batches of 2.
n_classes = 5
model = MyModel(n_classes)
labels = tf.keras.utils.to_categorical(
    tf.random.uniform((50, 1), maxval=5, dtype=tf.int32)
)
train_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal((50, 1)), labels)
).batch(2)

optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy()
epochs = 2

for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # Only the first layer is applied, so the loss is computed on its
            # output and the model's final Lambda layer is bypassed.
            # NOTE(review): presumably layers[0] is `dense_layer` -- confirm.
            logits = model.layers[0](x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
And prediction:
print(model.predict(tf.random.normal((1, 1))))
[3]
I think there is a Model.predict_classes function that would replace the need for that lambda layer. But if it doesn't work:
There doesn't seem to be a way to do that without using one of these hacks:
Two inputs (one is the ground truth values Y)
Two outputs
Two models
I'm quite convinced there is no other workaround for this.
So, I believe the "two models" version is the best for your case where you seem to "need" a model with single input, single output and fit.
Then I'd do this:
# Two models sharing the same layers: one ends at the softmax output (used
# for training), the other adds the argmax Lambda on top (used for prediction).
inputs = tf.keras.layers.Input(input_shape_without_batch_size)
loss_outputs = tf.keras.layers.Dense(num_classes,activation='softmax')(inputs)
final_outputs = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))(loss_outputs)
training_model = tf.keras.models.Model(inputs, loss_outputs)
final_model = tf.keras.models.Model(inputs, final_outputs)
# The dots below are placeholders for the caller's own arguments.
training_model.compile(.....)
training_model.fit(....)
# Both models are built from the same Dense layer object, so predictions
# come from the layer that training_model was fitted with.
results = final_model.predict(...)

How to add trainable parameters into Tensorflow2 Keras model loss function

I am trying to train an image denoiser network with Keras (Tensorflow2). For the loss function, I want to use something like (a1 * L1_loss + a2 * L2_loss), where a1 and a2 are trainable, which means that after I give them initial values, they get updated at each training iteration. But I have been stuck here for a while and do not know how I should implement this.
Here is some example code,
# Builds a single-Conv3D model and compiles it with a weighted L1 + L2 loss.
model_input = Input(shape=self.input_shape)
# The two loss weights are created as standalone variables; they are passed
# only to the loss factory below, not to the model.
# NOTE(review): presumably this is why fit() never updates them -- confirm.
l1_weight = tf.Variable(0.5, trainable=True, name='L1_Loss_weight')
l2_weight = tf.Variable(0.5, trainable=True, name='L2_Loss_weight')
model_output= Conv3D(filters=self.filters, kernel_size=self.kernel_size, padding='same')(model_input)
self.model = Model(inputs=model_input,
                   outputs=model_output)
optimizer = tf.keras.optimizers.SGD()
model_loss = mixed_loss(L1_weight=l1_weight, L2_weight=l2_weight)
self.model.compile(optimizer=optimizer,
                   loss=model_loss)
where my loss function is defined as
def mixed_loss(L1_weight, L2_weight):
    """Return a loss computing ``L1_weight * MAE + L2_weight * MSE``.

    Args:
        L1_weight: multiplier applied to the mean absolute error term.
        L2_weight: multiplier applied to the mean squared error term.

    Returns:
        A ``mixed(y_true, y_pred)`` callable.
    """
    def mixed(y_true, y_pred):
        l1_term = L1_weight * mean_absolute_error(y_true, y_pred)
        l2_term = L2_weight * mean_squared_error(y_true, y_pred)
        return l1_term + l2_term

    return mixed
I then call fit(), passing it the tf.data.Dataset that contains the training data, to do the training.
Although I can add two weights parameters this way, these weights are un-trainable and they wouldn't change as training. Really hope to get some hints or examples if anyone has some ideas about this problem. Any help is appreciated!

Knowledge Distillation loss with Tensorflow 2 + Keras

I am trying to implement a very simple keras model that uses Knowledge Distillation [1] from another model.
Roughly, I need to replace the original loss L(y_true, y_pred) by L(y_true, y_pred)+L(y_teacher_pred, y_pred) where y_teacher_pred is the prediction of another model.
I've tried to do
# Asker's attempt: build a student model whose loss adds an MSE term
# against the teacher's prediction. This is the code that raises the
# TypeError quoted below the snippet.
def create_student_model_with_distillation(teacher_model):
    inp = tf.keras.layers.Input(shape=(21,))
    model = tf.keras.models.Sequential()
    model.add(inp)
    model.add(...)
    model.add(tf.keras.layers.Dense(units=1))
    # teacher_pred is computed once, from the symbolic Input, at model-build time.
    teacher_pred = teacher_model(inp)
    def my_loss(y_true,y_pred):
        loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
        # The loss closes over teacher_pred from the enclosing scope.
        # NOTE(review): presumably this captured symbolic tensor is the
        # "Graph tensor" the error message complains about -- confirm.
        loss += tf.keras.losses.mean_squared_error(teacher_pred, y_pred)
        return loss
    model.compile(loss=my_loss, optimizer='adam')
    return model
However, when I try to call fit on my model, I am getting
TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
How can I solve this issue ?
Refs
[1] https://arxiv.org/abs/1503.02531
Actually, this blogpost is answer to your question: keras blog
But in short - you should use new TF2 API and call teacher's predict before the tf.GradientTape() block:
def train_step(self, data):
    """Run one knowledge-distillation training step on a single batch.

    Reads ``self.teacher``, ``self.student``, ``self.student_loss_fn``,
    ``self.distillation_loss_fn``, ``self.temperature``, ``self.alpha`` and
    ``self.optimizer``.

    Args:
        data: an ``(x, y)`` pair for the current batch.

    Returns:
        A dict with the combined loss and its two components.
    """
    # Unpack data
    x, y = data

    # Forward pass of teacher, done before the tape is opened so that it is
    # not recorded for differentiation.
    teacher_predictions = self.teacher(x, training=False)

    with tf.GradientTape() as tape:
        # Forward pass of student
        student_predictions = self.student(x, training=True)

        # Compute losses
        student_loss = self.student_loss_fn(y, student_predictions)
        distillation_loss = self.distillation_loss_fn(
            tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
            tf.nn.softmax(student_predictions / self.temperature, axis=1),
        )
        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

    # The excerpt stopped at the loss; a training step also has to apply the
    # gradients (to the student only) and report its results.
    trainable_vars = self.student.trainable_variables
    gradients = tape.gradient(loss, trainable_vars)
    self.optimizer.apply_gradients(zip(gradients, trainable_vars))

    return {
        "loss": loss,
        "student_loss": student_loss,
        "distillation_loss": distillation_loss,
    }

Custom loss function on Keras

I have a dataset containing a matrix of features X and a matrix of labels y of size N where each element y_i belongs to [0,1]. I have the following loss function
where g(.) is a function that depends on the input matrix X.
I know that Keras custom loss function has to be of the form customLoss(y_true,y_predicted), however, I'm having difficulties incorporating the term g(X) in the loss function since this depends on the input matrix.
For each data point in my dataset, my input is of the form X_i = (H, P) where these two parameters are matrices and the function g is defined for each data point as g(X_i) = H x P. Can I pass a = (H, P) in the loss function since this depends on each example or do I need to pass all the matrices at once by concatenating them?
Edit (based on Daniel's answer):
# Asker's attempt at the "loss as a Lambda layer" approach. It is kept
# verbatim; the answer that follows corrects the points flagged here.
# `shape=` receives a single int here; the answer changes it to a tuple.
original_model_inputs = keras.layers.Input(shape=X_train.shape[1])
y_true_inputs = keras.layers.Input(shape=y_train.shape[1])
hidden1 = keras.layers.Dense(256, activation="relu")(original_model_inputs)
hidden2 = keras.layers.Dense(128, activation="relu")(hidden1)
# NOTE(review): `K` is used here as a unit count and below as a backend
# module (K.log) -- the two uses cannot both be right; confirm intent.
output = keras.layers.Dense(K)(hidden2)
def lambdaLoss(x):
    yTrue, yPred, alpha = x
    return (K.log(yTrue) - K.log(yPred))**2+alpha*yPred
# The layer is called with three separate arguments; the answer passes one list.
# `a` is not defined anywhere in this snippet.
loss = Lambda(lambdaLoss)(y_true_inputs, output, a)
# A positional argument after keyword arguments is a Python syntax error,
# and the module is spelled `Keras` here but `keras` above.
model = Keras.Model(inputs=[original_model_inputs, y_true_inputs], outputs=[output], loss)
# Returns the prediction unchanged and ignores the target.
def dummyLoss(true, pred):
    return pred
model.compile(loss = dummyLoss, optimizer=Adam())
train_model = model.fit([X_train, y_train], None, batch_size = 32,
                        epochs = 50,
                        validation_data = ([X_valid, y_valid], None),
                        callbacks=callbacks)
Fixing the understanding of my answer:
# Two inputs (features and true labels) and a three-layer dense stack on the features.
original_model_inputs = keras.layers.Input(shape=X_train.shape[1:]) #must be a tuple, not an int
y_true_inputs = keras.layers.Input(shape=y_train.shape[1:]) #must be a tuple, not an int
hidden1 = keras.layers.Dense(256, activation="relu")(original_model_inputs)
hidden2 = keras.layers.Dense(128, activation="relu")(hidden1)
# `K` is taken from the question's code; it is not defined in this snippet.
output = keras.layers.Dense(K)(hidden2)
You need something to do g(X), I have no idea of what it is, but you need to do it somewhere.
And yes, you need to pass the whole tensor at once, you cannot make x_i and everything else.
# Placeholder for the question's g(X); `something` stands for the real computation.
def g(x):
    return something
# g is applied to the whole input tensor through a Lambda layer.
gResults = Lambda(g)(original_model_inputs)
Continuing my answer:
# Loss expressed as a layer: receives [y_true, y_pred, g(X)] as one list.
# The body is a placeholder left for the asker to fill in.
def lambdaLoss(x):
    yTrue, yPred, G = x
    .... #wait.... where is Y_true in your loss formula?
loss = Lambda(lambdaLoss)([y_true_inputs, output, gResults]) #must be a list of inputs including G
You need a model for training and another to get the outputs, because we're doing a frankenstein model because of the different loss.
# Training model: takes features and true labels, outputs the loss tensor.
training_model = keras.Model(inputs=[original_model_inputs, y_true_inputs], outputs=loss)
# Prediction model: takes features only and outputs `output`; it is built
# from the same layer outputs as the training model.
prediction_model = keras.Model(original_model_inputs, output)
Only the training model must be compiled:
def dummyLoss(true, pred):
    """Return ``pred`` unchanged and ignore ``true``.

    The training model's output is already the loss value, so the compiled
    loss only has to pass it through.
    """
    return pred


training_model.compile(loss=dummyLoss, optimizer=Adam())
# Fit the training model itself (the original called an undefined `model`)
# and keep the return value under its own name so that `training_model`
# is not overwritten.
history = training_model.fit([X_train, y_train], None, batch_size=32,
                             epochs=50,
                             validation_data=([X_valid, y_valid], None),
                             callbacks=callbacks)
Use the other model to get result data:
results = prediction_model.predict(some_x)
Looks like a GAN of some sort. I will refer to (x) as "x_input". There are two methods:
Method 1) Inherit from tf.keras.model class and write your own (not recommended, not shown)
Method 2) Inherit from the tf.keras.losses.Loss class, and return a tuple of a (custom) tf.keras.losses.Loss instance and a tf.keras.layers.Layer that does nothing more than act as a shell to grab and save a copy of the x_input (x). This layer instance can then be added as the top layer in the model. The (custom) tf.keras.losses.Loss instance can then access the input on demand. This method also has the best future support throughout the life of Tensorflow.
First, create a custom layer and custom loss class:
class Acrylic_Layer(tf.keras.layers.Layer):
    """Pass-through layer that keeps a reference to the last input it saw."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filled in by call(); None until the layer has been called.
        self.x_input = None

    def build(self, *args, **kwargs):
        # Nothing to build.
        return None

    def call(self, input):
        # Remember the incoming tensor, then forward it untouched.
        self.x_input = input
        return input
# Loss that owns an Acrylic_Layer so it can read the model input captured by
# that layer. The snippet is a skeleton: __call__ only fetches the input and
# the actual loss computation is not shown.
class Custom_Loss(tf.keras.losses.Loss):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The same layer object is later inserted into the model, so the loss
        # and the model share one instance.
        self.input_thief = Acrylic_Layer()
    def __call__(self, y_true, y_pred, sample_weight=None):
        # Input tensor stored by the layer's call(); None if not yet called.
        x_input = self.input_thief.x_input
Second, add layer and loss function to model
# Create the loss first, then reuse the layer it owns as the model's first layer.
loss_fn = Custom_Loss(*args, **kwargs)
input_thief = loss_fn.input_thief
model = tf.keras.models.Sequential([
    input_thief,  # <<< transparent layer
    Other_layers,  # placeholder for the rest of the network
])
# The loss is attached with compile(); fit() has no `loss` argument.
# Call model.fit(...) with the training data afterwards.
model.compile(loss=loss_fn)  # <<< loss function
Lastly, I'm on the market looking for an ML/Python role, so I'm giving a shout-out.

Keras with activity_regularizer that is updated every iteration

I am building a simple neural network using Keras. It has activity regularization so that the output of the only hidden layer is forced to have small values. Here is the code:
import numpy as np
import math
import keras
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Activation
from keras import regularizers
from keras import backend as K
# Fixed multiplier used by the regularizer below.
a = 1


def my_regularizer(inputs):
    """Return ``a`` times the squared sum of the axis-1 means of ``inputs``."""
    row_means = K.mean(inputs, axis=1)
    total = K.sum(row_means)
    return a * total ** 2
# 200 random 2-D points in [-1, 1); the network is trained to reproduce its input.
x_train=np.random.uniform(low=-1,high=1,size=(200,2))
model=Sequential([
    # The regularizer is attached to this Dense layer, which sits before the
    # separate tanh Activation layer.
    Dense(20,input_shape=(2,),activity_regularizer=my_regularizer),
    Activation('tanh'),
    Dense(2,),
    Activation('linear')
])
model.compile(optimizer='adam',loss='mean_squared_error')
# Inputs double as targets.
model.fit(x_train,x_train,epochs=20,validation_split=0.1)
Questions:
1) Currently, parameter a is set at the beginning and it does not change. How can I change the code such that the parameter a is updated after each iteration such that
a_new=f(a_old,input)
where input is the values at the hidden layer and f(.) is an arbitrary function.
2) I want my activity regularizer to be applied after the first activation function tanh is applied. Have I written my code correctly? The term "activity_regularizer=my_regularizer" in
Dense(20,input_shape=(2,),activity_regularizer=my_regularizer)
makes me feel that the regularizer is being applied to values before the activation function tanh.
You can - but first, you need a valid Keras Regularizer object (your function won't work):
class MyActivityRegularizer(Regularizer):
    """Activity regularizer whose multiplier ``a`` is a backend variable."""

    def __init__(self, a=1):
        # Stored as a backend variable so its value can be changed later
        # with K.set_value.
        self.a = K.variable(a, name='a')

    def __call__(self, x):
        # gets called at each train iteration; your custom function here
        axis1_means = K.mean(x, axis=1)
        penalty = K.sum(axis1_means) ** 2
        return self.a * penalty

    def get_config(self):
        # required class method
        current_a = float(K.get_value(self.a))
        return {"a": current_a}
Next, to work with .fit, you need a custom Keras Callback object (see alternative at bottom):
class ActivityRegularizerScheduler(Callback):
    """Callback that updates every activity regularizer's ``a`` after each batch.

    'on_batch_end' gets automatically called by .fit when finishing
    iterating over a batch. The model, and its attributes, are inherited by
    'Callback' (except at __init__) and can be accessed via, e.g., self.model

    Args:
        model: model whose layers are scanned for activity regularizers.
        update_fn: callable mapping the optimizer's iteration count to the
            new value of ``a``.
    """

    def __init__(self, model, update_fn):
        # Let the base class set up its own state before adding ours.
        super().__init__()
        self.update_fn = update_fn
        self.activity_regularizers = _get_activity_regularizers(model)

    def on_batch_end(self, batch, logs=None):
        iteration = K.get_value(self.model.optimizer.iterations)
        new_activity_reg = self.update_fn(iteration)

        # 'activity_regularizer' references model layer's activity_regularizer (in this
        # case 'MyActivityRegularizer'), so its attributes ('a') can be set directly
        for activity_regularizer in self.activity_regularizers:
            K.set_value(activity_regularizer.a, new_activity_reg)
def _get_activity_regularizers(model):
    """Return the non-None ``activity_regularizer`` of each layer in ``model.layers``.

    Layers without that attribute are skipped; order follows ``model.layers``.
    """
    candidates = (
        getattr(layer, 'activity_regularizer', None) for layer in model.layers
    )
    return [reg for reg in candidates if reg is not None]
Lastly, you'll need to create your model within the Keras CustomObjectScope - see in full ex. below.
Example usage:
from keras.layers import Dense
from keras.models import Sequential
from keras.regularizers import Regularizer
from keras.callbacks import Callback
from keras.utils import CustomObjectScope
from keras.optimizers import Adam
import keras.backend as K
import numpy as np
def make_model(my_reg):
    """Build a two-layer Sequential model with ``my_reg`` on the hidden layer.

    The hidden layer uses a tanh activation and the given activity
    regularizer; the output layer is linear with 2 units.
    """
    hidden = Dense(20, activation='tanh', input_shape=(2,), activity_regularizer=my_reg)
    readout = Dense(2, activation='linear')
    return Sequential([hidden, readout])
# Build the model inside a custom-object scope so that Keras recognizes the
# custom regularizer, then compile it.
my_reg = MyActivityRegularizer(a=1)
with CustomObjectScope({'MyActivityRegularizer': my_reg}):  # required for Keras to recognize
    model = make_model(my_reg)
opt = Adam(lr=1e-4)
model.compile(optimizer=opt, loss='mse')

x = np.random.randn(320, 2)  # dummy data
y = np.random.randn(320, 2)  # dummy labels


def update_fn(x):
    """Map the number of train updates (optimizer.iterations) to a new ``a``."""
    return .5 + .4*np.cos(x)


activity_regularizer_scheduler = ActivityRegularizerScheduler(model, update_fn)
model.fit(
    x,
    y,
    batch_size=32,
    callbacks=[activity_regularizer_scheduler],
    epochs=4,
    verbose=1,
)
To TRACK your a and make sure it's changing, you can get its value at, e.g., each epoch end via:
# Train one epoch at a time and print the regularizer's current `a` after each.
for epoch in range(4):
    model.fit(x, y, batch_size=32, callbacks=[activity_regularizer_scheduler], epochs=1)
    first_regularizer = _get_activity_regularizers(model)[0]
    current_a = K.get_value(first_regularizer.a)
    print("Epoch {} activity_regularizer 'a': {}".format(epoch, current_a))
# My output:
# Epoch 0 activity_regularizer 'a': 0.7190816402435303
# Epoch 1 activity_regularizer 'a': 0.4982417821884155
# Epoch 2 activity_regularizer 'a': 0.2838689386844635
# Epoch 3 activity_regularizer 'a': 0.8644570708274841
Regarding (2), I'm afraid you're right - the 'tanh' outputs won't be used; you'll need to pass activation='tanh' instead.
Lastly, you can do it without a callback, via train_on_batch - but a drawback is, you'll need to feed data to the model yourself (and shuffle it, etc):
# Manual training loop: feed batches yourself and update `a` after each one.
activity_regularizers = _get_activity_regularizers(model)
for _ in range(100):
    x, y = get_data()
    model.train_on_batch(x, y)

    # Use the optimizer's own step counter as the schedule input.
    iteration = K.get_value(model.optimizer.iterations)
    for activity_regularizer in activity_regularizers:
        # Set the regularizer's `a` variable, not the regularizer object
        # itself, matching what the callback version does.
        K.set_value(activity_regularizer.a, update_fn(iteration))

Categories