I am trying to use a custom loss function which depends on arguments beyond the usual (y_true, y_pred) pair that the model provides.
The model has two inputs (mel_specs and pred_inp) and expects a labels tensor for training:
def to_keras_example(example):
    # Prepare the inputs
    return (mel_specs, pred_inp), labels

# train_data is a tf.data.Dataset passed to model.fit(train_data, ...)
train_data = load_dataset(fp, 'train').map(to_keras_example).repeat()
In my loss function I need to calculate the lengths of mel_specs and pred_inp. This means my loss looks like this:
def rnnt_loss_wrapper(y_true, y_pred, mel_specs_inputs_):
    input_lengths = get_padded_length(mel_specs_inputs_[:, :, 0])
    label_lengths = get_padded_length(y_true)
    return rnnt_loss(
        acts=y_pred,
        labels=tf.cast(y_true, dtype=tf.int32),
        input_lengths=input_lengths,
        label_lengths=label_lengths
    )
However, no matter which approach I choose, I run into an issue.
Option 1) Setting the loss function in model.compile()
If I wrap the loss function such that it returns a function taking y_true and y_pred, like this:
def rnnt_loss_wrapper(mel_specs_inputs_):
    def inner_(y_true, y_pred):
        input_lengths = get_padded_length(mel_specs_inputs_[:, :, 0])
        label_lengths = get_padded_length(y_true)
        return rnnt_loss(
            acts=y_pred,
            labels=tf.cast(y_true, dtype=tf.int32),
            input_lengths=input_lengths,
            label_lengths=label_lengths
        )
    return inner_
model = create_model(hparams)
model.compile(
    optimizer=optimizer,
    loss=rnnt_loss_wrapper(model.inputs[0])
)
Here I get a _SymbolicException after calling model.fit():
tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [...]
Option 2) Using model.add_loss()
The documentation of add_loss() states:
[Adds a..] loss tensor(s), potentially dependent on layer inputs.
..
Arguments:
losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses
may also be zero-argument callables which create a loss tensor.
inputs: Ignored when executing eagerly. If anything ...
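For reference, the zero-argument callable form mentioned there looks roughly like this (a minimal sketch adapted from the TensorFlow docs example, unrelated to the RNN-T case):

import tensorflow as tf

inputs = tf.keras.Input(shape=(10,))
d = tf.keras.layers.Dense(10)
x = d(inputs)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)
# The callable takes no arguments and builds the loss tensor lazily
model.add_loss(lambda: tf.reduce_mean(d.kernel))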
So I tried to do the following:
def rnnt_loss_wrapper(y_true, y_pred, mel_specs_inputs_):
    input_lengths = get_padded_length(mel_specs_inputs_[:, :, 0])
    label_lengths = get_padded_length(y_true)
    return rnnt_loss(
        acts=y_pred,
        labels=tf.cast(y_true, dtype=tf.int32),
        input_lengths=input_lengths,
        label_lengths=label_lengths
    )
model = create_model(hparams)
model.add_loss(
    rnnt_loss_wrapper(
        y_true=model.inputs[2],
        y_pred=model.outputs[0],
        mel_specs_inputs_=model.inputs[0],
    ),
    inputs=True
)
model.compile(
    optimizer=optimizer
)
However, calling model.fit() throws a ValueError:
ValueError: No gradients provided for any variable: [...]
Is any of the above options supposed to work?
I have used the add_loss method as follows:
def custom_loss(y_true, y_pred, input_):
    # custom loss function
    y_estim = input_[..., 0] * y_pred
    shape = tf.cast(tf.shape(y_true)[1], dtype='float32')
    return tf.reduce_mean(1/shape * tf.reduce_sum(tf.pow(y_true - y_estim, 2), axis=1))

mix_input = layers.Input(shape=(301, 257, 4))  # input 1
ref_input = layers.Input(shape=(301, 257, 1))  # input 2
target = layers.Input(shape=(301, 257))        # output target

smss_model = Model(inputs=[mix_input, ref_input], outputs=smss)  # my model that accepts two inputs; smss is its output tensor
model = Model(inputs=[mix_input, ref_input, target], outputs=smss)  # used only for training, with the target as an extra input
model.add_loss(custom_loss(target, smss, mix_input))  # register the custom loss via add_loss
model.summary()

model.compile(loss=None, optimizer='sgd')
model.fit([mix, ref, y], epochs=1, batch_size=1, verbose=1)
Even though I have used this method and it works, I am still looking for another approach that does not involve creating a separate training model. One option is sketched below.
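One alternative that avoids building a second training model is to override train_step on a subclassed tf.keras.Model (a sketch, assuming the two-input dataset layout and the rnnt_loss/get_padded_length helpers from the question above):

import tensorflow as tf

class RNNTModel(tf.keras.Model):
    def train_step(self, data):
        (mel_specs, pred_inp), labels = data
        with tf.GradientTape() as tape:
            # Forward pass through the functional graph
            acts = self((mel_specs, pred_inp), training=True)
            loss = rnnt_loss(
                acts=acts,
                labels=tf.cast(labels, tf.int32),
                input_lengths=get_padded_length(mel_specs[:, :, 0]),
                label_lengths=get_padded_length(labels),
            )
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {"loss": loss}

The functional graph is built as before, wrapped in RNNTModel, compiled with just an optimizer, and fit directly on train_data.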
Would using a lambda function work? (https://www.w3schools.com/python/python_lambda.asp)
loss = lambda x1, x2: rnnt_loss(x1, x2, acts, labels, input_lengths,
                                label_lengths, blank_label=0)

This way your loss is a function accepting the parameters x1 and x2, while rnnt_loss can also see acts, labels, input_lengths, label_lengths and blank_label from the enclosing scope.
Related
I am trying to use Keras' functional API to implement gradient descent for a dataset with discrete values. I can't use the MSE loss function, since an argmin is applied to obtain the discrete values and argmin is not differentiable. However, when trying to use keras.losses.SparseCategoricalCrossentropy I get the following error:
TypeError: Expected float32 passed to parameter 'y' of op 'Equal', got 'auto' of type 'str' instead. Error: Expected float32, got 'auto' of type 'str' instead.
Here is the code (modified from the Keras custom train_step example; the SparseCategoricalCrossentropy docs show relevant cases). I used SparseCategoricalCrossentropy since it can replace the non-differentiable argmin while remaining compatible with gradient descent:
loss_tracker = keras.metrics.Mean(name="loss")
mae_metric = keras.metrics.MeanAbsoluteError(name="mae")

class CustomModel(keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute our own loss
            # metric = tf.math.argmin(y_pred, axis=1)
            # loss = keras.losses.mean_squared_error(y, metric)
            loss = tf.keras.losses.SparseCategoricalCrossentropy(y, y_pred)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Compute our own metrics
        loss_tracker.update_state(loss)
        mae_metric.update_state(y, y_pred)
        return {"loss": loss_tracker.result(), "mae": mae_metric.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker, mae_metric]

# Construct an instance of CustomModel
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(3)(inputs)
model = CustomModel(inputs, outputs)

# We don't pass a loss or metrics here.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True,
        # ignore_class=None
    )
)

# Just use `fit` as usual -- you can use callbacks, etc.
x = np.random.random((1000, 32))
y = np.random.randint(3, size=(1000, 1))
model.fit(x, y, epochs=5)
Please let me know how to implement gradient descent using a dataset with discrete values.
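A likely cause, for what it's worth: tf.keras.losses.SparseCategoricalCrossentropy is a class, so calling it as SparseCategoricalCrossentropy(y, y_pred) sends the tensors to the constructor's from_logits/reduction parameters, and the reduction validation then compares the y_pred tensor against the default string 'auto' (the Equal op in the error). A minimal sketch of the corrected train_step, instantiating the loss once and calling the instance:

import tensorflow as tf
from tensorflow import keras

# Instantiate once; Dense(3) produces logits, hence from_logits=True
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

class FixedModel(keras.Model):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = loss_fn(y, y_pred)  # call the instance, not the class
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {"loss": loss}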
I am using a multiple-output model in TensorFlow 2.0:
input_layer = layers.Input(shape=(INP_MAX_LENGTH,), name="input")
embed_layer = layers.Embedding(EMBED_INP_DIM, embedding_size, name="embeddings",
                               weights=[embedding_matrix], trainable=False,
                               input_length=INP_MAX_LENGTH)(input_layer)

lstm_layer1 = layers.LSTM(1024, name="lstm1")(embed_layer)
lstm_layer2 = layers.LSTM(1024, name="lstm2")(embed_layer)

output_layer1 = layers.Dense(1, name="output1", activation='relu')(lstm_layer1)
output_layer2 = layers.Dense(1, name="output2", activation='relu')(lstm_layer2)

concat_layer = layers.Concatenate()([output_layer1, output_layer2])
output_layer3 = layers.Dense(1, name="output3", activation='relu')(concat_layer)

model = Model(inputs=input_layer, outputs=[output_layer1, output_layer2, output_layer3])
model.compile(optimizer='adagrad', loss={'output1': loss1, 'output2': loss1, 'output3': loss2})
I'm using a quantile loss function as my loss1 and it is working fine.
I want my loss2 function to behave something like this:
import tensorflow.keras.backend as K

def loss2(y_true, y_pred):
    # y_true1 = y from output1
    # y_true2 = y from output2
    # y_pred1 = y from output_layer1
    # y_pred2 = y from output_layer2
    # loss = K.mean(y_true - K.sqrt(K.square(y_true2 - y_pred2) + K.square(y_true1 - y_pred1)), axis=-1)
    return loss
In short, I'm trying to implement the distance formula as my loss function and minimize the distance between two points to 0.
Can I pass y_true and y_pred from output1 and output2 to the loss2 function? I tried using Concatenate to at least pass the y_preds, but somehow it's not working.
Yes, when you have multiple output nodes you can have loss functions that take in multiple predictions.
The basic idea is to always treat your output as a vector: when you pass it from your generator function it should be a list, and when it is passed to the loss function it should remain a list.
Rather than having a different loss function for each output node, you should ideally have one loss function for all the outputs combined, as sketched below.
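A sketch of that idea, reusing the layer names from the question (illustrative, not a drop-in solution): merge output1 and output2 into a single two-column output so that one loss function sees both predictions and both targets at once.

import tensorflow.keras.backend as K
from tensorflow.keras import layers, Model

# Merge the two scalar outputs into one (batch, 2) output node
merged = layers.Concatenate(name="merged")([output_layer1, output_layer2])

def distance_loss(y_true, y_pred):
    # Column 0 belongs to output1, column 1 to output2
    d = K.sqrt(K.square(y_true[:, 0] - y_pred[:, 0]) +
               K.square(y_true[:, 1] - y_pred[:, 1]))
    return K.mean(d)

model = Model(inputs=input_layer, outputs=merged)
model.compile(optimizer='adagrad', loss=distance_loss)
# Targets are stacked the same way, e.g. y = np.stack([y1, y2], axis=-1)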
I have a dataset containing a matrix of features X and a matrix of labels y of size N, where each element y_i belongs to [0,1]. I have the following loss function (the formula appears as an image in the original post),
where g(.) is a function that depends on the input matrix X.
I know that a Keras custom loss function has to be of the form customLoss(y_true, y_predicted); however, I'm having difficulties incorporating the term g(X) into the loss, since it depends on the input matrix.
For each data point in my dataset, my input is of the form X_i = (H, P), where these two parameters are matrices, and the function g is defined per data point as g(X_i) = H x P. Can I pass a = (H, P) to the loss function, since it depends on each example, or do I need to pass all the matrices at once by concatenating them?
Edit (based on Daniel's answer):
original_model_inputs = keras.layers.Input(shape=X_train.shape[1])
y_true_inputs = keras.layers.Input(shape=y_train.shape[1])
hidden1 = keras.layers.Dense(256, activation="relu")(original_model_inputs)
hidden2 = keras.layers.Dense(128, activation="relu")(hidden1)
output = keras.layers.Dense(K)(hidden2)

def lambdaLoss(x):
    yTrue, yPred, alpha = x
    return (K.log(yTrue) - K.log(yPred))**2 + alpha*yPred

loss = Lambda(lambdaLoss)(y_true_inputs, output, a)

model = Keras.Model(inputs=[original_model_inputs, y_true_inputs], outputs=[output], loss)

def dummyLoss(true, pred):
    return pred

model.compile(loss=dummyLoss, optimizer=Adam())

train_model = model.fit([X_train, y_train], None, batch_size=32,
                        epochs=50,
                        validation_data=([X_valid, y_valid], None),
                        callbacks=callbacks)
Fixing the understanding of my answer:
original_model_inputs = keras.layers.Input(shape=X_train.shape[1:]) #must be a tuple, not an int
y_true_inputs = keras.layers.Input(shape=y_train.shape[1:]) #must be a tuple, not an int
hidden1 = keras.layers.Dense(256, activation="relu")(original_model_inputs)
hidden2 = keras.layers.Dense(128, activation="relu")(hidden1)
output = keras.layers.Dense(K)(hidden2)
You need something that computes g(X); I have no idea what it is, but you need to do it somewhere.
And yes, you need to pass the whole tensor at once; you cannot work with an individual x_i.
def g(x):
    return something

gResults = Lambda(g)(original_model_inputs)
Continuing my answer:
def lambdaLoss(x):
    yTrue, yPred, G = x
    .... #wait.... where is Y_true in your loss formula?

loss = Lambda(lambdaLoss)([y_true_inputs, output, gResults]) #must be a list of inputs including G
You need one model for training and another to get the outputs, because we're building a Frankenstein model to accommodate the different loss.
training_model = keras.Model(inputs=[original_model_inputs, y_true_inputs], outputs=loss)
prediction_model = keras.Model(original_model_inputs, output)
Only the training model must be compiled:
def dummyLoss(true, pred):
    return pred

training_model.compile(loss=dummyLoss, optimizer=Adam())

history = training_model.fit([X_train, y_train], None, batch_size=32,
                             epochs=50,
                             validation_data=([X_valid, y_valid], None),
                             callbacks=callbacks)
Use the other model to get result data:
results = prediction_model.predict(some_x)
Looks like a GAN of some sort. I will refer to (x) as "x_input". Two methods:
Method 1) Inherit from the tf.keras.Model class and write your own (not recommended, not shown)
Method 2) Inherit from the tf.keras.losses.Loss class, and pair a (custom) tf.keras.losses.Loss instance with a tf.keras.layers.Layer that does nothing more than act as a shell to grab and save a copy of the x_input (x). This layer instance can then be added as the first layer in the model, and the (custom) tf.keras.losses.Loss instance can access the input on demand. This method also has the best future support throughout the life of TensorFlow.
First, create a custom layer and custom loss class:
class Acrylic_Layer(tf.keras.layers.Layer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.x_input = None

    def build(self, *args, **kwargs):
        pass

    def call(self, input):
        self.x_input = input
        return input  # Pass input directly through to the next layer

class Custom_Loss(tf.keras.losses.Loss):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.input_thief = Acrylic_Layer()  # <<< Magic, Python passes objects by reference!

    def __call__(self, y_true, y_pred, sample_weight=None):
        x_input = self.input_thief.x_input  # <<< x_input pulled from the model
        # ... compute and return the loss from y_true, y_pred and x_input
Second, add the layer and loss function to the model:

loss_fn = Custom_Loss(*args, **kwargs)
input_thief = loss_fn.input_thief

model = tf.keras.models.Sequential([
    input_thief,  # <<< transparent layer
    Other_layers,
])

model.compile(loss=loss_fn)  # <<< pass the loss function to compile(), not fit()
model.fit(x, y)              # then fit as usual
My problem is that I don't want the weights to be adjusted if y_true takes certain values. I do not want to simply remove those examples from the training data because of the nature of the RNN I am trying to use.
Is there a way to write a conditional loss function in Keras with this behavior?
For example: if y_true is negative, apply zero gradient so that the parameters in the model do not change; if y_true is positive, loss = losses.mean_squared_error(y_true, y_pred).
You can define a custom loss function and simply use K.switch to conditionally get zero loss:
from keras import backend as K
from keras import losses

def custom_loss(y_true, y_pred):
    loss = losses.mean_squared_error(y_true, y_pred)
    return K.switch(K.flatten(K.equal(y_true, 0.)), K.zeros_like(loss), loss)
Test:
import numpy as np
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(1, input_shape=(1,)))
model.compile(loss=custom_loss, optimizer='adam')

weights, bias = model.layers[0].get_weights()

x = np.array([1, 2, 3])
y = np.array([0, 0, 0])
model.train_on_batch(x, y)

# check that the parameters have not changed after training on the batch
>>> (weights == model.layers[0].get_weights()[0]).all()
True
>>> (bias == model.layers[0].get_weights()[1]).all()
True
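For the negative-value case actually described in the question (rather than zeros), the same pattern applies with a different condition (a sketch):

from keras import backend as K
from keras import losses

def custom_loss_negative(y_true, y_pred):
    loss = losses.mean_squared_error(y_true, y_pred)
    # Zero out the loss wherever the target is negative
    return K.switch(K.flatten(K.less(y_true, 0.)), K.zeros_like(loss), loss)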
Since the y's come in batches, you need to select those from the batch which are non-zero inside the custom loss function:
def myloss(y_true, y_pred):
    idx = tf.not_equal(y_true, 0)
    y_true = tf.boolean_mask(y_true, idx)
    y_pred = tf.boolean_mask(y_pred, idx)
    return losses.mean_squared_error(y_true, y_pred)
Then it can be used as such:
model = keras.Sequential([Dense(32, input_shape=(2,)), Dense(1)])
model.compile('adam', loss=myloss)
x = np.random.randn(2, 2)
y = np.array([1, 0])
model.fit(x, y)
But you might need extra logic in the loss function for the case where every y_true in the batch is zero; in that case the loss function can be modified as follows:
def myloss2(y_true, y_pred):
    idx = tf.not_equal(y_true, 0)
    y_true = tf.boolean_mask(y_true, idx)
    y_pred = tf.boolean_mask(y_pred, idx)
    loss = tf.cond(tf.equal(tf.shape(y_pred)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32),
                   lambda: losses.mean_squared_error(y_true, y_pred))
    return loss
In Keras (with Tensorflow backend), is the current input pattern available to my custom loss function?
The current input pattern is defined as the input vector used to produce the prediction. For example, consider the following: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, shuffle=False). Then the current input pattern is the current X_train vector associated with y_train (which is termed y_true in the loss function).
When designing a custom loss function, I intend to optimize/minimize a value that requires access to the current input pattern, not just the current prediction.
I've taken a look through https://github.com/fchollet/keras/blob/master/keras/losses.py
I've also looked through "Cost function that isn't just y_pred, y_true?"
I am also familiar with previous examples to produce a customized loss function:
import keras.backend as K

def customLoss(y_true, y_pred):
    return K.sum(K.log(y_true) - K.log(y_pred))
Presumably (y_true, y_pred) are defined elsewhere. I've looked through the source code without success, and I'm wondering whether I need to define the current input pattern myself or whether it is already accessible to my loss function.
You can wrap the loss function as an inner function and pass your input tensor to it (as is commonly done when passing additional arguments to a loss function):
def custom_loss_wrapper(input_tensor):
    def custom_loss(y_true, y_pred):
        return K.binary_crossentropy(y_true, y_pred) + K.mean(input_tensor)
    return custom_loss

input_tensor = Input(shape=(10,))
hidden = Dense(100, activation='relu')(input_tensor)
out = Dense(1, activation='sigmoid')(hidden)
model = Model(input_tensor, out)
model.compile(loss=custom_loss_wrapper(input_tensor), optimizer='adam')
You can verify that input_tensor, and hence the loss value (mostly the K.mean(input_tensor) part), changes as different X is passed to the model:
X = np.random.rand(1000, 10)
y = np.random.randint(2, size=1000)
model.test_on_batch(X, y) # => 1.1974642
X *= 1000
model.test_on_batch(X, y) # => 511.15466
You can use add_loss to pass additional tensors to your loss, in your case the input tensor.
Here is an example:
def CustomLoss(y_true, y_pred, input_tensor):
    return K.binary_crossentropy(y_true, y_pred) + K.mean(input_tensor)
X = np.random.uniform(0,1, (1000,10))
y = np.random.randint(0,2, 1000)
inp = Input(shape=(10,))
hidden = Dense(100, activation='relu')(inp)
out = Dense(1, activation='sigmoid')(hidden)
target = Input((1,))
model = Model([inp,target], out)
model.add_loss( CustomLoss( target, out, inp ) )
model.compile(loss=None, optimizer='adam')
model.fit(x=[X,y], y=None, epochs=3)
To use the model in inference mode (removing the target from the inputs):
final_model = Model(model.input[0], model.output)
final_model.predict(X)