I would like to keep track of the gradients over tensorboard.
However, since session run statements are not a thing anymore and the write_grads argument of tf.keras.callbacks.TensorBoard is depricated, I would like to know how to keep track of gradients during training with Keras or tensorflow 2.0.
My current approach is to create a new callback class for this purpose, but without success. Maybe someone else knows how to accomplish this kind of advanced stuff.
The code created for testing is shown below, but runs into errors independently of printing a gradient value to console or tensorboard.
import tensorflow as tf
from tensorflow.python.keras import backend as K
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu', name='dense128'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax', name='dense10')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
class GradientCallback(tf.keras.callbacks.Callback):
console = True
def on_epoch_end(self, epoch, logs=None):
weights = [w for w in self.model.trainable_weights if 'dense' in w.name and 'bias' in w.name]
loss = self.model.total_loss
optimizer = self.model.optimizer
gradients = optimizer.get_gradients(loss, weights)
for t in gradients:
if self.console:
print('Tensor: {}'.format(t.name))
print('{}\n'.format(K.get_value(t)[:10]))
else:
tf.summary.histogram(t.name, data=t)
file_writer = tf.summary.create_file_writer("./metrics")
file_writer.set_as_default()
# write_grads has been removed
tensorboard_cb = tf.keras.callbacks.TensorBoard(histogram_freq=1, write_grads=True)
gradient_cb = GradientCallback()
model.fit(x_train, y_train, epochs=5, callbacks=[gradient_cb, tensorboard_cb])
Priniting bias gradients to console (console parameter = True)
leads to: AttributeError: 'Tensor' object has no attribute 'numpy'
Writing to tensorboard (console parameter = False) creates:
TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the
value of a tensor.
To compute the gradients of the loss against the weights, use
with tf.GradientTape() as tape:
loss = model(model.trainable_weights)
tape.gradient(loss, model.trainable_weights)
This is (arguably poorly) documented on GradientTape.
We do not need to tape.watch the variable because trainable parameters are watched by default.
As a function, it can be written as
def gradient(model, x):
x_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
with tf.GradientTape() as t:
t.watch(x_tensor)
loss = model(x_tensor)
return t.gradient(loss, x_tensor).numpy()
Also have a look here: https://github.com/tensorflow/tensorflow/issues/31542#issuecomment-630495970
richardwth wrote a child class of Tensorboard.
I adapted it as follows:
class ExtendedTensorBoard(tf.keras.callbacks.TensorBoard):
def _log_gradients(self, epoch):
writer = self._writers['train']
with writer.as_default(), tf.GradientTape() as g:
# here we use test data to calculate the gradients
features, y_true = list(val_dataset.batch(100).take(1))[0]
y_pred = self.model(features) # forward-propagation
loss = self.model.compiled_loss(y_true=y_true, y_pred=y_pred) # calculate loss
gradients = g.gradient(loss, self.model.trainable_weights) # back-propagation
# In eager mode, grads does not have name, so we get names from model.trainable_weights
for weights, grads in zip(self.model.trainable_weights, gradients):
tf.summary.histogram(
weights.name.replace(':', '_') + '_grads', data=grads, step=epoch)
writer.flush()
def on_epoch_end(self, epoch, logs=None):
# This function overwrites the on_epoch_end in tf.keras.callbacks.TensorBoard
# but we do need to run the original on_epoch_end, so here we use the super function.
super(ExtendedTensorBoard, self).on_epoch_end(epoch, logs=logs)
if self.histogram_freq and epoch % self.histogram_freq == 0:
self._log_gradients(epoch)
Related
I am using tensorflow 2.8. I followed a tutorial from tensorflow on how to create your own fit function by overwriting the train_step function in your custom keras model class.
I wanted to add class_weight but in their section "Supporting sample_weight & class_weight" they don't show how to actually use class_weight, only sample_weight.
Is there a way to use class_weight in a custom train_step function?
I also found this Colab notebook in a GitHub issue. However this creates a custom model class but doesn't even use it and is therefore of no help either.
When actually creating the custom model and calling fit() I get the error: TypeError: __call__() got an unexpected keyword argument 'class_weight', when the loss in train_step() is calculated.
Example code (with the error) of what I'm trying to do:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
# Prepare the training dataset.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))
# Reserve 10,000 samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# Get model
inputs = keras.Input(shape=(784,), name="digits")
x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(10, name="predictions")(x)
# Instantiate an optimizer to train the model.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Prepare the metrics.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
class CustomModel(keras.Model):
def train_step(self, data):
# Unpack the data. Its structure depends on your model and
# on what you pass to `fit()`.
if len(data) == 3:
x, y, class_weight = data
else:
x, y = data
with tf.GradientTape() as tape:
logits = self(x, training=True) # Forward pass
# Compute the loss value.
# The loss function is configured in `compile()`.
loss = self.compiled_loss(
y,
logits,
class_weight=class_weight,
regularization_losses=self.losses,
)
# Compute gradients
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Update the metrics.
# Metrics are configured in `compile()`.
self.compiled_metrics.update_state(y, y_pred, class_weight=class_weight)
# Return a dict mapping metric names to current value.
return {m.name: m.result() for m in self.metrics}
# Construct and compile an instance of CustomModel
model = CustomModel(inputs=inputs, outputs=outputs)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])
class_weight = {
0: 1.0,
1: 1.0,
2: 1.0,
3: 1.0,
4: 1.0,
# Set weight "2" for class "5",
# making this class 2x more important
5: 2.0,
6: 1.0,
7: 1.0,
8: 1.0,
9: 1.0,
}
model.fit(train_dataset, class_weight=class_weight, epochs=3)
All those examples do not work because loss functions don't take class_weight as an argument, as far as I understand when looking at the documentation.
I tried to fix this by creating my own loss function:
#tf.function
def weighted_sparse_categorical_crossentropy(labels, logits, class_weight=None):
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(labels, logits)
if class_weight is None:
if debug:
print("None weights: NO WEIGHTS SET!")
return loss
# get all class weights as list
if type(class_weight) is not list:
class_weight = list(class_weight.values())
class_weights_gathered = tf.gather(class_weight, labels)
return tf.reduce_mean(class_weights_gathered * loss)
Then using this to compile my model and calling .fit()
model.compile(optimizer=optimizer, loss=weighted_sparse_categorical_crossentropy)
model.fit(X, class_weight=class_weight, epochs=1)
But I still get TypeError: __call__() got an unexpected keyword argument 'class_weight' despite class_weight being clearly an argument in my function.
I also looked at GitHub to see what Tensorflow actually does with the class_weight inside the .fit() function and seems to convert it to a sample_weight somehow.
So I'm not sure if what I want is even possible. But then the section in the official tensorflow tutorial would be wrong since there would be no support for class_weight.
Pretty sure the error is here:
loss = self.compiled_loss(
y,
logits,
class_weight=class_weight,
regularization_losses=self.losses,
)
because class_weight is not recognized... instead, you should use sample_weight, and from keras documentation seems that is as simple as expanding the incoming data, as you are doing:
def train_step(self, data):
x, y, sample_weight = data
your doubt about the "missing example", is due to the fact that keras will automatically transform your class_weight to sample_weight, as you can see from there code:
class M(K.Model):
def train_step(self, data):
tf.print(data)
return {"loss":0}
model = M()
model.compile(K.optimizers.Adam(), K.losses.SparseCategoricalCrossentropy(), run_eagerly=True)
model.fit(np.array([[-1], [-1], [-1]]), np.array([[0],[0],[1]]), class_weight={0:7, 1:9})
which prints:
([[-1][-1][-1]],
[[0] [1] [0]],
[7 9 7])
where you can clearly see that the first line are the x, the second line are the y, the third one are the associated class weights to y, which you can feed to a loss as usual:
x, y, sample_weight = data
...
loss = self.compiled_loss(
y,
logits,
sample_weight=sample_weight,
regularization_losses=self.losses,
)
According to the tensorflow documentation, it should be possible, to configure the loss inside the custom train_step function and only configure the optimizer in compile().
My class looks like this:
import tensorflow as tf
from tensorflow import keras
loss_tracker = tf.keras.metrics.Mean(name="loss")
loss_fn = keras.losses.SparseCategoricalCrossentropy(
reduction=tf.keras.losses.Reduction.NONE
)
class MaskedLanguageModel(tf.keras.Model):
def train_step(self, inputs):
if len(inputs) == 3:
features, labels, sample_weight = inputs
else:
features, labels = inputs
sample_weight = None
with tf.GradientTape() as tape:
predictions = self(features, training=True)
loss = loss_fn(labels, predictions, sample_weight=sample_weight)
# Compute gradients
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
# Update weights
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Compute our own metrics
loss_tracker.update_state(loss, sample_weight=sample_weight)
# Return a dict mapping metric names to current value
return {"loss": loss_tracker.result()}
#property
def metrics(self):
# We list our `Metric` objects here so that `reset_states()` can be
# called automatically at the start of each epoch
# or at the start of `evaluate()`.
# If you don't implement this property, you have to call
# `reset_states()` yourself at the time of your choosing.
return [loss_tracker]
And the model is compiled with the following snippet.
mlm_model = MaskedLanguageModel(inputs, mlm_output, name="masked_bert_model")
optimizer = keras.optimizers.Adam(learning_rate=lr)
mlm_model.compile(optimizer=optimizer)
But I get the following error:
'The model cannot be compiled because it has no loss to optimize.'
(My tensorflow version is 2.1.0)
Thank you in advance for any hint. :)
While my code runs without any problems with Keras Tuner and standard loss functions like 'mse' I am trying to figure out how to write a custom loss function that accept an external argument in addition to true and forecasted y to use inside Keras Tuner for LSTM model selection. I am looking for the easiest and less painful way and I didn't find a working solution in old posts.
One approach I follewed is this one. Let's say I have these variables
# external vector needed in custom loss function
ex_loss= np.logical_not(klines_backtest.loc[i_sel,['d']].to_numpy(dtype=np.float32)[:sample_start])
# create data sequences for x and vector to forecasy y
x_train, y_train = lstm_data_sequence(dataset[:sample_start,:-1], dataset[:sample_start,-1], lstm_sequence)
# concatenate external vector to y so y shape is Nx2
y_train = np.vstack((y_train, ex_loss[lstm_sequence:,0])).T
I have defined the following loss function
def bande_loss(y_true, y_pred):
mse = K.square(y_pred - y_true[:,0])
i_loss = K.equal(y_true[:,1], 1) and K.greater_equal(y_pred, y_true[:,0])
i_loss = K.cast(~i_loss, 'float32')
return K.mean(mse*i_loss)
Basically I tryied to avoid the loss function override passing the additional variable (of the same size of y_true) I need in the loss function inside y_train where I expext to have y_true and the corresponding external variable correctly sized for the batch.
The LSTM for model selection is
def lstm_model(hp):
model = Sequential()
model.add(InputLayer(input_shape=(48*3, 13)))
num_layers = hp.Int('num_layers', min_value=4, max_value=8, step=2)
num_units = hp.Choice('units', values=[50, 100, 250, 500])
n_dropout = hp.Choice('n_dropout', values=[float(0), 0.10, 0.20])
n_rec_dropout = hp.Choice('n_rec_dropout', values=[float(0), 0.10, 0.20])
learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])
for i in range(num_layers):
if i < num_layers - 1:
r_sequence = True
else:
r_sequence = False
model.add(LSTM(
units=num_units,
dropout=n_dropout,
recurrent_dropout=n_rec_dropout,
return_sequences=r_sequence))
model.add(Dense(1))
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
loss=bande_loss,
metrics=[bande_loss])
return model
Executing this code
tuner = Hyperband(
hypermodel=lstm_model,
objective=Objective("bande_loss", direction="min"),
max_epochs=50,
hyperband_iterations=2,
executions_per_trial=1,
overwrite=True,
project_name='hyperband_tuner')
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
shuffle=False, verbose=1)
I get this error
The second input must be a scalar, but it has shape [32]
[[{{node bande_loss/cond/switch_pred/_2736}}]] [Op:__inference_train_function_45266]
Function call stack:
train_function
Note that 32 is the (default) batch size.
Also running the same code with
def bande_loss(y_true, y_pred):
mse = K.square(y_pred - y_true[:,0])
return K.mean(mse)
seems to work fine while running with
def bande_loss(y_true, y_pred):
mse = K.square(y_pred - y_true[:,1])
return K.mean(mse)
gives me the same error and I cannot understand why.
I also tried the loss function override in this way
def lstm_model(hp):
model = Sequential()
model.add(InputLayer(input_shape=(48*3, 13)))
num_layers = hp.Int('num_layers', min_value=4, max_value=8, step=2)
num_units = hp.Choice('units', values=[50, 100, 250, 500])
n_dropout = hp.Choice('n_dropout', values=[float(0), 0.10, 0.20])
n_rec_dropout = hp.Choice('n_rec_dropout', values=[float(0), 0.10, 0.20])
learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])
for i in range(num_layers):
if i < num_layers - 1:
r_sequence = True
else:
r_sequence = False
model.add(LSTM(
units=num_units,
dropout=n_dropout,
recurrent_dropout=n_rec_dropout,
return_sequences=r_sequence))
model.add(Dense(1))
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
loss=bande_loss(ex_loss),
metrics=[bande_loss(ex_loss)])
return model
def bande_loss(ex_loss):
def loss(y_true, y_pred):
mse = K.square(y_pred - y_true)
i_loss = K.equal(ex_loss, True) and K.greater_equal(y_pred, y_true)
i_loss = K.cast(~i_loss, 'float32')
return K.mean(mse*i_loss)
return loss
...
# external vector needed in custom loss function
ex_loss= np.logical_not(klines_backtest.loc[i_sel,['d']].to_numpy(dtype=np.float32)[:sample_start])
# create data sequences for x and vector to forecasy y
x_train, y_train = lstm_data_sequence(dataset[:sample_start,:-1], dataset[:sample_start,-1], lstm_sequence)
ex_loss = K.variable(ex_loss[lstm_sequence:], dtype=bool)
tuner = Hyperband(
hypermodel=lstm_model,
objective=Objective("bande_loss(ex_loss)", direction="min"),
max_epochs=50,
hyperband_iterations=2,
executions_per_trial=1,
overwrite=True,
project_name='hyperband_tuner')
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
shuffle=False, verbose=1)
But I get this error
tensorflow.python.framework.errors_impl.InvalidArgumentError: The second input must be a scalar, but it has shape [4176]
[[{{node cond/switch_pred/_12}}]] [Op:__inference_train_function_34471]
Function call stack:
train_function
Can anyone provide me help or a simpler and effective way to implement custom loss functions with external parameters inside Keras Tuner?
I'm trying to follow this example on the TensorFlow site, but it's not working.
Here's my code:
import tensorflow as tf
def vectorize(vector_like):
return tf.convert_to_tensor(vector_like)
def batchify(vector):
'''Make a batch out of a single example'''
return vectorize([vector])
data = [(batchify([0]), batchify([0, 0, 0])), (batchify([1]), batchify([0, 0, 0])), (batchify([2]), batchify([0, 0, 0]))]
num_hidden = 5
num_classes = 3
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(num_hidden, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid'))
loss_fn = lambda: tf.keras.backend.cast(tf.keras.losses.mse(model(input), output), tf.float32)
var_list_fn = lambda: model.trainable_weights
for input, output in data:
opt.minimize(loss_fn, var_list_fn)
For a while, I was getting a warning about the loss function having the wrong datatype (int instead of float), which is why I've added the casting to the loss function.
Instead of the network training, I'm instead getting the error:
ValueError: No gradients provided for any variable:
['sequential/dense/kernel:0', 'sequential/dense/bias:0',
'sequential/dense_1/kernel:0', 'sequential/dense_1/bias:0'].
Why aren't the gradients getting passed through? What am I doing wrong?
You need to use GradientTape if you want to manipulate gradients in TF2. For example, following works.
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(num_hidden, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes, activation='sigmoid'))
with tf.GradientTape() as tape:
loss = tf.keras.backend.mean(tf.keras.losses.mse(model(input),tf.cast(output, tf.float32)))
gradients = tape.gradient(loss, model.trainable_variables)
opt.apply_gradients(zip(gradients, model.trainable_variables))
Edit:
You can actually get your sample to work by doing the following changes.
Using cast just for the output instead of the full loss_fn (note I'm also doing a mean as we optimize w.r.t mean of loss)
By "work", I mean it doesn't complain. But you will need to further investigate to make sure it's working as intended.
loss_fn = lambda: tf.keras.backend.mean(tf.keras.losses.mse(model(input), tf.cast(output, tf.float32)))
var_list_fn = lambda: model.trainable_weights
opt.minimize(loss_fn, var_list_fn)
I've been trying to update this tutorial to the latest version of tensorflow ie 0.12 and I've hit a snag in the custom model function definition to train the lstm model.
def _lstm_model(X, y):
stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(
lstm_cells(rnn_layers),
state_is_tuple=True)
global_step = tf.Variable(0, trainable=False)
X = tf.cast(X, tf.float32)
y = tf.cast(y, tf.float32)
x_ = tf.unpack(X, axis=1, num=time_steps)
output, layers = tf.nn.rnn(stacked_lstm, x_, dtype=dtypes.float32)
output = dnn_layers(output[-1], dense_layers)
(predictions, loss) = learn.models.linear_regression(output, y)
if optim == 'Adagrad':
print("using AdagradOptimizer")
optimizer = tf.train.AdagradOptimizer(learning_rate)
else:
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(
loss,
global_step=global_step)
return (predictions, loss, train_op)
I've tried both specifying the global step and not specifying it - and I've ended up with the same result - the step remains at 0 with loss being optimized and this continues until I stop the entire script. The code below is what I use to create an Estimator and try to fit the model.
regressor = learn.SKCompat(learn.Estimator(
model_fn=lstm_model(TIMESTEPS,
RNN_LAYERS,
DENSE_LAYERS,
optim='Adagrad',
learning_rate=0.03)))
regressor.fit(x=X['train'],
y=y['train'],
batch_size=BATCH_SIZE,
steps=TRAINING_STEPS
)
I realized that the adamOptimizer was not to be used on it's own - but with the neural network that I had created. The following edit on my code - helped me achieve what I wanted to :
# create model and features
stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(
lstm_cells(params['rnn_layers']),
state_is_tuple=True)
features = tf.cast(features, tf.float32)
targets = tf.cast(targets, tf.float32)
features = tf.unpack(features, axis=1, num=params['time_steps'])
output, layers = tf.nn.rnn(stacked_lstm, features, dtype=dtypes.float32)
output = dnn_layers(output[-1], params['dense_layers'])
# Define Loss
(predictions, loss) = learn.models.linear_regression(output, targets)
# train_op
train_op = tf.contrib.layers.optimize_loss(
loss=loss,
global_step=contrib.framework.get_global_step(),
learning_rate=params['learning_rate'],
optimizer='Adagrad'
)
Here I created the neural Network layers first and then tried to find the optimization on that layer - this is what seemed to have caused me problems.
The tensorflow documentation helped me out quite a bit - hence I was able to run the code on tensorflow - v0.12.