AttributeError: validation_data can't be accessed in Custom Callback

I implemented a prediction model with an LSTM and wrote a custom callback in order to compute some additional metrics on the reverse-scaled input.
The Metrics class looks like this:
class Metrics(keras.callbacks.Callback):
    def __init__(self, scaler):
        self.scaler = scaler

    def on_train_begin(self, logs):
        self._data = []

    def on_epoch_end(self, batch, logs):
        val_data, val_target = self.validation_data[0], self.validation_data[1]
        # calculating and appending the metric here
        # self._data.append({metric})
        return

    def get_data(self):
        return self._data
I then use it like this:
metrics = Metrics(scaler)

model = Sequential()
model.add(LSTM(32,
               return_sequences=True,
               activation='tanh',
               input_shape=(dataset.X_train.shape[1], dataset.X_train.shape[2])))
# more layers and model.compile here

history = model.fit(dataset.X_train,
                    dataset.y_train,
                    epochs=EPOCHS,
                    validation_data=(dataset.X_valid, dataset.y_valid),
                    callbacks=[metrics])
Any ideas?
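No answer is shown for this thread here, but the usual cause is that recent Keras versions (TF 2.x) no longer populate self.validation_data on callbacks. A minimal workaround sketch (my addition, not from the thread; my_metric is a hypothetical helper) passes the validation arrays to the callback explicitly:

class Metrics(keras.callbacks.Callback):
    # Sketch: recent Keras versions no longer set self.validation_data on
    # callbacks, so the validation set is handed in explicitly instead.
    def __init__(self, scaler, val_data, val_target):
        super().__init__()
        self.scaler = scaler
        self.val_data = val_data
        self.val_target = val_target

    def on_train_begin(self, logs=None):
        self._data = []

    def on_epoch_end(self, epoch, logs=None):
        preds = self.model.predict(self.val_data)  # self.model is set by Keras
        # reverse-scale preds / self.val_target with self.scaler, then compute
        # and append the metric, e.g.:
        # self._data.append(my_metric(self.scaler.inverse_transform(preds), ...))

    def get_data(self):
        return self._data

metrics = Metrics(scaler, dataset.X_valid, dataset.y_valid)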


How to modify inherited class for additional parameters?

I am working on a problem related to Python classes:
I have two classes:
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.cls_weights = self.weights_calculation()

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights
The second class:
from transformers import Trainer

class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.class_weights)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
In the second class, I have to pass a weight to nn.CrossEntropyLoss, as in loss_func = nn.CrossEntropyLoss(weight=self.class_weights).
I want to modify the inherited Trainer class so that I can pass a new parameter, custom_class_weight, from the MCC class.
What I have tried:
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        self.cls_weights = self.weights_calculation()
        self.WeightedTrainer = WeightedTrainer(self.cls_weights)
        self.trainer = self.WeightedTrainer(self.model)

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights

class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight):
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
This disturbs the inherited Trainer class's properties and gives me an error:
TypeError: 'WeightedTrainer' object is not callable
I also tried:
class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight, **kwargs):
        self.custom_class_weight = custom_class_weight
        super(WeightedTrainer, self).__init__(**kwargs)

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
How do I create a child class that uses Trainer as its parent and overrides it, while keeping all the default properties of the superclass?
Thanks to @JMA's valuable comment, here is the fix.
I was calling self.WeightedTrainer, which is not a callable object. Full working code:
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        self.cls_weights = self.weights_calculation()
        self.custom_trainer = WeightedTrainer
        # keyword arguments avoid a positional clash with custom_class_weight
        self.trainer = self.custom_trainer(model=self.model, custom_class_weight=self.cls_weights)

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights

class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight, **kwargs):
        super().__init__(**kwargs)
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
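One caveat worth adding (mine, not from the original post): nn.CrossEntropyLoss expects its weight as a float tensor, while weights_calculation() returns a NumPy array. A hypothetical instantiation that converts it first (cls_weights and my_model are placeholders):

import torch

# weights_calculation() returns a NumPy array; CrossEntropyLoss wants a tensor.
weight_tensor = torch.tensor(cls_weights, dtype=torch.float)

# Keyword arguments keep custom_class_weight from swallowing Trainer's model.
trainer = WeightedTrainer(custom_class_weight=weight_tensor, model=my_model)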

Making GRU/LSTM states trainable in TensorFlow/Keras and adding some random noise

I train the following GRU-based model; note that I am passing the argument stateful=True to the GRU constructor.
class LearningToSurpriseModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       stateful=True,
                                       return_sequences=True,
                                       return_state=True,
                                       reset_after=True
                                       )
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x

    @tf.function
    def train_step(self, inputs):
        [defining here my training step]
I instantiate my model
model = LearningToSurpriseModel(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)

[compile and do stuff]
The custom callback below resets states manually at the end of each epoch.
gru_layer = model.layers[1]

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, gru_layer):
        self.gru_layer = gru_layer

    def on_epoch_end(self, epoch, logs=None):
        self.gru_layer.reset_states()

model.fit(train_dataset, validation_data=validation_dataset,
          epochs=EPOCHS, callbacks=[EarlyS, CustomCallback(gru_layer)], verbose=1)
States will be reset to zero. I would like to follow the ideas in https://r2rt.com/non-zero-initial-states-for-recurrent-neural-networks.html to make states trainable. The implementation in that post is based on low-level TensorFlow and overwrites native functions; maybe there is a more elegant way in Keras.
(1) How do I make states trainable?
(2) How do I combine trainable states and random initialization?
Based on the very good answer above, here is the full code for solving the case:
class CustomGRULayer(tf.keras.layers.Layer):
    def __init__(self, rnn_units, batch_size):
        super(CustomGRULayer, self).__init__()
        self.rnn_units = rnn_units
        self.batch_size = batch_size
        self.gru = tf.keras.layers.GRU(self.rnn_units,
                                       stateful=True,
                                       return_sequences=True,
                                       return_state=True,
                                       reset_after=True,
                                       )
        self.w = None

    def build(self, input_shape):
        w_init = tf.random_normal_initializer(mean=0.0, stddev=0.2)
        self.w = tf.Variable(
            initial_value=w_init(shape=(self.batch_size, self.rnn_units),
                                 dtype='float32'), trainable=True)

    def call(self, inputs):
        return self.gru(inputs, initial_state=self.w)

class LearningToSurpriseModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units, batch_size):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru_layer = CustomGRULayer(rnn_units=rnn_units, batch_size=batch_size)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru_layer.gru.get_initial_state(x)
        x, states = self.gru_layer.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x

model = LearningToSurpriseModel(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE
)

model.compile(optimizer='adam', loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
              )

EarlyS = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True, patience=10, verbose=1)

# defining a custom callback for resetting states at the end of period only
gru_layer = model.layers[1]

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, gru_layer):
        self.gru_layer = gru_layer

    def on_epoch_end(self, epoch, logs=None):
        self.gru_layer.gru.reset_states(self.gru_layer.w)

model.fit(train_dataset, validation_data=validation_dataset, epochs=EPOCHS,
          callbacks=[EarlyS, CustomCallback(gru_layer)], verbose=1)
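To also fold fresh randomness into each epoch, question (2), one possible variant (a sketch of mine, not part of the answer above; whether reset_states accepts an arbitrary tensor should be verified for your Keras version) perturbs the learned initial state w with Gaussian noise at reset time:

class NoisyResetCallback(tf.keras.callbacks.Callback):
    # Sketch only: resets the stateful GRU to the trainable initial state w
    # plus Gaussian noise. The stddev value is an arbitrary assumption.
    def __init__(self, gru_layer, stddev=0.1):
        super().__init__()
        self.gru_layer = gru_layer
        self.stddev = stddev

    def on_epoch_end(self, epoch, logs=None):
        noise = tf.random.normal(tf.shape(self.gru_layer.w), stddev=self.stddev)
        self.gru_layer.gru.reset_states(self.gru_layer.w + noise)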

LSTM/GRU: setting states to random noise instead of resetting to zero

The model and training setup are identical to those in the previous question: a GRU-based LearningToSurpriseModel built with stateful=True, plus a custom callback whose on_epoch_end calls gru_layer.reset_states() at the end of each epoch.
States will be reset to zero. I would like to follow the ideas in https://r2rt.com/non-zero-initial-states-for-recurrent-neural-networks.html to (re)initialize states to random noise instead. What would be a good implementation for the random noise?
Should I overwrite reset_states(), adding a states parameter?
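No answer is shown for this question here, but a minimal sketch (my assumption, similar in spirit to the noisy-reset sketch in the previous question, only with pure noise and no trainable state) can reuse the existing states parameter of reset_states() rather than overriding the method. The noise shape must match the layer's (batch_size, rnn_units) state, and the stddev is an arbitrary choice:

class RandomResetCallback(tf.keras.callbacks.Callback):
    # Sketch: reset the stateful GRU to Gaussian noise rather than zeros.
    def __init__(self, gru_layer, batch_size, rnn_units, stddev=0.2):
        super().__init__()
        self.gru_layer = gru_layer
        self.batch_size = batch_size
        self.rnn_units = rnn_units
        self.stddev = stddev

    def on_epoch_end(self, epoch, logs=None):
        noise = tf.random.normal((self.batch_size, self.rnn_units), stddev=self.stddev)
        self.gru_layer.reset_states(noise)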

Approximate function only with previous points

I would like to approximate a polynomial function (degree 3 for now), using an input of 10 steps to forecast the next 20 points. To do this, I use a simple dense model with one neuron, but I'm not getting good results and don't know how to improve them.
(Figures omitted: the target function I want to approximate, and the results I obtained.)
My code is as simple as this:
# imports inferred from the names used below (assumed to come from tf.keras)
from tensorflow.keras import Sequential, initializers, losses, metrics
from tensorflow.keras.layers import Dense, Lambda, Reshape
from tensorflow.keras.optimizers import Adam

class Modeles:
    def __init__(self, TO_FORECAST):
        self.Model = None
        self.TO_FORECAST = TO_FORECAST

    def model(self):
        self.Model = Sequential([
            Lambda(lambda x: x[:, -1:, :]),
            Dense(1, activation='relu'),
            Dense(self.TO_FORECAST, activation='linear', kernel_initializer=initializers.Zeros()),
            Reshape([self.TO_FORECAST, 1])
        ])

class CompilationFitting:
    def __init__(self, model, Data):
        self.model = model
        self.Data = Data
        self.History = None
        self.Prediction = None

    def compile(self):
        self.model.compile(loss=losses.MeanSquaredError(),
                           optimizer=Adam(learning_rate=0.01),
                           metrics=[metrics.MeanAbsoluteError()])

    def fit(self):
        MAX_EPOCHS = 100
        self.History = self.model.fit(self.Data.X, self.Data.y, batch_size=16, epochs=MAX_EPOCHS,
                                      validation_data=(self.Data.X, self.Data.y))

    def predict(self):
        self.Prediction = self.model.predict(self.Data.X)
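For clarity, here is a hypothetical wiring of these two classes (my addition; a data object exposing .X and .y arrays is assumed, matching how CompilationFitting uses it):

# Hypothetical usage; `data` is any object exposing .X (inputs) and .y (targets).
modeles = Modeles(TO_FORECAST=20)
modeles.model()                  # builds modeles.Model

run = CompilationFitting(modeles.Model, data)
run.compile()
run.fit()                        # training history stored in run.History
run.predict()                    # forecasts stored in run.Prediction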

AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'

I've been trying to recreate a simple DNN using just the base Keras Layer class and writing everything from scratch. Everything seems to work just fine, but during the training loop I get this error:
AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'
I've tried changing the loss function to either CategoricalCrossentropy or SparseCategoricalCrossentropy (with from_logits True or False), but the error always pops up.
Here's the code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from utils import plot_image, plot_mnist_results, plot_value_array

class Flatten(keras.layers.Layer):
    def __init__(self):
        super(Flatten, self).__init__()

    def build(self, input_shape):
        self.output_size = np.prod(input_shape)

    def call(self, X):
        return tf.reshape(X, shape=(-1, self.output_size))

class Dense(keras.layers.Layer):
    def __init__(self, units, activation):
        super(Dense, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.kernel = self.add_weight(
            name='kernel',
            dtype=tf.float64,
            initializer='glorot_normal',
            trainable=True,
            shape=(input_shape[-1], self.units)
        )
        self.bias = self.add_weight(
            name='bias',
            dtype=tf.float64,
            initializer=keras.initializers.Constant(0.1),
            trainable=True,
            shape=(1, self.units)
        )

    def call(self, X):
        return self.activation(tf.matmul(X, self.kernel) + self.bias)

class DNN(keras.models.Model):
    def __init__(self, units, activation):
        super(DNN, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.flatten = Flatten()
        self.hidden_layer = Dense(self.units, tf.nn.relu)
        self.output_layer = Dense(10, tf.nn.softmax)

    def call(self, X):
        print(self.hidden_layer(self.flatten(X)).shape)
        print(self.output_layer(self.hidden_layer(self.flatten(X))).shape)
        return self.output_layer(self.hidden_layer(self.flatten(X)))

# @tf.function
def train(model, loss, opt, X, y):
    with tf.GradientTape() as tape:
        gradients = tape.gradient(loss(model(X), y), model.trainable_variables)
        gradient_variables = zip(gradients, model.trainable_variables)
        opt.apply_gradients(gradient_variables)

mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images / 255.0
test_images = test_images / 255.0

model = DNN(units=128, activation=tf.nn.relu)
opt = tf.optimizers.Adam(learning_rate=1e-3)

for epoch in range(3):
    for step in range(train_labels.shape[0]):
        loss = keras.losses.SparseCategoricalCrossentropy
        train(model, loss, opt, train_images[step, :, :], train_labels[step])
    train_loss = loss(model(train_images), train_labels)
    template = 'Epoch {}, Train loss: {:.5f}'
    print(template.format(epoch + 1, train_loss.numpy()))
I would expect for the model to train successfully, but it doesn't seem to be the case. What am I doing wrong?
From the given code, I can see that you are mixing tf and keras in places like these:
opt = tf.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.SparseCategoricalCrossentropy
This can cause issues like the one you are seeing. For TensorFlow 2.0, use tf.keras uniformly everywhere you currently use keras directly.
Also, you are assigning the loss inside the batch loop, and you are assigning the class itself rather than an instance, so loss(model(X), y) constructs a new loss object instead of computing a loss value. Instantiate the loss object once, before the epoch loop starts.
The rest all seems fine. Hope this helps!
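A minimal sketch of what that advice looks like in code (my rendering, not the answerer's; I also call the loss as loss_fn(y_true, y_pred) and slice with step:step + 1 to keep a batch dimension, both assumptions on my part):

# Instantiate the loss once, before the loops, and use tf.keras throughout.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

def train(model, loss_fn, opt, X, y):
    with tf.GradientTape() as tape:
        loss_value = loss_fn(y, model(X))  # y_true first, then y_pred
    gradients = tape.gradient(loss_value, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))

for epoch in range(3):
    for step in range(train_labels.shape[0]):
        train(model, loss_fn, opt,
              train_images[step:step + 1, :, :], train_labels[step:step + 1])
    train_loss = loss_fn(train_labels, model(train_images))
    print('Epoch {}, Train loss: {:.5f}'.format(epoch + 1, train_loss.numpy()))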
