I am working on a problem statement related to python classes:
I have two classes:
class MCC(object):
    """Bundle a labelled dataset with the class weights derived from it.

    Args:
        problem_type: Stored unchanged on the instance.
        batch_size: Stored unchanged on the instance.
        dataset: Object indexable by ``'labels'`` whose result offers
            ``value_counts()`` (presumably a pandas DataFrame -- confirm
            against the caller).
    """

    def __init__(self, problem_type, batch_size, dataset):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        # The method has to be reached through ``self``; a bare
        # ``weights_calculation()`` is resolved as a module-level name and
        # raises NameError.
        self.cls_weights = self.weights_calculation()

    def weights_calculation(self):
        """Return one weight per label value, ordered by label.

        Each weight is ``1 - count(label) / len(dataset)``, so frequent
        classes receive smaller weights.
        """
        label_counts = self.dataset['labels'].value_counts().sort_index()
        class_weights = (1 - (label_counts / len(self.dataset))).values
        return class_weights
Second class:
from transformers import Trainer
class WeightedTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross-entropy.

    NOTE(review): ``self.class_weights`` is read below but never assigned in
    this class; it has to be provided on the instance by other code.
    """

    def compute_loss(self, model, inputs, return_outputs=False):
        """Run ``model`` on ``inputs`` and return the weighted loss.

        Returns ``(loss, outputs)`` when ``return_outputs`` is true,
        otherwise just ``loss``.
        """
        model_output = model(**inputs)
        predicted = model_output.get('logits')
        target = inputs.get('labels')
        criterion = nn.CrossEntropyLoss(weight=self.class_weights)
        loss = criterion(predicted, target)
        if return_outputs:
            return (loss, model_output)
        return loss
In the second class, I have to pass the weights to nn.CrossEntropyLoss, as in the line loss_func = nn.CrossEntropyLoss(weight = self.class_weights).
I want to modify the inherited Trainer class so that it accepts a new parameter, custom_class_weight, which is supplied from the MCC class.
What I have tried:
# Failed attempt from the question, kept exactly as posted because it
# demonstrates the error being asked about.
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        # NOTE(review): the method is invoked as a bare name, not through
        # ``self`` -- confirm whether the author's real code had ``self.``.
        self.cls_weights = weights_calculation()
        # This stores a WeightedTrainer *instance* (the class is called here).
        self.WeightedTrainer = WeightedTrainer(self.cls_weights)
        # Calling that instance is consistent with the quoted
        # "TypeError: 'WeightedTrainer' object is not callable".
        self.trainer = self.WeightedTrainer(self.model)

    def weights_calculation(self):
        # One weight per label value: 1 - count(label) / len(dataset).
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index()/len(self.dataset))).values
        return class_weights
# Failed attempt from the question, kept exactly as posted.
class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight):
        # NOTE(review): the parent ``Trainer.__init__`` is never called here
        # and no Trainer arguments (model, args, datasets) can be passed.
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False):
        # Forward pass, then cross-entropy weighted by the stored weights.
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight = self.custom_class_weight)
        loss = loss_func(logits, labels)
        # Hand back the model outputs as well when the caller asks for them.
        return (loss, outputs) if return_outputs else loss
This breaks the properties inherited from the Trainer class and gives me the following error:
TypeError: 'WeightedTrainer' object is not callable
I also tried:
class WeightedTrainer(Trainer):
    """Trainer that applies caller-supplied class weights to its loss."""

    def __init__(self, custom_class_weight, **kwargs,):
        """Store the weights, then forward every other keyword to Trainer."""
        self.custom_class_weight = custom_class_weight
        super().__init__(**kwargs)

    def compute_loss(self, model, inputs, return_outputs=False):
        """Return the weighted cross-entropy, optionally with the outputs."""
        model_output = model(**inputs)
        predicted = model_output.get('logits')
        target = inputs.get('labels')
        criterion = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = criterion(predicted, target)
        if return_outputs:
            return (loss, model_output)
        return loss
How to create a child class, using the Trainer class as a parent class and modify an inherited class while keeping all default properties of super class?
Thank you to @JMA for the valuable comment. Here is the fix.
I was calling self.WeightedTrainer, which was an instance and therefore not a callable object. Full working code:
class MCC(object):
    """Compute class weights from a dataset and build a weighted trainer.

    Args:
        problem_type: Stored unchanged on the instance.
        batch_size: Stored unchanged on the instance.
        dataset: Object indexable by ``'labels'`` whose result offers
            ``value_counts()`` (presumably a pandas DataFrame -- confirm
            against the caller).
        model: Model handed to the trainer.
    """

    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        # Reach the method through ``self``; the bare name raises NameError.
        self.cls_weights = self.weights_calculation()
        # Keep a reference to the trainer *class* so it can be instantiated.
        self.custom_trainer = WeightedTrainer
        # Use the attribute assigned above (``self.WeightedTrainer`` never
        # existed) and pass the model by keyword: the first positional slot
        # of WeightedTrainer.__init__ is ``custom_class_weight``.
        self.trainer = self.custom_trainer(
            model=self.model,
            custom_class_weight=self.cls_weights,
        )

    def weights_calculation(self):
        """Return one weight per label value, ordered by label.

        Each weight is ``1 - count(label) / len(dataset)``.
        """
        label_counts = self.dataset['labels'].value_counts().sort_index()
        class_weights = (1 - (label_counts / len(self.dataset))).values
        return class_weights
class WeightedTrainer(Trainer):
    """Trainer whose loss is a cross-entropy weighted per class.

    Args:
        custom_class_weight: Per-class weights; an array-like or a tensor.
        **kwargs: Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, custom_class_weight, **kwargs,):
        super().__init__(**kwargs)
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False,
                     num_items_in_batch=None):
        """Run the model and return the class-weighted cross-entropy.

        Args:
            model: Callable invoked as ``model(**inputs)``.
            inputs: Mapping that contains the ``'labels'`` entry.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted because newer Trainer versions pass
                it by keyword; not used by this loss.
        """
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')

        weight = self.custom_class_weight
        if weight is not None:
            # CrossEntropyLoss needs a tensor with the logits' dtype and
            # device; the weights may arrive as a NumPy array.
            if hasattr(weight, 'to'):
                weight = weight.to(device=logits.device, dtype=logits.dtype)
            else:
                weight = logits.new_tensor(weight)

        loss_func = nn.CrossEntropyLoss(weight=weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
Related
I have defined my model as follows:
class MyModel(nn.Module):
    """Two frozen BERT encoders feeding a two-layer regression head."""

    def __init__(self, hidden_dim, activation_func):
        super().__init__()
        self.bert1 = BertForTokenClassification.from_pretrained("...")
        self.bert2 = BertForTokenClassification.from_pretrained("...")
        # Both encoders are frozen; only the linear layers below train.
        for encoder in (self.bert1, self.bert2):
            self.freezModel(encoder)
        self.l1 = nn.Linear(2048 * 2 * 2, hidden_dim)
        self.activation_func = activation_func
        self.l2 = nn.Linear(hidden_dim, 1)

    def freezModel(self, model):
        """Disable gradients for every parameter of ``model`` (no-op on None)."""
        if model is not None:
            for param in model.parameters():
                param.requires_grad = False

    def forward(self, x):
        """Encode ``x[:, 0, :]`` and ``x[:, 1, :]`` separately, then regress."""
        first = torch.flatten(self.bert1(x[:, 0, :])[0], 1)
        second = torch.flatten(self.bert2(x[:, 1, :])[0], 1)
        joined = torch.cat((first, second), 1)
        hidden = self.activation_func(self.l1(joined))
        return self.l2(hidden)
My first question: did I define my model correctly?
My second question: how should I define the optimizer, and when should I use parameter groups?
This is the code of my instantiation of the class and the optimizer.
# Build the model with a 100-unit hidden layer and a ReLU activation.
model = MyModel(100, nn.ReLU())
# Every parameter is handed to AdamW, including those MyModel froze.
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)
The input to my model consists of two arrays; each array goes to one BERT, and their outputs are concatenated and sent to the linear layer. The BERTs are used only for generating the embeddings, and I have frozen them so that they do not change during training.
I would like to approximate a polynomial function (degree 3 for now), with an input of 10 steps to forecast the 20 next points. To do this, I use a simple dense model with one neuron, but I'm not having good results and don't know how to optimize the problem.
What I want to approximate:
The results I obtained:
My code is simple as that:
class Modeles:
    """Factory holding a small dense forecaster for ``TO_FORECAST`` steps."""

    def __init__(self, TO_FORECAST):
        self.TO_FORECAST = TO_FORECAST
        # Populated by ``model()``.
        self.Model = None

    def model(self):
        """Build the Sequential network and store it on ``self.Model``."""
        horizon = self.TO_FORECAST
        keep_last_step = Lambda(lambda x: x[:, -1:, :])
        bottleneck = Dense(1, activation='relu')
        projection = Dense(horizon, activation='linear',
                           kernel_initializer=initializers.zeros())
        to_sequence = Reshape([horizon, 1])
        self.Model = Sequential(
            [keep_last_step, bottleneck, projection, to_sequence]
        )
class CompilationFitting:
    """Compile, fit and run predictions for a model on one data holder."""

    def __init__(self, model, Data):
        self.model = model
        self.Data = Data
        # Filled in by ``fit()`` and ``predict()`` respectively.
        self.History = None
        self.Prediction = None

    def compile(self):
        """Compile with MSE loss, Adam (lr 0.01) and an MAE metric."""
        loss_fn = losses.MeanSquaredError()
        optimizer = Adam(learning_rate=0.01)
        tracked = [metrics.MeanAbsoluteError()]
        self.model.compile(loss=loss_fn, optimizer=optimizer, metrics=tracked)

    def fit(self):
        """Train for 100 epochs, validating on the training data itself."""
        MAX_EPOCHS = 100
        features, targets = self.Data.X, self.Data.y
        self.History = self.model.fit(
            features,
            targets,
            batch_size=16,
            epochs=MAX_EPOCHS,
            validation_data=(self.Data.X, self.Data.y),
        )

    def predict(self):
        """Store the model's predictions for ``Data.X``."""
        self.Prediction = self.model.predict(self.Data.X)
I am training a keras model with a data generator that reads the data in batches from a directory. This works great with model.fit(). But when using model.predict(), I would like to have both ypred and ytrue values returned.
Can I enable/modify model.predict() to do this (maybe with a custom callback)?
class DataGenerator(tf.keras.utils.Sequence):
    """Serve ``(X, y)`` pairs stored one per file as ``data/<id>.npy``.

    Args:
        ids: Identifiers of the files this generator may read; one file is
            returned per ``__getitem__`` call.
        batch_size: Stored on the instance; the loading code here does not
            use it.
    """

    def __init__(self, ids, batch_size=256):
        super().__init__()
        self.batch_size = batch_size
        self.ids = ids

    def __len__(self):
        """Return the number of batches, i.e. the number of ids."""
        # ``__len__`` must return an int, not the id array itself.
        return len(self.ids)

    def __getitem__(self, index):
        """Load the pair stored under the id at position ``index``."""
        # Map the batch position to a file id so each generator reads its
        # own files and the per-epoch shuffle actually changes the order.
        # allow_pickle=True unpickles file contents: only load trusted files.
        X, y = np.load(f'data/{self.ids[index]}.npy', allow_pickle=True)
        return X, y

    def on_epoch_end(self):
        '''Shuffle ids in each epoch'''
        self.ids = np.random.choice(self.ids, len(self.ids), replace=False)
model = buildModel() #builds a multilayer perceptron
train_ids = np.arange(10000) #training data are in data/0.npy, data/1.npy, ... data/9999.npy
# Validation uses the next 2000 ids (10000..11999).
val_ids = np.arange(10000, 12000)
train_generator = DataGenerator(train_ids)
val_generator = DataGenerator(val_ids)
# Train model
history = model.fit(x=train_generator, epochs=100)
# Validate model (but I don't have ytrue)
ypred = model.predict(x=val_generator).reshape(-1)
# What I would like to achieve
# (hypothetical call: ``some_custom_callback`` is a placeholder name)
(ypred, ytrue) = model.predict(x=val_generator, callbacks=[some_custom_callback])
# Or
# (hypothetical call: ``some_fancy_method`` is a placeholder name)
ypred = model.predict(x=val_generator)
ytrue = some_fancy_method(val_generator)
This can be done by adding a method to your DataGenerator class that takes the fitted model as input, applies it to the generated data batches, and returns ytrue and ypred.
class DataGenerator(tf.keras.utils.Sequence):
    """Serve ``(X, y)`` pairs from ``data/<id>.npy`` and score a model on them.

    Args:
        ids: Identifiers of the files this generator may read; one file is
            returned per ``__getitem__`` call.
        batch_size: Stored on the instance; the loading code here does not
            use it.
    """

    def __init__(self, ids, batch_size=256):
        super().__init__()
        self.batch_size = batch_size
        self.ids = ids

    def __len__(self):
        """Return the number of batches, i.e. the number of ids."""
        # ``__len__`` must return an int, not the id array itself.
        return len(self.ids)

    def __getitem__(self, index):
        """Return the pair stored under the id at position ``index``."""
        # Translate the batch position into a file id before loading.
        X, y = self.load_data(self.ids[index])
        return X, y

    def load_data(self, index):
        """Load ``(X, y)`` from ``data/<index>.npy``; ``index`` is a file id."""
        # allow_pickle=True unpickles file contents: only load trusted files.
        X, y = np.load(f'data/{index}.npy', allow_pickle=True)
        return X, y

    def predict(self, model):
        """Apply ``model`` to every file and collect targets and predictions.

        Returns:
            ``(ytrue, ypred)`` -- two lists, in that order.
        """
        ytrue, ypred = [], []
        for index in self.ids:
            X, y = self.load_data(index)
            pred = model.predict(X).reshape(-1)
            ytrue.extend(y)
            ypred.extend(pred)
        return ytrue, ypred

    def on_epoch_end(self):
        '''Shuffle ids in each epoch'''
        self.ids = np.random.choice(self.ids, len(self.ids), replace=False)
train_generator = DataGenerator(train_ids)
val_generator = DataGenerator(val_ids)
# Train model
history = model.fit(x=train_generator, epochs=100)
# Validate model
# DataGenerator.predict returns (ytrue, ypred) in that order, so unpack the
# targets first; the reversed unpacking silently swapped the two lists.
ytrue, ypred = val_generator.predict(model)
I am trying to run the following code (as given in Tensorflow documentation) to create windows of my data and then flatten the dataset of datasets.
# Number of consecutive elements grouped into each window.
window_size = 5
# ``window`` yields a dataset of sub-datasets, advancing one element at a time.
windows = range_ds.window(window_size, shift=1)
# Inspect the first five windows.
for sub_ds in windows.take(5):
    print(sub_ds)
# Flatten the dataset of datasets; the post reports that
# ``flat_windows.cardinality()`` is -2 after this step.
flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns cardinality to be -2 which is creating problem for me during training. I tried looking for ways to set_cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again no success.
Edit-1: The problem with the training is that the shape is unknown (at Linear and Dense layers) when I am training a subclass model (given below). The model trains well when I train the model eagerly (through tf.config.run_functions_eagerly(True)) but that is slow. Therefore I want the input data to be known for the model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
    """Model that trains an ``Encoder`` to map paired inputs close together.

    Each batch is ``(X, Y)``; the loss is the Frobenius norm of
    ``encoder(Y) - encoder(X)``. ``loss_tracker`` is a module-level metric
    defined outside this class.
    """

    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def train_step(self, inputs):
        """Run one optimisation step and return the tracked loss."""
        X = inputs[0]
        Y = inputs[1]
        with tf.GradientTape() as tape:
            enc_X = self.encoder(X)
            enc_Y = self.encoder(Y)
            # loss:
            loss = tf.norm(enc_Y - enc_X, axis = [0, 1], ord = 'fro')
        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}

    # This must be a real decorator: as a comment (``#property``) ``metrics``
    # is a plain method and Keras cannot read it as a list of metrics.
    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker]

    def test_step(self, inputs):
        """Evaluate one batch without updating weights; return the tracked loss."""
        X = inputs[0]
        Y = inputs[1]
        Psi_X = self.encoder(X)
        Psi_Y = self.encoder(Y)
        # loss:
        loss = tf.norm(Psi_Y - Psi_X, axis = [0, 1], ord = 'fro')
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
    """Stack of four ELU dense layers followed by a linear output layer."""

    def __init__(self):
        super().__init__(dtype='float64', name='Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training):
        """Pass ``input_data`` through the layers in order; ``training`` is unused."""
        activated_stack = (
            self.input_layer,
            self.hidden_layer1,
            self.hidden_layer2,
            self.hidden_layer3,
        )
        features = input_data
        for layer in activated_stack:
            features = layer(features)
        return self.output_layer(features)
class LinearLayer(tf.keras.layers.Layer):
    """Affine float64 layer: ``inputs @ w + b`` with no activation."""

    def __init__(self, units):
        super().__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the last input dimension is known."""
        fan_in = input_shape[-1]
        self.w = self.add_weight(
            shape=(fan_in, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer=tf.zeros_initializer(),
            trainable=True,
        )

    def call(self, inputs):
        """Return the affine projection of ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
class DenseLayer(tf.keras.layers.Layer):
    """Affine float64 layer followed by an ELU activation."""

    def __init__(self, units):
        super().__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the last input dimension is known."""
        fan_in = input_shape[-1]
        self.w = self.add_weight(
            shape=(fan_in, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer=tf.zeros_initializer(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``elu(inputs @ w + b)``."""
        pre_activation = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(pre_activation)
I was wondering about this as well. Turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which represents that TF doesn't know how many elements the flat_map returns per item.
I just asked a follow-up question, "Windowing a TensorFlow dataset without losing cardinality information?", to see if anyone knows a way to window datasets without losing cardinality.
I implemented a prediction model with an LSTM and wrote a custom callback in order to access some additional metrics on the reverse-scaled input.
The Metrics class looks like this:
class Metrics(keras.callbacks.Callback):
    """Callback that collects extra per-epoch metrics on validation data.

    Args:
        scaler: Stored on the instance for use by the metric code.
        validation_data: Optional ``(inputs, targets)`` pair. Pass it
            explicitly when the Keras version in use does not populate
            ``self.validation_data`` on callbacks.
    """

    def __init__(self, scaler, validation_data=None):
        # Let the base class set up its own attributes first.
        super().__init__()
        self.scaler = scaler
        # Exists from construction so ``get_data()`` works before training.
        self._data = []
        if validation_data is not None:
            self.validation_data = validation_data

    def on_train_begin(self, logs=None):
        """Reset the collected metrics at the start of every training run."""
        self._data = []

    def on_epoch_end(self, batch, logs=None):
        """Compute the custom metric for this epoch.

        Raises:
            ValueError: If no validation data is available on the callback.
        """
        validation_data = getattr(self, 'validation_data', None)
        if validation_data is None:
            raise ValueError(
                "Metrics has no validation data; pass validation_data=(X, y) "
                "when constructing the callback."
            )
        val_data, val_target = validation_data[0], validation_data[1]
        # calculating and appending the metric here
        # self._data.append({metric})
        return

    def get_data(self):
        """Return the list of metrics collected so far."""
        return self._data
I then use it like this:
# Callback instance that will collect the extra metrics during training.
metrics = Metrics(scaler)
model = Sequential()
# First layer: 32-unit LSTM returning the full sequence; the input shape is
# taken from the last two dimensions of the training array.
model.add(LSTM(32,
               return_sequences=True,
               activation='tanh',
               input_shape=(dataset.X_train.shape[1], dataset.X_train.shape[2])))
# more layers and model.compile here
# Train with the validation split and the custom callback attached.
history = model.fit(dataset.X_train,
                    dataset.y_train,
                    epochs=EPOCHS,
                    validation_data=(dataset.X_valid, dataset.y_valid),
                    callbacks=[metrics])
Any ideas?