How should I define the optimizer for my model? - python

I have defined my model as follows:
import torch
import torch.nn as nn
from transformers import BertForTokenClassification

class MyModel(nn.Module):
    def __init__(self, hidden_dim, activation_func):
        super(MyModel, self).__init__()
        self.bert1 = BertForTokenClassification.from_pretrained("...")
        self.bert2 = BertForTokenClassification.from_pretrained("...")
        self.freezModel(self.bert1)
        self.freezModel(self.bert2)
        self.l1 = nn.Linear(2048 * 2 * 2, hidden_dim)
        self.activation_func = activation_func
        self.l2 = nn.Linear(hidden_dim, 1)

    def freezModel(self, model):
        if model is None:
            return
        for name, p in model.named_parameters():
            p.requires_grad = False

    def forward(self, x):
        x1 = x[:, 0, :]
        x2 = x[:, 1, :]
        b1_out = torch.flatten(self.bert1(x1)[0], 1)
        b2_out = torch.flatten(self.bert2(x2)[0], 1)
        b_out = torch.cat((b1_out, b2_out), 1)
        l1_out = self.l1(b_out)
        activation_func_out = self.activation_func(l1_out)
        l2_out = self.l2(activation_func_out)
        return l2_out
My first question: did I define my model correctly?
And my second question: how should I define the optimizer, and when should I use parameter groups?
This is how I instantiate the class and the optimizer:

model = MyModel(100, nn.ReLU())
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)

The input of my model contains two arrays; each array goes to one BERT, and their outputs are concatenated and sent to the linear layer. The BERTs are only used to generate embeddings, and I have frozen them so they are not updated during training.
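One common pattern, shown here only as a minimal sketch rather than a definitive answer, is to pass just the trainable parameters to the optimizer, since the frozen BERT weights never receive gradients; parameter groups become useful only when different parts of the model should get different hyperparameters (for example a lower learning rate for an unfrozen backbone):

# Sketch: optimize only the parameters that still require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optim = torch.optim.AdamW(trainable_params, lr=5e-5)

# Parameter groups are for per-submodule settings, e.g. different learning rates:
# optim = torch.optim.AdamW([
#     {"params": model.l1.parameters(), "lr": 5e-5},
#     {"params": model.l2.parameters(), "lr": 1e-4},
# ])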

Related

How to add individual LSTM layers for each task in multi-task learning with PyTorch

For example, I define a model for 2 tasks in a multi-task way.
class BertMy(nn.Module):
    def __init__(self, segment_size, output_size, dropout):
        super(BertMy, self).__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        self.bert_vocab_size = 32000
        self.bn = nn.BatchNorm1d(segment_size * self.bert_vocab_size)
        self.fc1 = nn.Linear(segment_size * self.bert_vocab_size, output_size)
        self.fc2 = nn.Linear(segment_size * self.bert_vocab_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.bert(x).logits
        x = x.view(x.shape[0], -1)
        x1 = self.fc1(self.dropout(self.bn(x)))
        x2 = self.fc2(self.dropout(self.bn(x)))
        return [x1, x2]
I want to add 2 individual LSTM layers for the 2 tasks, in order to predict them individually.
How should I define this in PyTorch?
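One possible approach, shown as a rough sketch only (the hidden size and the choice to feed the per-token logits into the LSTMs are assumptions of mine, not from the original post), is to give each task its own nn.LSTM head:

import torch
import torch.nn as nn
from transformers import AutoModelForMaskedLM

class BertMyLSTM(nn.Module):
    def __init__(self, segment_size, output_size, dropout, lstm_hidden=256):
        super().__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        self.bert_vocab_size = 32000
        # One LSTM per task, each consuming the per-token logits as a sequence.
        self.lstm1 = nn.LSTM(self.bert_vocab_size, lstm_hidden, batch_first=True)
        self.lstm2 = nn.LSTM(self.bert_vocab_size, lstm_hidden, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(lstm_hidden, output_size)
        self.fc2 = nn.Linear(lstm_hidden, output_size)

    def forward(self, x):
        x = self.bert(x).logits                      # (batch, seq_len, vocab_size)
        out1, _ = self.lstm1(x)                      # task 1 sequence features
        out2, _ = self.lstm2(x)                      # task 2 sequence features
        x1 = self.fc1(self.dropout(out1[:, -1, :]))  # last time step per task
        x2 = self.fc2(self.dropout(out2[:, -1, :]))
        return [x1, x2]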

How to modify inherited class for additional parameters?

I am working on a problem statement related to Python classes.
I have two classes:
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.cls_weights = self.weights_calculation()

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights
And the second class:
import torch.nn as nn
from transformers import Trainer

class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.class_weights)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
In the second class, I have to pass the weight to nn.CrossEntropyLoss, as in loss_func = nn.CrossEntropyLoss(weight=self.class_weights).
I want to modify the inherited Trainer class so that I can pass a new parameter, custom_class_weight, from the MCC class.
What I have tried:
class MCC(object):
    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        self.cls_weights = self.weights_calculation()
        self.WeightedTrainer = WeightedTrainer(self.cls_weights)
        self.trainer = self.WeightedTrainer(self.model)

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights
class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight):
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
This disturbs the inherited Trainer class's properties and thus gives me an error:
TypeError: 'WeightedTrainer' object is not callable
I also tried:
class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight, **kwargs):
        self.custom_class_weight = custom_class_weight
        super(WeightedTrainer, self).__init__(**kwargs)

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        loss_func = nn.CrossEntropyLoss(weight=self.custom_class_weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
How can I create a child class that uses Trainer as its parent and modifies it, while keeping all the default properties of the superclass?
Thanks to @JMA's valuable comment, here is the fix.
I was calling self.WeightedTrainer, which is not a callable object. Full working code:
import torch
import torch.nn as nn
from transformers import Trainer

class MCC(object):
    def __init__(self, problem_type, batch_size, dataset, model):
        self.problem_type = problem_type
        self.batch_size = batch_size
        self.dataset = dataset
        self.model = model
        self.cls_weights = self.weights_calculation()
        self.custom_trainer = WeightedTrainer
        self.trainer = self.custom_trainer(model=self.model, custom_class_weight=self.cls_weights)

    def weights_calculation(self):
        class_weights = (1 - (self.dataset['labels'].value_counts().sort_index() / len(self.dataset))).values
        return class_weights

class WeightedTrainer(Trainer):
    def __init__(self, custom_class_weight, **kwargs):
        super().__init__(**kwargs)
        self.custom_class_weight = custom_class_weight

    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get('logits')
        labels = inputs.get('labels')
        # CrossEntropyLoss expects the class weights as a float tensor.
        weight = torch.tensor(self.custom_class_weight, dtype=torch.float, device=logits.device)
        loss_func = nn.CrossEntropyLoss(weight=weight)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss
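For completeness, a minimal usage sketch (the model, datasets, and argument values below are placeholders of mine, not from the original post): the custom keyword is simply passed alongside the usual Trainer arguments.

from transformers import TrainingArguments

# Hypothetical placeholders: `model`, `train_ds`, `eval_ds`, and `cls_weights`
# (e.g. taken from an MCC instance) are assumed to exist already.
training_args = TrainingArguments(output_dir="out", num_train_epochs=3)

trainer = WeightedTrainer(
    custom_class_weight=cls_weights,
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
)
trainer.train()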

Approximate function only with previous points

I would like to approximate a polynomial function (degree 3 for now), using an input of 10 steps to forecast the next 20 points. To do this, I use a simple dense model with one neuron, but I'm not getting good results and don't know how to improve the setup.
What I want to approximate:
The results I obtained:
My code is as simple as this:
from tensorflow.keras import Sequential, initializers, losses, metrics
from tensorflow.keras.layers import Lambda, Dense, Reshape
from tensorflow.keras.optimizers import Adam

class Modeles:
    def __init__(self, TO_FORECAST):
        self.Model = None
        self.TO_FORECAST = TO_FORECAST

    def model(self):
        self.Model = Sequential([
            Lambda(lambda x: x[:, -1:, :]),
            Dense(1, activation='relu'),
            Dense(self.TO_FORECAST, activation='linear', kernel_initializer=initializers.zeros()),
            Reshape([self.TO_FORECAST, 1])
        ])

class CompilationFitting:
    def __init__(self, model, Data):
        self.model = model
        self.Data = Data
        self.History = None
        self.Prediction = None

    def compile(self):
        self.model.compile(loss=losses.MeanSquaredError(),
                           optimizer=Adam(learning_rate=0.01),
                           metrics=[metrics.MeanAbsoluteError()])

    def fit(self):
        MAX_EPOCHS = 100
        self.History = self.model.fit(self.Data.X, self.Data.y, batch_size=16, epochs=MAX_EPOCHS,
                                      validation_data=(self.Data.X, self.Data.y))

    def predict(self):
        self.Prediction = self.model.predict(self.Data.X)
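For reference, a minimal sketch of how these classes could be wired together (the Data container and the X_train/y_train arrays are assumptions of mine, not shown in the original post):

import numpy as np

# Hypothetical container for the windowed training data.
class Data:
    def __init__(self, X, y):
        self.X, self.y = X, y

X_train = np.random.rand(100, 10, 1)   # 100 samples, 10 input steps
y_train = np.random.rand(100, 20, 1)   # 20 points to forecast per sample

modeles = Modeles(TO_FORECAST=20)
modeles.model()                          # builds modeles.Model

run = CompilationFitting(modeles.Model, Data(X_train, y_train))
run.compile()
run.fit()                                # trains for MAX_EPOCHS epochs
run.predict()                            # predictions stored in run.Prediction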

Tensorflow 2.0: flat_map() to flatten Dataset of Dataset returns cardinality -2

I am trying to run the following code (as given in the TensorFlow documentation) to create windows of my data and then flatten the dataset of datasets.
# `range_ds` is a tf.data.Dataset created earlier (e.g. tf.data.Dataset.range(...)).
window_size = 5
windows = range_ds.window(window_size, shift=1)
for sub_ds in windows.take(5):
    print(sub_ds)

flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns a cardinality of -2, which creates problems for me during training. I tried looking for ways to set the cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again without success.
Edit 1: The problem during training is that the shape is unknown (at the Linear and Dense layers) when I train a subclassed model (given below). The model trains well when I run it eagerly (via tf.config.run_functions_eagerly(True)), but that is slow. Therefore I want the input data to be known for the model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def train_step(self, inputs):
        X = inputs[0]
        Y = inputs[1]
        with tf.GradientTape() as tape:
            enc_X = self.encoder(X)
            enc_Y = self.encoder(Y)
            # loss:
            loss = tf.norm(enc_Y - enc_X, axis=[0, 1], ord='fro')

        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Compute our own metrics
        loss_tracker.update_state(loss)

        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker]

    def test_step(self, inputs):
        X = inputs[0]
        Y = inputs[1]
        Psi_X = self.encoder(X)
        Psi_Y = self.encoder(Y)
        # loss:
        loss = tf.norm(Psi_Y - Psi_X, axis=[0, 1], ord='fro')

        # Compute our own metrics
        loss_tracker.update_state(loss)

        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
    def __init__(self):
        super(Encoder, self).__init__(dtype='float64', name='Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training):
        fx = self.input_layer(input_data)
        fx = self.hidden_layer1(fx)
        fx = self.hidden_layer2(fx)
        fx = self.hidden_layer3(fx)
        return self.output_layer(fx)

class LinearLayer(tf.keras.layers.Layer):
    def __init__(self, units):
        super(LinearLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(shape=(input_dim, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class DenseLayer(tf.keras.layers.Layer):
    def __init__(self, units):
        super(DenseLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(shape=(input_dim, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        x = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(x)
I was wondering about this as well. It turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which indicates that TF doesn't know how many elements the flat_map returns per item.
I just asked "Windowing a TensorFlow dataset without losing cardinality information?" to see if anyone knows a way to window datasets without losing cardinality.
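If the number of windows can be computed up front, one workaround (a sketch only, not from the original answer; drop_remainder and per-window batching are added here so the window count is easy to compute) is to re-attach the known cardinality with tf.data.experimental.assert_cardinality after the flat_map:

import tensorflow as tf

range_ds = tf.data.Dataset.range(20)            # stand-in for the real data
window_size = 5
windows = range_ds.window(window_size, shift=1, drop_remainder=True)
flat_windows = windows.flat_map(lambda w: w.batch(window_size))

# The number of windows is known analytically for a fixed-length source.
n_windows = len(range_ds) - window_size + 1
flat_windows = flat_windows.apply(tf.data.experimental.assert_cardinality(n_windows))

print(flat_windows.cardinality().numpy())        # 16 instead of -2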

How to define the loss function using the output of intermediate layers?

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        x = self.encoder(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return x
I am working on a regression problem, and I need to use the output of the dense2 layer to calculate the loss.
The output of the dense2 layer is 5-dimensional (5x1).
I am using PyTorch.
Dataset: suppose I am using 300 features and I need to predict some score (a floating-point value).
Input: 300 features
Output: some floating-point value
In general, your nn.Module can return as many elements as you like. Moreover, you don't have to use them anywhere; there is no mechanism that checks that. PyTorch's philosophy is to build the computational graph on the fly:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        enc_output = self.encoder(x)
        dense1_output = self.dense1(enc_output)
        dense2_output = self.dense2(dense1_output)
        dense3_output = self.dense3(dense2_output)
        return dense3_output, dense2_output
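A minimal sketch of how the two outputs could then be combined in a loss (the auxiliary target and the weighting factor here are placeholders of mine, not part of the original answer):

import torch
import torch.nn as nn

model = Model()
criterion = nn.MSELoss()

x = torch.randn(32, 300)            # batch of 300-feature inputs
target = torch.randn(32, 1)         # the score to regress
aux_target = torch.randn(32, 5)     # hypothetical target for dense2's output

pred, dense2_out = model(x)
loss = criterion(pred, target) + 0.1 * criterion(dense2_out, aux_target)
loss.backward()                     # gradients flow through both terms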
