import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        x = self.encoder(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return x
I am working on a regression problem, and I need to use the output of the dense2 layer to calculate the loss.
The output of the dense2 layer is 5-dimensional (5x1).
I am using PyTorch.
Dataset: suppose I am using 300 features and I need to predict some score (a floating-point value).
Input: 300 features
Output: some floating-point value
In general, your nn.Module's forward can return as many elements as you like. Moreover, you don't have to use them anywhere - there is no mechanism that checks that. PyTorch's philosophy is to build the computational graph on the fly.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        enc_output = self.encoder(x)
        dense1_output = self.dense1(enc_output)
        dense2_output = self.dense2(dense1_output)
        dense3_output = self.dense3(dense2_output)
        return dense3_output, dense2_output
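A minimal usage sketch showing how both returned outputs can feed into the loss. How exactly the dense2 output enters the loss was not specified in the question, so the auxiliary term and its 0.1 weight below are placeholders for illustration:

import torch
import torch.nn as nn

model = Model()
criterion = nn.MSELoss()

x = torch.randn(32, 300)   # batch of 32 samples with 300 features
y = torch.randn(32, 1)     # target scores

final_out, dense2_out = model(x)

# Example only: combine the main regression loss with an auxiliary term
# computed on the 5-dimensional dense2 output (the 0.1 weight is arbitrary).
loss = criterion(final_out, y) + 0.1 * dense2_out.pow(2).mean()
loss.backward()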
I have defined my model as follows:
import torch
import torch.nn as nn
from transformers import BertForTokenClassification

class MyModel(nn.Module):
    def __init__(self, hidden_dim, activation_func):
        super(MyModel, self).__init__()
        self.bert1 = BertForTokenClassification.from_pretrained("...")
        self.bert2 = BertForTokenClassification.from_pretrained("...")
        self.freezModel(self.bert1)
        self.freezModel(self.bert2)
        self.l1 = nn.Linear(2048 * 2 * 2, hidden_dim)
        self.activation_func = activation_func
        self.l2 = nn.Linear(hidden_dim, 1)

    def freezModel(self, model):
        if model is None:
            return
        for name, p in model.named_parameters():
            p.requires_grad = False

    def forward(self, x):
        x1 = x[:, 0, :]
        x2 = x[:, 1, :]
        b1_out = torch.flatten(self.bert1(x1)[0], 1)
        b2_out = torch.flatten(self.bert2(x2)[0], 1)
        b_out = torch.cat((b1_out, b2_out), 1)
        l1_out = self.l1(b_out)
        activation_func_out = self.activation_func(l1_out)
        l2_out = self.l2(activation_func_out)
        return l2_out
My first question: did I define my model correctly?
And my second question: how should I define the optimizer, and when should I use parameter groups?
This is the code for my instantiation of the class and the optimizer:
model = MyModel(100, nn.ReLU())
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)
The input of my model consists of two arrays; each array goes through one BERT, and their outputs, after concatenation, are sent to the linear layer. The BERTs are only used for generating the embeddings, and I have frozen them so they are not updated during training.
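Since the BERTs are frozen, here is a hedged sketch of how the optimizer could be set up, and of what parameter groups might look like if the two head layers needed different learning rates (the 1e-4 value is an arbitrary placeholder, not something from the question):

# Pass only the parameters that actually require gradients
# (the frozen BERT weights are skipped).
trainable = [p for p in model.parameters() if p.requires_grad]
optim = torch.optim.AdamW(trainable, lr=5e-5)

# Parameter groups are only needed when parts of the model should use
# different hyperparameters, e.g. per-layer learning rates:
optim = torch.optim.AdamW([
    {"params": model.l1.parameters(), "lr": 1e-4},
    {"params": model.l2.parameters(), "lr": 5e-5},
])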
For example, I define a model for 2 tasks in a multi-task way.
import torch.nn as nn
from transformers import AutoModelForMaskedLM

class BertMy(nn.Module):
    def __init__(self, segment_size, output_size, dropout):
        super(BertMy, self).__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        self.bert_vocab_size = 32000
        self.bn = nn.BatchNorm1d(segment_size * self.bert_vocab_size)
        self.fc1 = nn.Linear(segment_size * self.bert_vocab_size, output_size)
        self.fc2 = nn.Linear(segment_size * self.bert_vocab_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.bert(x).logits
        x = x.view(x.shape[0], -1)
        x1 = self.fc1(self.dropout(self.bn(x)))
        x2 = self.fc2(self.dropout(self.bn(x)))
        return [x1, x2]
I want to add 2 individual LSTM layers for the 2 tasks, in order to predict them individually.
How should I define this in PyTorch?
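A hedged sketch of one way this could look; the hidden size of 256, feeding the per-token logits into the LSTMs, and taking the last time step for each head are assumptions made here for illustration, not details from the question:

import torch.nn as nn
from transformers import AutoModelForMaskedLM

class BertMyLSTM(nn.Module):
    def __init__(self, segment_size, output_size, dropout, lstm_hidden=256):
        super(BertMyLSTM, self).__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        self.bert_vocab_size = 32000
        # One LSTM head per task, each consuming the per-token logits.
        self.lstm1 = nn.LSTM(self.bert_vocab_size, lstm_hidden, batch_first=True)
        self.lstm2 = nn.LSTM(self.bert_vocab_size, lstm_hidden, batch_first=True)
        self.fc1 = nn.Linear(lstm_hidden, output_size)
        self.fc2 = nn.Linear(lstm_hidden, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.bert(x).logits                      # (batch, seq_len, vocab_size)
        out1, _ = self.lstm1(x)                      # task 1 sequence features
        out2, _ = self.lstm2(x)                      # task 2 sequence features
        x1 = self.fc1(self.dropout(out1[:, -1, :]))  # last time step for task 1
        x2 = self.fc2(self.dropout(out2[:, -1, :]))  # last time step for task 2
        return [x1, x2]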
I would like to approximate a polynomial function (degree 3 for now), with an input of 10 steps, to forecast the next 20 points. To do this, I use a simple dense model with one neuron, but I'm not getting good results and don't know how to optimize the problem.
What I want to approximate:
The results I obtained:
My code is as simple as this:
from tensorflow.keras import Sequential, initializers, losses, metrics
from tensorflow.keras.layers import Dense, Lambda, Reshape
from tensorflow.keras.optimizers import Adam

class Modeles:
    def __init__(self, TO_FORECAST):
        self.Model = None
        self.TO_FORECAST = TO_FORECAST

    def model(self):
        self.Model = Sequential([
            Lambda(lambda x: x[:, -1:, :]),
            Dense(1, activation='relu'),
            Dense(self.TO_FORECAST, activation='linear',
                  kernel_initializer=initializers.zeros()),
            Reshape([self.TO_FORECAST, 1])
        ])

class CompilationFitting:
    def __init__(self, model, Data):
        self.model = model
        self.Data = Data
        self.History = None
        self.Prediction = None

    def compile(self):
        self.model.compile(loss=losses.MeanSquaredError(),
                           optimizer=Adam(learning_rate=0.01),
                           metrics=[metrics.MeanAbsoluteError()])

    def fit(self):
        MAX_EPOCHS = 100
        self.History = self.model.fit(self.Data.X, self.Data.y,
                                      batch_size=16, epochs=MAX_EPOCHS,
                                      validation_data=(self.Data.X, self.Data.y))

    def predict(self):
        self.Prediction = self.model.predict(self.Data.X)
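A hedged usage sketch of how these classes fit together, assuming `Data` is a simple container whose `X` has shape (samples, 10, 1) and whose `y` has shape (samples, 20, 1); these shapes come from the problem description above, not from code that was shown:

modeles = Modeles(TO_FORECAST=20)
modeles.model()                                   # builds modeles.Model

runner = CompilationFitting(modeles.Model, Data)  # Data.X: (n, 10, 1), Data.y: (n, 20, 1)
runner.compile()
runner.fit()
runner.predict()                                  # runner.Prediction: (n, 20, 1)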
I am trying to run the following code (as given in the TensorFlow documentation) to create windows of my data and then flatten the dataset of datasets.
window_size = 5
windows = range_ds.window(window_size, shift=1)
for sub_ds in windows.take(5):
    print(sub_ds)

flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns -2 as the cardinality, which is creating problems for me during training. I tried looking for ways to set the cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again without success.
Edit 1: The problem with the training is that the shape is unknown (at the Linear and Dense layers) when I am training a subclassed model (given below). The model trains well when I run it eagerly (through tf.config.run_functions_eagerly(True)), but that is slow. Therefore I want the input shapes to be known to the model during training.
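A sketch of one pattern that may help with the unknown shapes; it assumes each window should become a single fixed-length tensor, with `range_ds` and `window_size` taken from the snippet above. Batching each window inside flat_map with drop_remainder=True gives every element a static shape of (window_size,):

window_size = 5
windows = range_ds.window(window_size, shift=1, drop_remainder=True)

# Each window is itself a small dataset; batching it into a single tensor of
# exactly `window_size` elements gives downstream layers a known shape.
flat_windows = windows.flat_map(lambda w: w.batch(window_size, drop_remainder=True))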
Neural Network
import tensorflow as tf

# `loss_tracker` is referenced below; it is assumed to be defined at module
# level, e.g. as a running mean of the loss.
loss_tracker = tf.keras.metrics.Mean(name="loss")

class NeuralNetworkModel(tf.keras.Model):
    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def train_step(self, inputs):
        X = inputs[0]
        Y = inputs[1]

        with tf.GradientTape() as tape:
            enc_X = self.encoder(X)
            enc_Y = self.encoder(Y)
            # loss:
            loss = tf.norm(enc_Y - enc_X, axis=[0, 1], ord='fro')

        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Compute our own metrics
        loss_tracker.update_state(loss)

        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker]

    def test_step(self, inputs):
        X = inputs[0]
        Y = inputs[1]

        Psi_X = self.encoder(X)
        Psi_Y = self.encoder(Y)
        # loss:
        loss = tf.norm(Psi_Y - Psi_X, axis=[0, 1], ord='fro')

        # Compute our own metrics
        loss_tracker.update_state(loss)

        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {"loss": loss_tracker.result()}


class Encoder(tf.keras.Model):
    def __init__(self):
        super(Encoder, self).__init__(dtype='float64', name='Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training):
        fx = self.input_layer(input_data)
        fx = self.hidden_layer1(fx)
        fx = self.hidden_layer2(fx)
        fx = self.hidden_layer3(fx)
        return self.output_layer(fx)


class LinearLayer(tf.keras.layers.Layer):
    def __init__(self, units):
        super(LinearLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(shape=(input_dim, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b


class DenseLayer(tf.keras.layers.Layer):
    def __init__(self, units):
        super(DenseLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.w = self.add_weight(shape=(input_dim, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        x = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(x)
I was wondering about this as well. It turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which means that TF doesn't know how many elements flat_map returns per item.
I just asked Windowing a TensorFlow dataset without losing cardinality information? to see if anyone knows a way to window datasets without losing cardinality.
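If the total number of elements after flattening is known from the data, one workaround (a sketch; it assumes TensorFlow 2.3+, where tf.data.experimental.assert_cardinality is available, and `num_elements` is a value you would have to supply yourself) is to re-attach the cardinality explicitly:

# num_elements: the known number of elements in the flattened dataset.
flat_windows = flat_windows.apply(
    tf.data.experimental.assert_cardinality(num_elements))

print(flat_windows.cardinality().numpy())  # now reports num_elements instead of -2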
torch.nn has the classes BatchNorm1d, BatchNorm2d, and BatchNorm3d, but it doesn't have a fully connected BatchNorm class. What is the standard way of doing normal batch norm in PyTorch?
OK, I figured it out. BatchNorm1d can also handle rank-2 tensors, so it is possible to use BatchNorm1d for the normal fully-connected case.
So for example:
import torch.nn as nn
import torch.nn.functional as F

class Policy(nn.Module):
    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space

        self.linear1 = nn.Linear(num_inputs, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        x = inputs
        x = self.bn1(F.relu(self.linear1(x)))
        x = self.bn2(F.relu(self.linear2(x)))
        out = self.linear3(x)
        return out
The BatchNorm1d normally comes before the ReLU, and the preceding Linear layer's bias is then redundant (BatchNorm adds its own learnable shift), so:
import torch.nn as nn
import torch.nn.functional as F

class Policy(nn.Module):
    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space

        self.linear1 = nn.Linear(num_inputs, hidden_size1, bias=False)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        x = inputs
        x = F.relu(self.bn1(self.linear1(x)))
        x = F.relu(self.bn2(self.linear2(x)))
        out = self.linear3(x)
        return out
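A short usage note (the sizes below are arbitrary examples): BatchNorm1d computes statistics over the batch in training mode, so it needs batches with more than one sample, and the module should be switched to eval mode for inference.

import torch

policy = Policy(num_inputs=8, action_space=4)

policy.train()
out = policy(torch.randn(32, 8))   # batch statistics are computed from the batch

policy.eval()
out = policy(torch.randn(1, 8))    # running statistics are used; single samples are fine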