Does my linear model with variable hidden layers look okay? [Pytorch] - python

I just need some help: I am getting very high loss values, and since this is the first model I've created myself, I'm not sure whether my model or my input data is the cause.
My code is below
class Net(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    The layer list is ``Linear(D_in, H)``, followed by ``num_layers - 1``
    layers of ``Linear(H, H)``, followed by a final ``Linear(H, D_out)``.

    Args:
        D_in: Number of input features.
        H: Width of every hidden layer.
        D_out: Number of output features.
        num_layers: Number of hidden layers; the ``D_in -> H`` layer is
            always created, and ``num_layers - 1`` ``H -> H`` layers follow.
        dropout: Dropout probability used after each hidden activation.
    """

    def __init__(self, D_in, H, D_out, num_layers, dropout):
        super().__init__()
        self.fcs = nn.ModuleList([nn.Linear(D_in, H)])
        for _ in range(num_layers - 1):
            self.fcs.append(nn.Linear(H, H))
        self.fcs.append(nn.Linear(H, D_out))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run ``x`` through the hidden stack and the linear output layer.

        Hidden layers use ReLU followed by dropout. The last layer is left
        purely linear: passing it through ReLU would clamp every prediction
        to be non-negative, and dropping units right before/at the output
        adds noise directly to the prediction.

        Returns:
            The output of the final layer with all size-1 dimensions removed.
        """
        hidden_layers = self.fcs[:-1]
        output_layer = self.fcs[-1]
        for layer in hidden_layers:
            x = self.dropout(self.relu(layer(x)))
        x = output_layer(x)
        # NOTE: squeeze() with no argument removes *every* size-1 dimension,
        # so a batch of one sample also loses its batch dimension.
        return x.squeeze()

Related

How to add Individual LSTM layers for each task in multi-task learning with Pytorch

For example, I define a model for 2 tasks in a multi-task way.
class BertMy(nn.Module):
    """Two-headed model built on the ``cl-tohoku/bert-base-japanese`` masked LM.

    The masked-LM logits are flattened per sample, batch-normalised, passed
    through dropout and fed to two independent linear heads (one per task).

    Args:
        segment_size: Multiplied by the vocabulary size to get the flattened
            feature width (presumably the token sequence length - confirm
            against the caller).
        output_size: Output width of each task head.
        dropout: Dropout probability applied before each head.
    """

    def __init__(self, segment_size, output_size, dropout):
        super().__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        # Hard-coded; assumed to match the checkpoint's vocabulary - TODO confirm.
        self.bert_vocab_size = 32000
        flat_features = segment_size * self.bert_vocab_size
        self.bn = nn.BatchNorm1d(flat_features)
        self.fc1 = nn.Linear(flat_features, output_size)
        self.fc2 = nn.Linear(flat_features, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``[task1_output, task2_output]`` for the input batch ``x``."""
        logits = self.bert(x).logits
        flat = logits.view(logits.shape[0], -1)
        # Batch norm and dropout are applied separately for each head, so the
        # two heads see independently sampled dropout masks.
        task1 = self.fc1(self.dropout(self.bn(flat)))
        task2 = self.fc2(self.dropout(self.bn(flat)))
        return [task1, task2]
I want to add 2 individual LSTM layers, one for each of the 2 tasks, in order to predict them individually.
How should I define this in Pytorch?

Approximate function only with previous points

I would like to approximate a polynomial function (degree 3 for now), with an input of 10 steps to forecast the 20 next points. To do this, I use a simple dense model with one neuron, but I'm not having good results and don't know how to optimize the problem.
What I want to approximate:
The results I obtained:
My code is as simple as this:
class Modeles:
    """Container that builds and holds a small Keras forecasting network."""

    def __init__(self, TO_FORECAST):
        # Number of output steps produced by the network.
        self.TO_FORECAST = TO_FORECAST
        # Populated by model(); None until then.
        self.Model = None

    def model(self):
        """Build the Sequential network and store it in ``self.Model``.

        The network keeps only the last step along axis 1 of its input,
        passes it through a single ReLU unit, expands it to ``TO_FORECAST``
        linear outputs (kernel initialised to zeros) and reshapes the result
        to ``[TO_FORECAST, 1]``.
        """
        horizon = self.TO_FORECAST
        stack = [
            Lambda(lambda x: x[:, -1:, :]),
            Dense(1, activation='relu'),
            Dense(horizon, activation='linear', kernel_initializer=initializers.zeros()),
            Reshape([horizon, 1]),
        ]
        self.Model = Sequential(stack)
class CompilationFitting:
    """Compile, train and run a model against a data object.

    Args:
        model: Object exposing ``compile``, ``fit`` and ``predict``.
        Data: Object exposing the attributes ``X`` and ``y``.
    """

    def __init__(self, model, Data):
        self.model = model
        self.Data = Data
        # Filled in by fit() and predict() respectively.
        self.History = None
        self.Prediction = None

    def compile(self):
        """Compile the model with MSE loss, Adam (lr=0.01) and an MAE metric."""
        self.model.compile(
            loss=losses.MeanSquaredError(),
            optimizer=Adam(learning_rate=0.01),
            metrics=[metrics.MeanAbsoluteError()],
        )

    def fit(self):
        """Train for 100 epochs with batch size 16 and store the fit result.

        The same ``(X, y)`` pair is passed as validation data, so the
        validation figures are computed on the training set.
        """
        MAX_EPOCHS = 100
        data = self.Data
        fit_options = {
            "batch_size": 16,
            "epochs": MAX_EPOCHS,
            "validation_data": (data.X, data.y),
        }
        self.History = self.model.fit(data.X, data.y, **fit_options)

    def predict(self):
        """Run the model on ``Data.X`` and store the result in ``Prediction``."""
        inputs = self.Data.X
        self.Prediction = self.model.predict(inputs)

2x nested Tensorflow custom layers results in zero trainable parameters

I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
Layer)
class basic_conv_stack(Layer):
    """Conv2D followed by BatchNormalization and ReLU.

    Args:
        filters: Passed to ``Conv2D`` as the number of filters.
        kernel_size: Passed to ``Conv2D`` as the kernel size.
        strides: Passed to ``Conv2D`` as the strides.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        # All sub-layers are created here and stored as attributes.
        self.conv1 = Conv2D(filters, kernel_size, strides=strides, padding='same')
        self.bn1 = BatchNormalization()
        self.relu = ReLU()

    def call(self, x):
        """Apply convolution, batch normalisation and ReLU, in that order."""
        convolved = self.conv1(x)
        normalised = self.bn1(convolved)
        return self.relu(normalised)
class basic_residual(Layer):
    """Two ``basic_conv_stack`` layers applied back to back.

    Both stacks are built with the same ``filters``, ``kernel_size`` and
    ``strides``. Despite the name, no skip connection is added here: the
    output is simply the second stack applied to the first stack's output.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        conv_args = (filters, kernel_size, strides)
        self.bcs1 = basic_conv_stack(*conv_args)
        self.bcs2 = basic_conv_stack(*conv_args)

    def call(self, x):
        """Return ``bcs2(bcs1(x))``."""
        return self.bcs2(self.bcs1(x))
class basic_module(Layer):
    """Applies four freshly constructed ``basic_residual`` layers in sequence.

    NOTE: this is the version that misbehaves. ``__init__`` stores only the
    ``basic_residual`` *class* and its constructor arguments; the actual
    layer objects are built inside ``call`` and never assigned to ``self``,
    so this layer holds no sub-layer instances of its own.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.res = basic_residual  # the class itself, not an instance
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        """Build and apply a new ``basic_residual`` four times over."""
        out = x
        for _ in range(4):
            fresh_block = self.res(*self.args)  # new layer object on every pass
            out = fresh_block(out)
        return out
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
# Wrap a single basic_conv_stack in a Model and report its parameter counts.
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()
Similarly, if I do the following, I get 1,230 trainable parameters:
# Wrap a single basic_residual in a Model and report its parameter counts.
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()
However, if I try the basic_module class, I get zero trainable parameters:
# Wrap a single basic_module in a Model and report its parameter counts.
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in `call` must be created in the class's `__init__` for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
    """Four ``basic_residual`` layers applied one after another.

    The sub-layers are instantiated once in ``__init__`` and kept in
    ``self.clayers``; ``call`` only applies them.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        residual_blocks = [
            basic_residual(filters, kernel_size, strides) for _ in range(4)
        ]
        self.clayers = residual_blocks

    def call(self, x):
        """Feed ``x`` through each stored residual block in order."""
        for block in self.clayers:
            x = block(x)
        return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.
You have to instantiate the class with the required parameters, such as filters, kernel_size, and strides, instead of keeping a reference to the class itself (as basic_module does below). Also note that these hyper-parameters determine the properties of the trainable weights.
# >>> a = basic_module
# >>> a
# __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
    """Annotated copy of the failing layer, marking the line at fault.

    Only the ``basic_residual`` class and its constructor arguments are kept
    on ``self``; the layer objects themselves are created inside ``call``.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.res = basic_residual  # <--- the class itself is stored, not an instance
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        """Build and apply a new ``basic_residual`` four times over."""
        out = x
        for _ in range(4):
            fresh_block = self.res(*self.args)  # instantiated here, on every pass
            out = fresh_block(out)
        return out

How to define the loss function using the output of intermediate layers?

class Model(nn.Module):
    """Regression network: 300 -> 100 -> 10 -> 5 -> 1.

    ``encoder`` is a bare linear layer; ``dense1`` and ``dense2`` are
    linear layers followed by ReLU; ``dense3`` is a linear output layer.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        """Return the output of ``dense3`` for the input ``x``."""
        stages = (self.encoder, self.dense1, self.dense2, self.dense3)
        for stage in stages:
            x = stage(x)
        return x
I am working on a regression problem, and I need to use the output of the dense2 layer to calculate the loss.
The output of the dense2 layer is 5-dimensional (5x1).
I am using PyTorch.
Dataset: Suppose I am using 300 features and I need to predict some score (a floating-point value).
Input: 300 Features
Output: Some Floating Value
In general, your nn.Module can return as many elements as you like. Moreover, you don't have to use them anywhere - there is no mechanism that checks that. PyTorch's philosophy is to build the computational graph on the fly.
class Model(nn.Module):
    """Regression network that also exposes its ``dense2`` activations.

    Layer sizes are 300 -> 100 -> 10 -> 5 -> 1. ``forward`` returns both the
    final prediction and the 5-wide ``dense2`` output so a loss can be
    computed on the intermediate representation as well.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(300, 100)
        self.dense1 = nn.Sequential(nn.Linear(100, 10), nn.ReLU())
        self.dense2 = nn.Sequential(nn.Linear(10, 5), nn.ReLU())
        self.dense3 = nn.Sequential(nn.Linear(5, 1))

    def forward(self, x):
        """Return the tuple ``(dense3_output, dense2_output)``."""
        hidden = self.dense1(self.encoder(x))
        intermediate = self.dense2(hidden)
        prediction = self.dense3(intermediate)
        return prediction, intermediate

How to do fully connected batch norm in PyTorch?

torch.nn has classes BatchNorm1d, BatchNorm2d, BatchNorm3d, but it doesn't have a fully connected BatchNorm class? What is the standard way of doing normal Batch Norm in PyTorch?
Ok. I figured it out. BatchNorm1d can also handle Rank-2 tensors, thus it is possible to use BatchNorm1d for the normal fully-connected case.
So for example:
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer MLP with batch normalisation after each hidden ReLU.

    Args:
        num_inputs: Number of input features.
        action_space: Number of outputs of the final linear layer.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space

        self.linear1 = nn.Linear(num_inputs, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        # BatchNorm1d accepts rank-2 (batch, features) input, which is what
        # the linear layers produce.
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Return the output of ``linear3`` for a batch of ``inputs``.

        Each hidden layer is linear -> ReLU -> batch norm.
        """
        # nn.functional is used instead of the alias ``F``, which this
        # snippet never imports (only ``torch.nn as nn`` is in scope).
        relu = nn.functional.relu
        x = inputs
        x = self.bn1(relu(self.linear1(x)))
        x = self.bn2(relu(self.linear2(x)))
        out = self.linear3(x)
        return out
BatchNorm1d normally comes before the ReLU, and the bias of the preceding linear layer is then redundant (BatchNorm adds its own learnable shift), so:
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer MLP with batch normalisation before each hidden ReLU.

    The two hidden linear layers are created without a bias term because
    each is followed directly by a ``BatchNorm1d`` layer.

    Args:
        num_inputs: Number of input features.
        action_space: Number of outputs of the final linear layer.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        # The original called super(Policy2, self), but no class named
        # Policy2 exists, so construction failed with a NameError.
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space

        self.linear1 = nn.Linear(num_inputs, hidden_size1, bias=False)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Return the output of ``linear3`` for a batch of ``inputs``.

        Each hidden layer is linear -> batch norm -> ReLU.
        """
        # nn.functional is used instead of the alias ``F``, which this
        # snippet never imports (only ``torch.nn as nn`` is in scope).
        relu = nn.functional.relu
        x = inputs
        x = relu(self.bn1(self.linear1(x)))
        x = relu(self.bn2(self.linear2(x)))
        out = self.linear3(x)
        return out

Categories