This code runs fine to create a simple feed-forward neural Network. The layer (torch.nn.Linear) is assigned to the class variable by using self.
class MultipleRegression3L(torch.nn.Module):
def __init__(self, num_features):
super(MultipleRegression3L, self).__init__()
self.layer_1 = torch.nn.Linear(num_features, 16)
## more layers
self.relu = torch.nn.ReLU()
def forward(self, inputs):
x = self.relu(self.layer_1(inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.layer_out(x)
return (x)
def predict(self, test_inputs):
return self.forward(test_inputs)
However, when I tried to store the layer using the list:
class MultipleRegression(torch.nn.Module):
def __init__(self, num_features, params):
super(MultipleRegression, self).__init__()
number_of_layers = 3 if not 'number_of_layers' in params else params['number_of_layers']
number_of_neurons_in_each_layer = [16, 32, 16] if not 'number_of_neurons_in_each_layer' in params else params['number_of_neurons_in_each_layer']
activation_function = "relu" if not 'activation_function' in params else params['activation_function']
self.layers = []
v1 = num_features
for i in range(0, number_of_layers):
v2 = number_of_neurons_in_each_layer[i]
self.layers.append(torch.nn.Linear(v1, v2))
v1 = v2
self.layer_out = torch.nn.Linear(v2, 1)
if activation_function == "relu":
self.act_func = torch.nn.ReLU()
else:
raise Exception("Activation function %s is not supported" % (activation_function))
def forward(self, inputs):
x = self.act_func(self.layers[0](inputs))
for i in range(1, len(self.layers)):
x = self.act_func(self.layers[i](x))
x = self.layer_out(x)
return (x)
The two models do not behave the same way. What can be wrong here?
Pytorch needs to keep the graph of the modules in the model, so using a list does not work. Using self.layers = torch.nn.ModuleList() fixed the problem.
Related
I have defined my model as follows:
class MyModel(nn.Module):
def __init__(self, hidden_dim, activation_func):
super(MyModel, self).__init__()
self.bert1 = BertForTokenClassification.from_pretrained("...")
self.bert2 = BertForTokenClassification.from_pretrained("...")
self.freezModel(self.bert1)
self.freezModel(self.bert2)
self.l1 = nn.Linear(2048 * 2 * 2, hidden_dim)
self.activation_func = activation_func
self.l2 = nn.Linear(hidden_dim, 1)
def freezModel(self, model):
if model is None:
return
for name, p in model.named_parameters():
p.requires_grad = False
def forward(self, x):
x1 = x[:,0,:]
x2 = x[:,1,:]
b1_out = torch.flatten(self.bert1(x1)[0], 1)
b2_out = torch.flatten(self.bert2(x2)[0], 1)
b_out = torch.cat((b1_out, b2_out), 1)
l1_out = self.l1(b_out)
activation_func_out = self.activation_func(l1_out)
l2_out = self.l2(activation_func_out)
return l2_out
my first question: Did I define my model ok?
and the second question is: How should I define the optimizer? and when should I use the parameters group?
This is the code of my instantiation of the class and the optimizer.
model = MyModel(100, nn.ReLU())
optim = torch.optim.AdamW(model.parameters(), lr=5e-5)
The input of my model has two arrays and each array goes to one BERT and the output of them after concatenation will send to the linear layer. The BERTs are just used for generating the embeddings and I have frozen them to not be changed during the training.
I just need some help as I am getting very high loss values and this is the first model ive created myself so not sure if its my model or input data that is the cause.
My code is below
class Net(nn.Module):
def __init__(self, D_in, H, D_out,num_layers,dropout):
super().__init__()
self.fcs = nn.ModuleList([nn.Linear(D_in, H)])
for k in range(num_layers-1):
self.fcs.append(nn.Linear(H, H))
self.fcs.append(nn.Linear(H, D_out))
self.relu = nn.ReLU()
self.dropout = nn.Dropout(dropout)
def forward(self, x):
for l in self.fcs:
x = self.dropout(x)
x = self.relu(l(x))
return x.squeeze()
i am a beginner of pytorch, and i want to build a fully connect model using Pytorch;
the model is very simple like:
def forward(self, x):
x = self.relu(self.fc1(x))
x = self.relu(self.fc2(x))
return self.fc3(x)
but when i want to add some layers or adjust the hidden layers, i found i have to write lots of Redundant code like:
def forward(self, x):
x = self.relu(self.fc1(x))
x = self.relu(self.fc2(x))
x = self.relu(self.fc3(x))
x = self.relu(self.fc4(x))
x = self.relu(self.fc5(x))
...
return self.fcn(x)
besides, if i want to change some layer's feature nums, i have to change the layer adjacent
so i want to know a way which is more grace(maybe more pythonic and more easy to adjust hyper parameter).
i tried to write code like:
def __init__(self):
super().__init__()
self.hidden_num = [2881, 5500, 2048, 20] # i just want to change here! to try some new structure
self.fc = [nn.Linear(self.hidden_num[i], self.hidden_num[i + 1]).to(DEVICE) for i in range(len(self.hidden_num) - 1)]
self.relu = nn.ReLU()
def forward(self, x):
for i in range(len(self.fc)):
x = self.fc[i](x)
if i != (len(self.fc) - 1):
x = self.relu(x)
return x
but i found this way doesn't work, the model can't be built
so could any bro tell me, how to define a fullyconnect model like above??
(so i can adjust the model layers only by adjust the list named hidden_num )
If you want to keep the same approach then you can use nn.ModuleList to properly register all linear layers inside the module's __init__:
class Model(nn.Module):
def __init__(self, hidden_num=[2881, 5500, 2048, 20]):
super().__init__()
self.fc = nn.ModuleList([
nn.Linear(hidden_num[i], hidden_num[i+1])
for i in range(len(hidden_num) - 1)])
def forward(self, x):
for i, m in enumerate(self.fc.children()):
x = m(x)
print(i)
if i != len(self.fc) - 1:
x = torch.relu(x)
return x
However, you may want to handle the logic inside the __init__ function once. One alternative is to use nn.Sequential.
class Model(nn.Module):
def __init__(self, hidden_num=[2881, 5500, 2048, 20]):
super().__init__()
fc = []
for i in range(len(hidden_num) - 1):
fc.append(nn.Linear(hidden_num[i], hidden_num[i+1]))
if i != len(self.fc) - 1:
fc.append(nn.ReLU())
self.fc = nn.Sequential(fc)
def forward(self, x):
x = self.fc(x)
return x
Ideally, you would inherit from nn.Sequential directly to avoid re-writing the forward function which is unnecessary in this case:
class Model(nn.Sequential):
def __init__(self, hidden_num=[2881, 5500, 2048, 20]):
fc = []
for i in range(len(hidden_num) - 1):
fc.append(nn.Linear(hidden_num[i], hidden_num[i+1]))
if i != len(self.fc) - 1:
fc.append(nn.ReLU())
super().__init__(fc)
I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
Layer)
class basic_conv_stack(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_conv_stack, self).__init__()
self.conv1 = Conv2D(filters, kernel_size, strides, padding='same')
self.bn1 = BatchNormalization()
self.relu = ReLU()
def call(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
return x
class basic_residual(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_residual, self).__init__()
self.bcs1 = basic_conv_stack(filters, kernel_size, strides)
self.bcs2 = basic_conv_stack(filters, kernel_size, strides)
def call(self, x):
x = self.bcs1(x)
x = self.bcs2(x)
return x
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Similarly, if I do the following, I get 1,230 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
However, if I try the basic_module class, I get zero trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in the call must be initialized in the class's init for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.clayers = [basic_residual(filters, kernel_size, strides) for _ in range(4)]
def call(self, x):
for idx in range(4):
x = self.clayers[idx](x)
return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.
You have to initialize the class instances with the required parameter such as filters, kernel_size, strides to the predefined base_mdoule. Also, note that these hyper-parameters are related to trainable weights properties.
# >>> a = basic_module
# >>> a __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# >>> <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual # < ---
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.encoder = nn.Linear(300, 100)
self.dense1 = nn.Sequential(nn.Linear(100, 10),nn.ReLU())
self.dense2 = nn.Sequential(nn.Linear(10, 5),nn.ReLU())
self.dense3 = nn.Sequential(nn.Linear(5, 1))
def forward(self, x):
x = self.encoder(x)
x = self.dense1(x)
x = self.dense2(x)
x = self.dense3(x)
return x
I am working on a regression problem, and I need to use the output of the dense2 layer to calculate the loss.
output of dense2 layer is 5 dimensional (5x1).
I am using PyTorch.
Dataset: Suppose i am using 300 features and i need to predict some score(a floating value).
Input: 300 Features
Output: Some Floating Value
In general, your nn.Module can return as many elements as you like. Moreover, you don't have to use them anywhere - there is no mechanism that checks that. Pytorch philosophy is to compute computational graph on-the-run.
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.encoder = nn.Linear(300, 100)
self.dense1 = nn.Sequential(nn.Linear(100, 10),nn.ReLU())
self.dense2 = nn.Sequential(nn.Linear(10, 5),nn.ReLU())
self.dense3 = nn.Sequential(nn.Linear(5, 1))
def forward(self, x):
enc_output = self.encoder(x)
dense1_output = self.dense1(enc_output)
dense2_output = self.dense2(dense1_output)
dense3_output = self.dense3(dense2_output)
return dense3_output, dense2_output