I want to compare training with and without regularization, so I need to write two custom loss functions: one with an L2 norm term and one without.
Here are my network and optimizer:
### NET
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(4))
        self.fc = nn.Linear(32*32*32, 11)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

net = CNN()

### OPTIMIZER
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
1. How can I add an L2 norm term to my loss function?
2. If I want to write the update step myself (without using optim.SGD) and do the gradient descent via autograd, how can I do that?
Thanks for your help!
For the first question: you can explicitly compute the norm of the weights yourself and add it to the loss.
reg = 0
for param in net.parameters():
    reg += 0.5 * (param ** 2).sum()  # replace with param.abs().sum() to get L1 regularization

loss = criterion(net(x), y) + reg_lambda * reg  # make the regularization part of the loss
loss.backward()  # continue as usual
See this thread for more info.
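Note also that torch.optim.SGD accepts a weight_decay argument that applies the L2 penalty for you, so the manual version above is mainly useful if you want the regularization term to show up in the reported loss.

For the second question, here is a minimal sketch of doing the gradient-descent update yourself with autograd instead of optim.SGD (it assumes LR, reg_lambda, criterion, and a DataLoader named train_loader are defined as in or around your snippet):

for x, y in train_loader:                      # train_loader is an assumed DataLoader
    loss = criterion(net(x), y)
    for param in net.parameters():
        loss = loss + reg_lambda * 0.5 * (param ** 2).sum()
    net.zero_grad()                            # clear old gradients
    loss.backward()                            # autograd fills param.grad for every parameter
    with torch.no_grad():                      # do the update outside autograd tracking
        for param in net.parameters():
            param -= LR * param.grad           # plain SGD step, no momentum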
I am trying to train a neural network on a two-class problem. Precision and recall for one of the classes are 0 all the time.
Here's the code for the neural network.
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=200,
                kernel_size=4,
                stride=3,
                padding=0),
            nn.ReLU()
        )
        self.maxpool = nn.MaxPool1d(2)

        # infer the flattened size after conv + pool with a dummy input
        random_input = torch.rand(1, 1, 1500, requires_grad=False)
        random_input = self.conv1(random_input)
        random_input = self.maxpool(random_input)
        maxpool_out = random_input.view(1, -1).shape[1]

        self.fc1 = nn.Sequential(
            nn.Linear(
                in_features=maxpool_out,
                out_features=200
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(
                in_features=200,
                out_features=100
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(
                in_features=100,
                out_features=50
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.lastlayer = nn.Linear(
            in_features=50,
            out_features=1
        )

    def forward(self, x):
        # add a channel dimension
        x = x.unsqueeze(1)
        # conv layers
        x = self.conv1(x)
        x = self.maxpool(x)
        # flatten
        x = x.reshape(x.shape[0], -1)
        # 3 fully connected layers
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        # output
        x = self.lastlayer(x)
        return x
Here's the training loop:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.round(torch.sigmoid(y_pred))
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum / y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc


def Training(model, train_loader, criterion, optimizer, epochs):
    train_losses = []
    Acc = []
    for epoch in range(epochs):
        epoch_accuracy = 0
        train_loss = 0
        total_pcaps = 0
        model.train()
        for elem in train_loader:
            pcap_byte = elem['feature'].to(device)
            labels = elem['label'].to(device)
            optimizer.zero_grad()
            outputs = model(pcap_byte)
            loss = criterion(outputs, labels.unsqueeze(1).float())
            loss.backward()
            # Updating parameters
            optimizer.step()
            total_pcaps += labels.size(0)
            acc = binary_acc(outputs, labels.unsqueeze(1).float())
            train_loss += loss.item()
            epoch_accuracy += acc.item()
        Acc.append(epoch_accuracy)
        average_loss = train_loss / len(train_loader)
        train_losses.append(train_loss)
        print('epoch %d, train_loss: %.3f' % (epoch + 1, average_loss))
After training, precision and recall for one of the classes are 0, while the other class gets precision = 1 and recall = 0.9.
Is there something wrong with the data? I collected the data myself, and I can't tell whether the problem is with the data or with my code.
The problem was with the data. I was using undersampling in the pre-processing step. I deleted that part and the model performed well.
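For anyone hitting the same symptom: checking the class balance and the per-class metrics directly is a quick way to tell a data problem from a code problem. A minimal sketch, assuming the ground-truth labels and the raw model outputs have been collected into flat lists y_true and y_prob and that scikit-learn is available:

import numpy as np
from collections import Counter
from sklearn.metrics import classification_report

print(Counter(y_true))                                                # a heavily skewed count often explains a 0-precision class
y_pred = (1 / (1 + np.exp(-np.asarray(y_prob))) > 0.5).astype(int)    # sigmoid + 0.5 threshold on the raw logits
print(classification_report(y_true, y_pred, digits=3))                # per-class precision / recall / f1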
I'm learning PyTorch and practicing on the Dogs vs. Cats competition on Kaggle, using the Kaggle GPU. I built a straightforward network.
Here is my dataset:
class customDataset:
    def __init__(self, paths, labels, transforms=None):
        self.paths = paths
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.transforms = transforms

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        image = torchvision.io.read_image(self.paths[idx])
        label = self.labels[idx]
        if self.transforms:
            image = self.transforms(image)
        return (
            image,
            label
        )

labels = pd.Series(labels)
labels = labels.map({'dog': 1, 'cat': 0}).to_numpy(int)

trans = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])

cds = customDataset(pList, labels, trans)
train_ds, test_ds = torch.utils.data.random_split(cds, (20000, 5000))

dataloader = {
    'train': DataLoader(train_ds, batch_size=1, num_workers=2, shuffle=True),
    'val': DataLoader(test_ds, batch_size=1, num_workers=2, shuffle=True)
}
Here are my network and fit function:
class Neural(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq = nn.Sequential(
            # image size = 224x224
            nn.Conv2d(3, 16, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, kernel_size=3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(7*7*32, 112),
            nn.Linear(112, 2),
        )

    def forward(self, x):
        out = self.seq(x)
        return out


def fit(model, loss_func, dataloader, optimizer, epochs=10):
    for epoch in range(epochs):
        running_loss = 0
        running_acc = 0
        print(f"epoch {epoch}/{epochs}", '-'*20)
        for batch, (img, label) in enumerate(dataloader['train']):
            img, label = img.to(device), label.to(device)
            pred = model(img)
            loss = loss_func(pred, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_acc += (pred.argmax() == label).sum()
            running_loss += loss.item()
        print(f"loss at {epoch}: {running_loss/20000}, Acc: {running_acc/20000}")

        acc = 0
        loss = 0
        for (img, label) in dataloader['val']:
            img, label = img.to(device), label.to(device)
            pred = model(img)
            loss += loss_func(pred, label)
            acc += (pred.argmax() == label).sum()
        print(f"Val: loss - {loss/5000}, acc - {acc/5000}")


loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
fit(model, loss_func, dataloader, optimizer)
After just one epoch I'm greeted with the error:
CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.90 GiB total capacity; 14.93 GiB already allocated; 29.75 MiB free; 14.96 GiB reserved in total by PyTorch)
I decreased my batch size to 2 and used torch.cuda.empty_cache(), but the issue still persists. On paper this should not happen, so I'm really confused.
Any help is appreciated. Thanks.
This isn't a bug. The only ways to decrease your memory usage are to (1) decrease your batch size, (2) decrease your input size (W x H), or (3) decrease your model size. I think you should look at the first two options, as your 16 GB card should be able to handle this network if you reduce your image size.
Even if you look at the size of your linear layer:
size_gb = 53760*224*32*32/1e9 #layer 1, 2, float32, batch, /GB
This gives us 12.3GB, not leaving much room for your other layers.
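Separately from the sizing, the validation loop in the question also grows memory across batches: loss += loss_func(pred, label) accumulates tensors that keep their autograd graphs alive, and the loop runs without torch.no_grad(). A minimal adjustment of that loop (same variable names as in the question):

model.eval()
acc = 0
val_loss = 0.0
with torch.no_grad():                               # no autograd graph is built during validation
    for img, label in dataloader['val']:
        img, label = img.to(device), label.to(device)
        pred = model(img)
        val_loss += loss_func(pred, label).item()   # .item() keeps a Python float, not a graph
        acc += (pred.argmax(dim=1) == label).sum().item()
print(f"Val: loss - {val_loss/5000}, acc - {acc/5000}")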
I am new to PyTorch. I am trying to use the Chinese MNIST dataset to train the neural network shown in the code below. Is the problem with the network's input, or is something else wrong in my code? I have tried many ways to fix it, but they only produce other errors.
train_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(700, random_state=SEED)).reset_index(drop=True)
x_train, y_train = train_df.iloc[:, :-2], train_df.iloc[:, -2]
valid_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(200, random_state=SEED)).reset_index(drop=True)
x_valid, y_valid = valid_df.iloc[:, :-2], valid_df.iloc[:, -2]
test_df = chin_mnist_df.groupby('value').apply(lambda x: x.sample(100, random_state=SEED)).reset_index(drop=True)
x_test, y_test = test_df.iloc[:, :-2], test_df.iloc[:, -2]
train_ds = Dataset(x_train, y_train)
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True)
valid_ds = Dataset(x_valid, y_valid)
valid_dataloader = torch.utils.data.DataLoader(valid_ds, batch_size=16, shuffle=True)
test_ds = Dataset(x_test, y_test)
test_dataloader = torch.utils.data.DataLoader(test_ds, batch_size=16, shuffle=True)
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 15)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

model = ConvNet()

klisi = []
apoklisi = []
apoklisi2 = []
klisi2 = []
olatalr = []

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_dataloader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    for i, data in enumerate(train_dataloader):  # (images, labels)
        batch_inputs, batch_labels = data[0][:].to(device).type(torch.float), data[1][:].to(device)
        # Run the forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)
Your training images are greyscale images. That is, they only have one channel (as opposed to the three RGB color channels in color images).
It seems like your Dataset (implicitly) "squeezes" this singleton dimension, and instead of having a batch of shape BxCxHxW = 16x1x64x64, you end up with a batch of shape 16x64x64.
Try:
# ...
batch_inputs, batch_labels = data[0][:].to(device).type(torch.float), data[1][:].to(device)
batch_inputs = batch_inputs[:, None, ...] # explicitly add the singleton channel dimension
# Run the forward pass
# ...
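Equivalently, unsqueeze can add the missing channel dimension; a quick shape check confirms the fix:

batch_inputs = batch_inputs.unsqueeze(1)   # same effect as batch_inputs[:, None, ...]
print(batch_inputs.shape)                  # expected: torch.Size([16, 1, 64, 64])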
We are training an autoencoder (AE) on CIFAR10 images. We used the following architecture:
class OurAE(nn.Module):
    def __init__(self, in_channels, z_channels):
        super(OurAE, self).__init__()
        self.tot_diff = None
        self.in_channels = in_channels
        curr_channels = in_channels

        encoder = []
        channels = [3, 16, 64] + [z_channels]
        for out_channels in channels:
            encoder += [
                nn.Conv2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        self.encoder = nn.Sequential(*encoder)

        curr_channels = z_channels
        decoder = []
        channels = [64, 16, 3] + [in_channels]
        for out_channels in channels:
            decoder += [
                nn.ConvTranspose2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=4, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        decoder = decoder[:-1]  # removing the ReLU layer
        decoder.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*decoder)

    def forward(self, x):
        return self.decoder(self.encoder(x))
We are not sure why, but we always get black-and-white images.
We tried replacing the Sigmoid with ReLU in the last layer, to no avail.
These are the loss function and optimizer that we used:
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
Here is an example of an input and output of the AE after training:
I had the same problem. I changed the loss function to 'mae'. That cleared the issue for me.
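In PyTorch terms, 'mae' corresponds to torch.nn.L1Loss. For an autoencoder the loss should compare the reconstruction with the input image, and the optimizer should be given the AE's own parameters rather than a classifier's. A minimal training-step sketch under those assumptions (the z_channels value and the train_loader name are placeholders):

ae = OurAE(in_channels=3, z_channels=128)          # z_channels chosen arbitrarily here
optimizer = torch.optim.Adam(ae.parameters(), lr=1e-3)
criterion = torch.nn.L1Loss()                      # MAE; torch.nn.MSELoss() is the other common choice

for images, _ in train_loader:                     # assumed CIFAR10 DataLoader; labels are unused
    optimizer.zero_grad()
    recon = ae(images)
    loss = criterion(recon, images)                # reconstruction error against the input itself
    loss.backward()
    optimizer.step()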
I am trying to train my custom model on the CIFAR-10 dataset.
My model's code is below:
class cifar10Model(keras.Model):
    def __init__(self):
        super(cifar10Model, self).__init__()
        self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
        self.pool1 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.1)
        self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
        self.pool2 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm2 = keras.layers.BatchNormalization()
        self.dropout2 = keras.layers.Dropout(0.2)
        self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
        self.pool3 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm3 = keras.layers.BatchNormalization()
        self.dropout3 = keras.layers.Dropout(0.3)
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(128, activation='relu')
        self.dense2 = keras.layers.Dense(10)

        def call(self, x):
            x = self.conv1(x)
            x = self.pool1(x)
            x = self.batch_norm1(X)
            x = self.dropout1(x)
            x = self.conv2(x)
            x = self.pool2(x)
            x = self.batch_norm2(X)
            x = self.dropout2(x)
            x = self.conv3(x)
            x = self.pool3(x)
            x = self.batch_norm3(x)
            x = self.dropout3(x)
            x = self.flatten(x)
            x = self.dense1(x)
            return self.dense2(x)

model = cifar10Model()
When I run this code, it gives me no error.
Then I defined my training loop:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

#tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

#tf.function
def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

epochs = 10
for epoch in range(epochs):
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_dataset:
        train_step(images, labels)

    for images, labels in test_dataset:
        test_step(images, labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
When I run this code, I get the following error:
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I am currently running my code on Google Colab; my Colab link is https://colab.research.google.com/drive/1sOlbRpPRdyOCJI0zRFfIA-Trj1vrIbWY?usp=sharing and my TensorFlow version on Colab is 2.2.0.
Also, when I tried to predict labels from the untrained model with
print(model(train_images))
I got the same error. The error says that I have not implemented the call method, but I have defined a call method. I also tried changing the call method to __call__, but it still gives me the same error.
Thanks in advance.
The problem is with indentation: you have defined the call method inside __init__. Define it outside the __init__ method, as follows (the capital X in some of the batch-norm calls also looks like a typo for x, so it is lowercased here):
class cifar10Model(keras.Model):
    def __init__(self):
        super(cifar10Model, self).__init__()
        self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
        self.pool1 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm1 = keras.layers.BatchNormalization()
        self.dropout1 = keras.layers.Dropout(0.1)
        self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
        self.pool2 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm2 = keras.layers.BatchNormalization()
        self.dropout2 = keras.layers.Dropout(0.2)
        self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
        self.pool3 = keras.layers.MaxPool2D((3, 3))
        self.batch_norm3 = keras.layers.BatchNormalization()
        self.dropout3 = keras.layers.Dropout(0.3)
        self.flatten = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(128, activation='relu')
        self.dense2 = keras.layers.Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

model = cifar10Model()
Hope this helps.
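A quick way to see what went wrong, without running a forward pass, is to check whether the subclass itself defines call: when call is nested inside __init__ it never becomes a class attribute, so Keras falls back to the base Model.call, which raises the NotImplementedError. A minimal check, assuming the class above is defined:

print('call' in cifar10Model.__dict__)   # False when call is nested in __init__, True after the fix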