How to plot Receptive Fields, for a CNN/fashionMNIST? - python

I created my CNN with PyTorch Lightning, and I am actually looking for plotting the Receptive Fields.
Do you have any suggestions about it?
I look for different solutions here and there, but I actually can't make them synergize with PyTorch Lightning.
Is it possible to visualize the Receptive fields directly inside Tensorboard?
I'll share with you my Dataset:
train_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=True, download=True, transform=transforms.ToTensor())
train, val = train_test_split(train_dataset, test_size = .2)
train_loader = DataLoader(train, batch_size = 32)
val_loader = DataLoader(train, batch_size = 32)
test_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=False, download=True, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size = 32)
and CNN:
def __init__(self, dropout, learn_rate, momentum, weight_decay, optimizer):
#def __init__(self, dropout, learn_rate, weight_decay, optimizer):
super().__init__()
self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12 , kernel_size = 5)
self.fc1 = nn.Linear(in_features = 12*4*4, out_features = 120)
self.fc2 = nn.Linear(in_features = 120, out_features = 60)
self.out = nn.Linear(in_features = 60, out_features = 10)
self.do = nn.Dropout(dropout) #for overfitting issues
self.loss = nn.CrossEntropyLoss()
self.accuracy = torchmetrics.Accuracy()
self.learn_rate = learn_rate
self.momentum = momentum #with Adam we don't have momentum. To Check best Optimizer with Optune, please comment this line.
self.weight_decay = weight_decay
self.optimizer = optimizer
self.train_loss = []
self.val_loss = []
self.train_acc = []
self.test_acc = []
#plot into tensorboard
log_dir = pathlib.Path.cwd() / "lightning_logs"
self.writer = SummaryWriter(log_dir)
#forward step
#I add each layer to the histogram. It's plotted into tensorboard
def forward(self, x, additional_out=False):
#conv1
x = self.conv1(x)
self.writer.add_histogram("First convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
#conv2
x = self.conv2(x)
self.writer.add_histogram("Second convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
#fuly connected 1
x = x.reshape(-1, 12*4*4)
x = self.fc1(x)
self.writer.add_histogram("First linear layer CNN", x)
x = F.relu(x)
x = self.do(x)
#fully connected 2
x=self.fc2(x)
self.writer.add_histogram("Second linear layer CNN", x)
x = F.relu(x)
x = self.do(x)
#output
x = self.out(x)
self.writer.add_histogram("Output layer CNN", x)
return x
#optimizer
def configure_optimizers(self):
#optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, momentum = self.momentum, weight_decay = self.weight_decay)
optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, weight_decay = self.weight_decay)
return optimizer
#training step
def training_step(self, batch, batch_idx):
x, y = batch
b = x.size(0)
x = x.view(b, -1, 28, 28)
logit = self(x)
J = self.loss(logit, y) #loss
#self.train_loss.append(J) #no need to append
acc = self.accuracy(logit, y) #accuracy
#self.train_acc.append(acc) #no need to append
self.log("train_loss_cnn", J.item())
self.log("train_acc_cnn", acc.item())
return {'loss': J}
#Since I used Tensorboard, it don't have to append to loss
def test_step(self, batch, batch_idx):
p, q = batch
b = p.size(0)
p = p.view(b, -1, 28, 28)
logit = self(p)
J = self.loss(logit, q) #loss
acc_test = self.accuracy(logit, q) #accuracy
#self.train_acc.append(acc_test) #no need to append
#self.train_loss.append(J) #no need to append
self.log("test_acc_cnn", acc_test.item())
self.log("test_loss_cnn", J.item())
def validation_step(self, batch, batch_idx=None):
u, v = batch
b = u.size(0)
u = u.view(b, -1, 28, 28)
logit = self(u)
J = self.loss(logit, v) #loss
#self.val_loss.append(J) #no need to append
acc_val = self.accuracy(logit, v) #accuracy
#self.train_acc.append(acc_val) #no need to append
self.log("val_loss_cnn", J.item())
self.log("val_acc_cnn", acc_val.item())
return {"loss": J, "pred": logit, "target": v}
#Once saves from validation step, I take with me the returned elements, and I can plot the Confusion Matrix inside Tensorboard
def validation_epoch_end(self, outputs):
preds = torch.cat([tmp['pred'] for tmp in outputs])
targets = torch.cat([tmp['target'] for tmp in outputs])
conf_mat = confusion_matrix(preds, targets, num_classes=10)
df_cm = pd.DataFrame(conf_mat.numpy(), index = range(10), columns=range(10))
plt.figure(figsize = (10,7))
fig_ = sns.heatmap(df_cm, annot=True, cmap='Spectral').get_figure()
plt.close(fig_)
self.logger.experiment.add_figure("Confusion matrix CNN", fig_, self.current_epoch)

Related

Should i include model.train() with Dropout in PytorchLightning

I was reading guide in which an author used model.train() in each epoch because of the DropOut layer (he didn't use Pytorch Lightning). The question is - should i include model.train() in my Pytorch Lightning Module and if so, how do i do it ? (Or it does it automatically ?)
The code is below
class MulticlassClassificationLIGHT(pl.LightningModule):
def __init__(self,class_weights):
super(MulticlassClassificationLIGHT, self).__init__()
self.num_feature=35
self.num_class=36
self.layer_1 = nn.Linear(self.num_feature, 512)
self.layer_2 = nn.Linear(512, 128)
self.layer_3 = nn.Linear(128, 64)
self.layer_out = nn.Linear(64, self.num_class)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(p=0.2)
self.batchnorm1 = nn.BatchNorm1d(512)
self.batchnorm2 = nn.BatchNorm1d(128)
self.batchnorm3 = nn.BatchNorm1d(64)
self.loss = nn.CrossEntropyLoss(weight=class_weights.to(device))
def forward(self, x):
x = self.layer_1(x)
x = self.batchnorm1(x)
x = self.relu(x)
x = self.layer_2(x)
x = self.batchnorm2(x)
x = self.relu(x)
x = self.dropout(x)
x = self.layer_3(x)
x = self.batchnorm3(x)
x = self.relu(x)
x = self.dropout(x)
x = self.layer_out(x)
return x
def training_step(self, batch, batch_idx):
x, y = batch
logits = self.forward(x)
loss = self.loss(logits, y)
self.log("train_loss", loss, prog_bar=True, logger=True)
return loss
And here is an example from guide
for e in tqdm(range(1, EPOCHS+1)):
train_epoch_loss = 0
train_epoch_acc = 0
model.train()
for X_train_batch, y_train_batch in train_loader:
X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
optimizer.zero_grad()
I use Trainer for training. Here is it
trainer = pl.Trainer(
devices="auto",
accelerator="auto",
auto_lr_find=False,
auto_scale_batch_size=True,
fast_dev_run=False,
num_sanity_val_steps=3,
logger=logger,
min_epochs=EPOCHS,
)
And how I use it
trainer.fit(model, data_module_classifier.train_dataloader(),data_module_classifier.val_dataloader() )
There is just no information in the documentation and solutions are only in usual Pytorch, but not in Pytorch Lightning

How to get activation values of a layer in pytorch

I have a pytorch-lightning model that has a dense layer like so:
def __init__(...)
...
self.dense = nn.Linear(channels[-1], 64, bias=True)
...
for my project, I need to get the activation values of this layer as a list
I have tried this code which I found on the pytorch discussion forum:
activation = {}
def get_activation(name):
def hook(model, input, output):
activation[name] = output.detach()
return hook
test_img = cv.imread(f'digimage/100.jpg')
test_img = cv.resize(test_img, (128, 128))
test_img = np.moveaxis(test_img, 2, 0)
modelftr = load_feature_model(**model_dict)
num_ftrs = modelftr.fc.in_features
modelftr.fc = torch.nn.Linear(num_ftrs, 228)
modelftr.load_state_dict(torch.load('...'))
modelftr.dense.register_forward_hook(get_activation('dense'))
with torch.no_grad():
modelatt.to('cpu')
modelatt.eval()
test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
output = modelcat(test_img)
print(activation['dense'])
But this gives a keyerror:
8 test_img = torch.tensor(test_img).view(-1, 3, 128, 128).float()
9 output = modelcat(test_img)
---> 10 print(activation['dense'])
KeyError: 'dense'
Update:
This is my full model code.
As you can see there is a linear layer named dense
class FAtNet(pl.LightningModule):
def __init__(self, image_size, in_channels, num_blocks, channels,
num_classes=20, block_types=['C', 'C', 'T', 'T'], lr=0.0001, loss_function=nn.CrossEntropyLoss()):
super().__init__()
self.lr = lr
self.loss_function = loss_function
ih, iw = image_size
block = {'C': MBConv, 'T': Transformer}
self.s0 = self._make_layer(
conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
self.s1 = self._make_layer(
block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
self.s2 = self._make_layer(
block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
self.s3 = self._make_layer(
block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
self.s4 = self._make_layer(
block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))
self.pool = nn.AvgPool2d(ih // 32, 1)
self.dense = nn.Linear(channels[-1], 64, bias=True)
self.fc = nn.Linear(64, num_classes, bias=False)
def forward(self, x):
x = self.s0(x)
x = self.s1(x)
x = self.s2(x)
x = self.s3(x)
x = self.s4(x)
x = self.pool(x).view(-1, x.shape[1])
x = self.dense(x)
x = self.fc(x)
return x
def _make_layer(self, block, inp, oup, depth, image_size):
layers = nn.ModuleList([])
for i in range(depth):
if i == 0:
layers.append(block(inp, oup, image_size, downsample=True))
else:
layers.append(block(oup, oup, image_size))
return nn.Sequential(*layers)
def configure_optimizers(self):
return optim.Adam(self.parameters(), lr=self.lr)
def training_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('train_loss', loss)
return loss
def test_step(self, batch, batch_idx):
X, y = batch
y_hat = self(X)
loss = self.loss_function(y_hat, y)
self.log('test_loss', loss)
return loss
### custom prediction function ###
def predict(self, dm):
X_test = dm.X_test
self.eval()
X_test = torch.tensor(X_test).float()
self.to(device='cuda')
pred = []
with torch.no_grad():
for data in X_test:
output = self(data)
pred.append(output)
pred = pred[0].detach()
pred = pred.cpu()
self.to(device='cpu')
self.train()
return pred
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
It seems like you model does not have 'dense' layer, only 'fc'.
Try:
modelftr.fc.register_forward_hook(get_activation('fc'))

Pytorch network not training

I am trying to train an activity recognition system using PyTorch, but the network is not training and loss is not dropping, even though I have a similar model working perfectly on keras. I have provided code for the training loop, model class, and dataset class here. Can you help me why the loss is not dropping (accuracy is not increasing)
main training loop
# create dataset
dataset = IMU_dataset()
train_loader = DataLoader(dataset=dataset,
batch_size=40,
shuffle=True,
num_workers=2)
num_epochs = 100
total_samples = len(dataset)
n_iterations = math.ceil(total_samples/4)
print(total_samples, n_iterations)
input_shape = 3
output_index = 6
device = torch.device('cpu')
model = HARmodel(input_shape, output_index).to(device)
model.float()
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for epoch in range(num_epochs):
for i, (inputs, labels) in enumerate(train_loader):
# origin shape: [40, 3, 400]
labels = labels.to(device)
# Forward pass
outputs = model(inputs.to(device).float())
loss = criterion(outputs, labels.long())
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
# if (i+1) % 5 == 0:
# print(f'loss: {loss.item()}')
print(model.calculate_accuracy(dataset.x_data, dataset.y_data), model.calculate_loss(dataset.x_data, dataset.y_data, criterion))
Here is the model class.
class HARmodel(nn.Module):
"""Model for human-activity-recognition."""
def __init__(self, input_size, num_classes):
super().__init__()
# Extract features, 1D conv layers
self.layer_1 = nn.Conv1d(input_size, 100, 10, stride=1)
self.activation_relu = nn.ReLU()
self.layer_2 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_3 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_4 = nn.MaxPool1d(2, stride=3)
self.layer_5 = nn.Dropout(p=0.2)
self.layer_6 = nn.Conv1d(100, 160, 10, stride=1)
self.layer_7 = nn.Conv1d(160, 160, 10, stride=1)
self.layer_8 = nn.Conv1d(160, 160, 10, stride=1)
# self.layer_9 = nn.AvgPool1d(97)
self.layer_10 = nn.Dropout(p=0.5)
self.layer_11 = nn.Linear(160, 6)
self.activation_softmax = nn.Softmax()
def forward(self, x):
x = self.layer_1(x)
x = self.activation_relu(x)
x = self.layer_2(x)
x = self.activation_relu(x)
x = self.layer_3(x)
x = self.activation_relu(x)
x = self.layer_4(x)
x = self.layer_5(x)
x = self.layer_6(x)
x = self.activation_relu(x)
x = self.layer_7(x)
x = self.activation_relu(x)
x = self.layer_8(x)
x = self.activation_relu(x)
self.layer_9 = nn.AvgPool1d(x.shape[2])
x = self.layer_9(x)
x = self.layer_10(x)
y = self.layer_11(x.view(x.shape[0],x.shape[1]))
# y = self.activation_softmax(y)
return y
def calculate_accuracy(self, X,y):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
result = (max_index == true_output).sum()/y.shape[0]
return result.detach().numpy()
def calculate_loss(self, X,y, crit):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
return crit(output, true_output).item()
Here is the dataset class:
class IMU_dataset(Dataset):
def __init__(self):
self.n = X.shape[0]
self.x_data = torch.from_numpy(X.reshape(-1,3,400))
self.y_data = torch.from_numpy(y)
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n
EDIT 1:
I got to know that I need to remove the softmax layer
I have tried with a lower learning rate and still have the same problem.

Error: When subclassing the `Model` class, you should implement a `call` method. on tensorflow custom model

I am trying to train my custom model on Cifar 10 dataset.
My model's code is below: -
class cifar10Model(keras.Model):
def __init__(self):
super(cifar10Model, self).__init__()
self.conv1 = keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3))
self.pool1 = keras.layers.MaxPool2D((3, 3))
self.batch_norm1 = keras.layers.BatchNormalization()
self.dropout1 = keras.layers.Dropout(0.1)
self.conv2 = keras.layers.Conv2D(64, 3, activation='relu')
self.pool2 = keras.layers.MaxPool2D((3, 3))
self.batch_norm2 = keras.layers.BatchNormalization()
self.dropout2 = keras.layers.Dropout(0.2)
self.conv3 = keras.layers.Conv2D(128, 3, activation='relu')
self.pool3 = keras.layers.MaxPool2D((3, 3))
self.batch_norm3 = keras.layers.BatchNormalization()
self.dropout3 = keras.layers.Dropout(0.3)
self.flatten = keras.layers.Flatten()
self.dense1 = keras.layers.Dense(128, activation='relu')
self.dense2 = keras.layers.Dense(10)
def call(self, x):
x = self.conv1(x)
x = self.pool1(x)
x = self.batch_norm1(X)
x = self.dropout1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.batch_norm2(X)
x = self.dropout2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.batch_norm3(x)
x = self.dropout3(x)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
model = cifar10Model()
When i run this code this gives me no error.
Then i defined my training loop
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
#tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images, training=True)
loss = loss_object(labels, predictions)
grad = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grad, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
#tf.function
def test_step(images, labels):
predictions = model(images)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
epochs = 10
for epoch in range(epochs):
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
for images, labels in train_dataset:
train_step(images, labels)
for images, labels in test_dataset:
test_step(images, labels)
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
print(template.format(epoch + 1,
train_loss.result(),
train_accuracy.result() * 100,
test_loss.result(),
test_accuracy.result() * 100))
When i run this code, i get the following error
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I am currently running my code on google colab.
My colab link is https://colab.research.google.com/drive/1sOlbRpPRdyOCJI0zRFfIA-Trj1vrIbWY?usp=sharing
My tensorflow version on colab is 2.2.0
Also, when i tried to predict labels from untrained model by this code :-
print(model(train_images))
This also gives me the same error.
The error is saying that i have not implemented the call method on model.
but, i have defined the call method.
I also tried by changing the call method to __call__ method.
But still, it gives me the same error.
Thanks in advance :-
The problem is with indentation. You've defined call method inside __init__. Try defining it outside the __init__ method as follows:
class cifar10Model(keras.Model):
def __init__(self):
super(cifar10Model, self).__init__()
self.conv1 = keras.layers.Conv3D(32, 3, activation='relu', input_shape=(32, 32, 3))
self.pool1 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm1 = keras.layers.BatchNormalization()
self.dropout1 = keras.layers.Dropout(0.1)
self.conv2 = keras.layers.Conv3D(64, 3, activation='relu')
self.pool2 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm2 = keras.layers.BatchNormalization()
self.dropout2 = keras.layers.Dropout(0.2)
self.conv3 = keras.layers.Conv3D(128, 3, activation='relu')
self.pool3 = keras.layers.MaxPool3D((3, 3, 3))
self.batch_norm3 = keras.layers.BatchNormalization()
self.dropout3 = keras.layers.Dropout(0.3)
self.flatten = keras.layers.Flatten()
self.dense1 = keras.layers.Dense(128, activation='relu')
self.dense2 = keras.layers.Dense(10)
def call(self, x):
x = self.conv1(x)
x = self.pool1(x)
x = self.batch_norm1(X)
x = self.dropout1(x)
x = self.conv2(x)
x = self.pool2(x)
x = self.batch_norm2(X)
x = self.dropout2(x)
x = self.conv3(x)
x = self.pool3(x)
x = self.batch_norm3(X)
x = self.dropout3(x)
x = self.flatten(x)
x = self.dense1(x)
return self.dense2(x)
model = cifar10Model()
Hope this helps.

Triplet-Loss not converging using Tensorflow

Currently I am trying to implement triplet loss using Tensorflow attached my code.
But I found that the training always diverges. Can anyone help me find out where the problem is?
def compute_triplet_loss(anchor_features, positive_features, negative_features, margin=0.1):
with tf.name_scope("triplet_loss"):
anchor_features_norm = compute_norm(anchor_features)
positive_features_norm = compute_norm(positive_features)
negative_features_norm = compute_norm(negative_features)
denom1 = tf.multiply(anchor_features_norm, positive_features_norm)
denom2 = tf.multiply(anchor_features_norm, negative_features_norm)
a_p_product = compute_dot_product(anchor_features, positive_features)
a_n_product = compute_dot_product(anchor_features, negative_features)
a_p_vec = tf.divide(a_p_product, denom1)
a_n_vec = tf.divide(a_n_product, denom2)
loss = tf.maximum(0., tf.add(tf.subtract(a_n_vec, a_p_vec), margin))
return loss
def DNN(input_tensor, is_training, Reuse=True):
tf.add_to_collection('input_tensor', input_tensor)
with tf.name_scope('uttr_net', [input_tensor]):
with tf.variable_scope('uttr_vars', reuse=Reuse):
x = slim.fully_connected(input_tensor, 1024, scope='fc_1')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_1')
x = slim.fully_connected(x, 1024, scope='fc_2')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_2')
x = slim.fully_connected(x, 1024, scope='fc_3')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_3')
x = slim.fully_connected(x, 80, scope='d_vec')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_4')
# pooling
x = tf.expand_dims(x, 1)
x = tf.nn.avg_pool(x, ksize=[1,1,2,1], strides=[1,1,1,1], padding='VALID')
x = tf.squeeze(x, [1])
x = slim.dropout(x, keep_prob=0.65, is_training=is_training, scope='final_drop')
x = tf.reduce_mean(x, axis=1, name='uttr_mean')
tf.add_to_collection('d-vec', x)
return x
def build_graph(input_shape, learning_rate=0.001):
input_positive_0 = tf.placeholder(tf.float32, shape=input_shape, name='input_positive_0')
input_positive_1 = tf.placeholder(tf.float32, shape=input_shape, name='input_positive_1')
input_negative = tf.placeholder(tf.float32, shape=input_shape, name='input_negative')
is_training = tf.placeholder(tf.bool, [])
tf.add_to_collection('input_positive_0', input_positive_0)
tf.add_to_collection('input_positive_1', input_positive_1)
tf.add_to_collection('input_negative', input_negative)
tf.add_to_collection('is_training', is_training)
DNN_0 = DNN(input_positive_0, is_training, False)
DNN_1 = DNN(input_positive_1, is_training)
DNN_2 = DNN(input_negative, is_training)
loss = compute_triplet_loss(DNN_0, DNN_1, DNN_2)
loss = tf.reduce_sum(loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return optimizer, loss, input_positive_0, input_positive_1, input_negative, is_training

Categories