I made a CNN model for emotion recognition on 5 emotions. I want to test it on a single image to get the individual class predictions for each emotion.
Evaluating the model works, but I can't seem to find how to make a prediction with a single image. How can I do that?
The Model
def conv_block(in_channels, out_channels, pool=False):
    """Build one convolution stage: 3x3 conv -> batch norm -> ELU.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        pool: when true, a 2x2 max-pool is appended, halving height and width.

    Returns:
        An ``nn.Sequential`` holding the layers in execution order.
    """
    stages = [
        # padding=1 with a 3x3 kernel keeps the spatial size unchanged
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ELU(inplace=True),
    ]
    if pool:
        stages += [nn.MaxPool2d(2)]
    return nn.Sequential(*stages)
class ResNet(ImageClassificationBase):
    """Small residual CNN: three conv stages, each followed by a skip block.

    Every stage widens the channels (128 -> 256 -> 512), halves the spatial
    size once via a pooled ``conv_block``, adds a two-layer residual branch
    and applies dropout with p=0.5.

    Args:
        in_channels: channels of the input images.
        num_classes: size of the output logit vector.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Stage 1: in_channels -> 128, spatial size halved by conv2
        self.conv1 = conv_block(in_channels, 128)
        self.conv2 = conv_block(128, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128),
                                  conv_block(128, 128))
        self.drop1 = nn.Dropout(0.5)

        # Stage 2: 128 -> 256, spatial size halved by conv4
        self.conv3 = conv_block(128, 256)
        self.conv4 = conv_block(256, 256, pool=True)
        self.res2 = nn.Sequential(conv_block(256, 256),
                                  conv_block(256, 256))
        self.drop2 = nn.Dropout(0.5)

        # Stage 3: 256 -> 512, spatial size halved by conv6
        self.conv5 = conv_block(256, 512)
        self.conv6 = conv_block(512, 512, pool=True)
        self.res3 = nn.Sequential(conv_block(512, 512),
                                  conv_block(512, 512))
        self.drop3 = nn.Dropout(0.5)

        # Linear(512, ...) needs a 1x1 map after MaxPool2d(6), i.e. a 6x6 map
        # before it -- presumably 48x48 input images; TODO confirm.
        self.classifier = nn.Sequential(nn.MaxPool2d(6),
                                        nn.Flatten(),
                                        nn.Linear(512, num_classes))

    def forward(self, xb):
        """Return the class logits for a batch of images ``xb``."""
        out = self.conv2(self.conv1(xb))
        out = self.drop1(self.res1(out) + out)  # skip connection

        out = self.conv4(self.conv3(out))
        out = self.drop2(self.res2(out) + out)  # skip connection

        out = self.conv6(self.conv5(out))
        out = self.drop3(self.res3(out) + out)  # skip connection

        return self.classifier(out)
The fit_one_cycle function is called to train the model
@torch.no_grad()
def evaluate(model, val_loader):
    """Run the model over a validation loader and aggregate the metrics.

    The ``torch.no_grad()`` decorator is active (it was commented out), so no
    autograd graph is built while validating.

    Args:
        model: object exposing ``eval()``, ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch outputs.
    """
    # Switch layers such as dropout / batch norm to inference behaviour.
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    return None if first_group is None else first_group['lr']
def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
                  weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle learning-rate schedule.

    Args:
        epochs: number of passes over ``train_loader``.
        max_lr: peak learning rate handed to the optimizer and ``OneCycleLR``.
        model: module exposing ``training_step``, ``epoch_end`` and the
            validation hooks used by ``evaluate``.
        train_loader: iterable of training batches (must support ``len``).
        val_loader: iterable of validation batches.
        weight_decay: weight decay passed to the optimizer.
        grad_clip: if truthy, gradients are clipped element-wise to
            ``[-grad_clip, grad_clip]`` before each optimizer step.
        opt_func: optimizer constructor.

    Returns:
        A list with one result dict per epoch; each dict holds the validation
        metrics plus ``'train_loss'`` and ``'lrs'``.
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler (stepped once per batch)
    sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
                                                steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Store a detached copy: the value is only averaged for reporting,
            # and keeping the attached tensor would hold a reference to every
            # batch's autograd graph until the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history
This returns the accuracy and loss; I want to change it so that it returns the prediction percentages for each class.
def accuracy(outputs, labels):
    """Fraction of rows whose highest-scoring class equals the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class indices.

    Returns:
        A 0-dim float tensor holding ``correct / number_of_rows``.
    """
    _values, predicted = outputs.max(dim=1)
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))
class ImageClassificationBase(nn.Module):
    """Mixin with the training / validation hooks for a classifier.

    Subclasses provide ``forward``; the hooks here compute cross-entropy loss
    and accuracy per batch and aggregate them per epoch.
    """

    def training_step(self, batch):
        """Return the (graph-attached) cross-entropy loss for one batch."""
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss', 'val_acc'}`` tensors for one batch.

        The loss is detached so that collecting the per-batch results does not
        keep each batch's autograd graph alive.
        """
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses and accuracies into plain floats."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary; reads the last entry of ``result['lrs']``."""
        print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))
Evaluating the model works, but I can't seem to find how to make a
prediction with a single image. How can I do that?
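If you have a single image, make sure to:
- add an extra batch dimension of size 1 at the beginning, and
- use the CHW (channels, height, width) layout instead of HWC (or tell PyTorch which layout you use; check out how to do that here).
For example (pass the tensor to the model, e.g. `my_model(random_image)`, to get the per-class outputs):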
# Build the network; the constructor arguments are elided in this example.
my_model = CNN(...)
# Stand-in for one image, laid out as (batch=1, channels, height, width).
random_image = torch.randn(1, 3, 100, 100) # 3 channels, 100x100 img
BTW. Your accuracy could be written a little simpler like this:
def accuracy(outputs, labels):
    """Fraction of rows whose arg-max class equals the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class indices.

    Returns:
        A 0-dim float tensor in ``[0, 1]``.
    """
    preds = torch.argmax(outputs, dim=1)
    # Averaging the float mask avoids dividing an integer tensor by an int,
    # whose result depended on the PyTorch version (floor division or an
    # error on older releases, true division on newer ones).
    return (preds == labels).float().mean()
Getting class probability
Similar to argmax, you can use softmax, which transforms the logits (the unnormalized scores output by your network) into probabilities:
def probability(outputs):
    """Turn per-class logits into probabilities.

    Softmax is applied along dim 1, so each row of the result sums to 1.
    """
    return outputs.softmax(dim=1)
Related
I am building a model to classify news (AG news dataset). The vocab size ~33k with custom embedding layer. I have run this for 20 epochs but the loss and accuracy (1.3 and 26% respec.) is almost constant even at the end of 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
class NewsClassifier(nn.Module):
    """Embedding -> (bi)LSTM/GRU -> linear text classifier.

    Args:
        vocab_weights: optional pretrained embedding matrix; when given it is
            loaded with ``nn.Embedding.from_pretrained``.
        rnn_type: ``'LSTM'`` or ``'GRU'``.
        vocab_size: number of rows of the trainable embedding.
        n_classes: number of output classes.
        embed_size: embedding dimension (input size of the RNN).
        rnn_units: hidden size of the RNN, per direction.
        n_layers: number of stacked RNN layers.
        bi_dir: whether the RNN is bidirectional.
        rnn_drop: dropout between stacked RNN layers.
        padding_index: index passed as ``padding_idx`` to the embedding.
            NOTE(review): the default is the ``<unk>`` index; presumably the
            padding token's index was intended -- confirm.

    Raises:
        NotImplementedError: if ``rnn_type`` is neither ``'LSTM'`` nor ``'GRU'``.
    """

    def __init__(self, vocab_weights = None, rnn_type = 'LSTM', vocab_size = len(vocab.vocab),
                 n_classes = 4, embed_size = 300, rnn_units = 512,
                 n_layers = 2, bi_dir = True, rnn_drop = 0.0, padding_index = vocab['<unk>']):
        super().__init__()
        self.rnn_units = rnn_units
        self.n_classes = n_classes
        self.rnn_type = rnn_type
        self.bi_dir = bi_dir
        # `is not None` rather than truthiness: the truth value of a
        # multi-element array / tensor is ambiguous and raises.
        if vocab_weights is not None:
            self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx = padding_index)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
        else:
            # Was `NotImplementError`, an undefined name that raised NameError.
            raise NotImplementedError(f"unsupported rnn_type: {rnn_type!r}")
        self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)

    def forward(self, data, lens):
        """Return class logits of shape (batch, n_classes).

        Args:
            data: token indices, time-major (the RNN is not ``batch_first``).
            lens: true (unpadded) length of every sequence in the batch.
        """
        x_embed = self.embedding(data)  # (padded_lens, batch_size, embed_dim)
        # Pack so the RNN stops at each sequence's true length.
        x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted = False)
        if self.rnn_type == 'LSTM':
            _, (hidden, _) = self.rnn(x_packed)
        else:
            _, hidden = self.rnn(x_packed)  # For GRU there is only the hidden state
        # Summarise each sequence by the final hidden state of the top layer.
        # Reading `output_padded[-1]` instead yields zero padding for every
        # sequence shorter than the longest one, and for the backward
        # direction it is the state after a single token.
        # `hidden` is (num_layers * num_directions, batch, rnn_units); the top
        # layer's forward / backward states are its last two entries.
        if self.bi_dir:
            out_reduced = torch.cat((hidden[-2], hidden[-1]), dim=1)
        else:
            out_reduced = hidden[-1]
        return self.fc(out_reduced)
# Build the classifier with its default hyper-parameters and report its size.
model = NewsClassifier()
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The total number of trainable parameters are : {n_trainable}')
My training function is:
def train(model, iterator = trainDataloader, optimizer = optimizer, loss_fn = criterion):
    """Run one training epoch and return ``(mean_loss, mean_accuracy)``.

    Both means are taken over batches (not over samples). Each batch from
    ``iterator`` must unpack into ``(inputs, lengths, labels)``.
    """
    model.train()
    loss_sum = acc_sum = 0
    n_batches = 0
    for inputs, leng, labels in iterator:
        inputs = inputs.to(device)
        leng = leng.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        preds = model(inputs, leng).squeeze(1)
        loss = loss_fn(preds, labels.long())
        acc = accuracy(preds, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += acc.item()
        n_batches += 1
    return loss_sum / n_batches, acc_sum / n_batches
def predict(model, iterator = testDataloader, loss_fn = criterion):
    """Evaluate ``model`` without gradients; return ``(mean_loss, mean_accuracy)``.

    Both means are taken over batches. Each batch from ``iterator`` must
    unpack into ``(inputs, lengths, labels)``.
    """
    model.eval()
    batch_losses = []
    batch_accs = []
    with torch.no_grad():
        for inputs, leng, labels in iterator:
            inputs = inputs.to(device)
            leng = leng.to(device)
            labels = labels.to(device)
            preds = model(inputs, leng).squeeze(1)
            loss = loss_fn(preds, labels.long())
            acc = accuracy(preds, labels)
            batch_losses.append(loss.item())
            batch_accs.append(acc.item())
    n_batches = len(batch_losses)
    return sum(batch_losses) / n_batches, sum(batch_accs) / n_batches
# Train for a fixed number of epochs, checkpointing whenever validation loss improves.
N_EPOCHS = 20
# Lowest validation loss seen so far; +inf guarantees the first epoch is saved.
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model)
    # predict() is called with its default iterator, so the "Val." numbers
    # below are measured on that default loader.
    valid_loss, valid_acc = predict(model)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    # Keep only the weights of the best epoch; the file is overwritten each time.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
This is my model:
class BiLSTM(nn.Module):
    """Bidirectional LSTM classifier over frozen pretrained embeddings.

    The LSTM outputs are summarised by mean- and max-pooling over the time
    axis; the two pooled vectors are concatenated and passed through a small
    MLP. Reads the module-level names ``le``, ``max_features``, ``embed_size``
    and ``embedding_matrix`` when constructed.
    """

    def __init__(self):
        super(BiLSTM, self).__init__()
        self.hidden_size = 128
        drp = 0.2
        n_classes = len(le.classes_)
        self.embedding = nn.Embedding(max_features, embed_size)
        # Load the pretrained vectors and keep them fixed during training.
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
        # hidden_size*4: (forward + backward) features for each of the two poolings.
        self.linear = nn.Linear(self.hidden_size*4 , 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(128, n_classes)

    def forward(self, x):
        """Return class logits for a batch of token-index sequences ``x``.

        ``x`` is indexed batch-first (the LSTM uses ``batch_first=True``).
        """
        h_embedding = self.embedding(x)
        # The former `_embedding = squeeze(unsqueeze(...))` line was never
        # used and has been removed.
        h_lstm, _ = self.lstm(h_embedding)
        avg_pool = torch.mean(h_lstm, 1)    # mean over the time axis
        max_pool, _ = torch.max(h_lstm, 1)  # max over the time axis
        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean',)
# Only parameters that require gradients are optimised (the embedding is frozen).
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()

# Load train and test in CUDA Memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
# The validation loader must NOT shuffle: predictions are written to an array
# by batch position and then compared with test_y in its original order, so a
# shuffled loader pairs each prediction with the wrong label.
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)
# Per-epoch history of the averaged training / validation losses.
train_loss = []
valid_loss = []
for epoch in range(n_epochs):
    start_time = time.time()

    # Set model to train configuration
    model.train()
    avg_loss = 0.
    for i, (x_batch, y_batch) in enumerate(train_loader):
        # Predict/Forward Pass
        y_pred = model(x_batch)
        # Compute loss
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)

    # Set model to validation configuration
    model.eval()
    avg_val_loss = 0.
    val_preds = np.zeros((len(x_cv), len(le.classes_)))
    # Labels are collected in the same order as the predictions, so the
    # accuracy stays correct whatever order the loader yields batches in.
    val_labels = np.zeros(len(x_cv), dtype=np.int64)
    with torch.no_grad():  # no autograd graph is needed for validation
        for i, (x_batch, y_batch) in enumerate(valid_loader):
            y_pred = model(x_batch)
            avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
            # keep/store predictions (softmax over the class dimension)
            batch_rows = slice(i * batch_size, (i + 1) * batch_size)
            val_preds[batch_rows] = F.softmax(y_pred, dim=1).cpu().numpy()
            val_labels[batch_rows] = y_batch.cpu().numpy()
    # NOTE(review): scaling by a positive constant does not change the argmax
    # below; kept only because val_preds may be read elsewhere -- confirm.
    val_preds = val_preds * 5000

    # Check Accuracy
    val_accuracy = (val_preds.argmax(axis=1) == val_labels).mean()
    train_loss.append(avg_loss)
    valid_loss.append(avg_val_loss)
    elapsed_time = time.time() - start_time
    print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
        epoch + 1, n_epochs , avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving. I have used the BiLSTM model. I tried changing the loss techniques and the dropout value, but it didn't work. I guess the issue is that the model is overfitting; how can I increase the accuracy? The size of the dataset is 3000.
[Output looks like this](https://i.stack.imgur.com/NbK92.png)
[Data looks like this](https://i.stack.imgur.com/ll12J.png)
I am new to Torch and using a code template for a masked-cnn model. In order to be prepared if the training is interrupted, I have used torch.save and torch.load in my code, but I think I cannot use this alone for continuing training sessions? I start training by:
# Start training; the argument is passed as num_epochs.
model = train_mask_net(64)
This calls the function train_mask_net, where I have included torch.save in the epoch loop. I wanted to load one of the saved models and continue training by calling torch.load before the loop, but I got KeyError messages for the optimizer, loss and epoch lookups. Should I have written a dedicated checkpoint function, as I have seen in some tutorials, or is it possible to continue training from the files saved by the torch.save command?
def train_mask_net(num_epochs=1):
    """Train an XceptionHourglass on MaskDataset, starting from saved weights.

    Weights are loaded from a fixed checkpoint path before the loop, and the
    model's state_dict is written to a per-epoch file.

    Args:
        num_epochs: number of passes over the data loader.

    Returns:
        The trained model (the plain module, not the DataParallel wrapper).

    NOTE(review): only model weights are saved and restored, so optimizer
    state and the epoch counter restart from scratch on every call.
    """
    data = MaskDataset(list(data_mask.keys()))
    data_loader = torch.utils.data.DataLoader(data, batch_size=8, shuffle=True, num_workers=4)
    model = XceptionHourglass(max_clz+2)
    model.cuda()
    # Forward passes below go through this wrapper around `model`.
    dp = torch.nn.DataParallel(model)
    loss = nn.CrossEntropyLoss()
    params = [p for p in dp.parameters() if p.requires_grad]
    optimizer = torch.optim.RMSprop(params, lr=2.5e-4, momentum=0.9)
    # NOTE(review): the scheduler is created but .step() is never called in
    # this function, so the learning rate stays at its initial value.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=6,
                                                   gamma=0.9)
    # The file is passed straight to load_state_dict, i.e. it is treated as a
    # bare state_dict rather than a dict with 'epoch' / 'loss' / optimizer keys.
    checkpoint = torch.load('imaterialist2020-pretrain-models/maskmodel_160.model_ep17')
    #print(checkpoint)
    model.load_state_dict(checkpoint)
    # Disabled attempts to restore optimizer / epoch / loss from the same file:
    #optimizer.load_state_dict(checkpoint)
    #epoch = checkpoint['epoch']
    #loss = checkpoint['loss']
    for epoch in range(num_epochs):
        print(epoch)
        # NOTE(review): filled below but never read in this function.
        total_loss = []
        prog = tqdm(data_loader, total=len(data_loader))
        for i, (imag, mask) in enumerate(prog):
            X = imag.cuda()
            y = mask.cuda()
            xx = dp(X)
            # to 1D-array
            y = y.reshape((y.size(0),-1)) # batch, flatten-img
            y = y.reshape((y.size(0) * y.size(1),)) # flatten-all
            xx = xx.reshape((xx.size(0), xx.size(1), -1)) # batch, channel, flatten-img
            xx = torch.transpose(xx, 2, 1) # batch, flatten-img, channel
            xx = xx.reshape((xx.size(0) * xx.size(1),-1)) # flatten-all, channel
            # Per-pixel classification: one row of class scores per pixel.
            losses = loss(xx, y)
            prog.set_description("loss:%05f"%losses)
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            total_loss.append(losses.detach().cpu().numpy())
        # Save the weights under a file name that includes the epoch index.
        torch.save(model.state_dict(), MODEL_FILE_DIR+"maskmodel_%d.model"%attr_image_size[0]+'_ep'+str(epoch)+'_tsave')
        # Drop references to the last batch before clearing the CUDA cache.
        prog, X, xx, y, losses = None, None, None, None, None,
        torch.cuda.empty_cache()
        gc.collect()
    return model
I don't think it's necessary, but the XceptionHourglass class looks like this:
class XceptionHourglass(nn.Module):
    """Two conv / batch-norm stem layers followed by HourglassNet blocks.

    Only the beginning of the constructor is shown in this excerpt.
    """

    def __init__(self, num_classes):
        super(XceptionHourglass, self).__init__()
        self.num_classes = num_classes
        # Stem: 3 -> 128 channels, 3x3 kernel, stride 2, padding 1
        self.conv1 = nn.Conv2d(3, 128, 3, 2, 1, bias=True)
        self.bn1 = nn.BatchNorm2d(128)
        self.mish = Mish()
        # 128 -> 256 channels, 3x3 kernel, stride 1, padding 1
        self.conv2 = nn.Conv2d(128, 256, 3, 1, 1, bias=True)
        self.bn2 = nn.BatchNorm2d(256)
        # HourglassNet is defined elsewhere; meaning of (4, 256) not shown here.
        self.block1 = HourglassNet(4, 256)
        self.bn3 = nn.BatchNorm2d(256)
        self.block2 = HourglassNet(4, 256)
        ...
torch.save(model.state_dict(), PATH) only saves the model weights.
To also save optimizer, loss, epoch, etc., change it to:
# Bundle everything needed to resume training into a single checkpoint dict.
torch.save({'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'loss': loss,
            'epoch': epoch,
            # ...
            }, PATH)
To load them:
# Restore each piece under the same key it was saved with.
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
More on it here.
I am trying to create a logistic model by using CIFAR10 data in PyTorch. After running the model for evaluation I run into an error :
RuntimeError: size mismatch, m1: [750 x 4096], m2: [1024 x 10] at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\aten\src\TH/generic/THTensorMath.cpp:136
It seems like input_size is causing the problem, but I am not sure, as I am new to this. Please let me know what changes I should make to overcome this error.
These are the hyperparameters:
# NOTE(review): the data loaders shown for this model hard-code their own
# batch sizes (50 / 1000) and do not read this value.
batch_size = 100
learning_rate = 0.001
# Other constants
# NOTE(review): the reported m1 of [750 x 4096] with validation batches of
# 1000 images means 3072 values per image (3*32*32); 4*4*64 = 1024 does not
# match that -- confirm the intended input width.
input_size = 4*4*64
num_classes = 10
This is the cell that downloads and splits the dataset into train, validation and test.
# Convert images to tensors and normalise every channel to roughly [-1, 1].
transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(),
     torchvision.transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])

# Raw strings: in a normal literal `\P` and `\c` are invalid escape sequences
# (a warning today, an error in future Python versions). The path is unchanged.
testset = torchvision.datasets.CIFAR10(root=r'D:\PyTorch\cifar-10-python', train=False,download=False, transform=transform)
trainvalset = torchvision.datasets.CIFAR10(root=r'D:\PyTorch\cifar-10-python', train=True,download=False, transform=transform)

trainset, valset = torch.utils.data.random_split(trainvalset, [45000, 5000]) # 10% for validation

train_loader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=1000, shuffle=False)
This is the architecture of my model.
class CifarModel(nn.Module):
    """Logistic-regression style classifier: flatten the image, one linear layer.

    Args:
        in_features: number of values per flattened image. The default
            ``3 * 32 * 32`` is a 3-channel 32x32 image, matching the 3072
            values per image implied by the reported size-mismatch error.
        out_features: number of classes; falls back to the module-level
            ``num_classes`` when omitted.
    """

    def __init__(self, in_features=3 * 32 * 32, out_features=None):
        super().__init__()
        if out_features is None:
            out_features = num_classes
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Return class logits of shape (batch, out_features)."""
        # Flatten per sample, keeping the batch dimension. A fixed
        # `view(-1, 64*8*8)` regroups values across images (1000 images of
        # 3072 values became 750 rows of 4096).
        xb = xb.reshape(xb.size(0), -1)
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the cross-entropy loss for one training batch."""
        images, labels = batch
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss

    def validation_step(self, batch):
        """Return detached ``{'val_loss', 'val_acc'}`` tensors for one batch."""
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc.detach()}

    def validation_epoch_end(self, outputs):
        """Average per-batch losses and accuracies into plain floats."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the validation metrics of one epoch."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))
# Untrained model instance used by the evaluation / training calls.
model = CifarModel()
def accuracy(outputs, labels):
    """Share of samples whose top-scoring class matches the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class indices.

    Returns:
        A 0-dim float tensor holding ``hits / number_of_rows``.
    """
    top_class = torch.max(outputs, dim=1)[1]
    hits = top_class == labels
    return torch.tensor(hits.sum().item() / len(top_class))
def evaluate(model, val_loader):
    """Run the model's validation hooks over ``val_loader``.

    Gradient tracking is disabled for the forward passes, so no autograd
    graph is built for the (large) validation batches.

    Args:
        model: object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of validation batches.

    Returns:
        The value returned by ``model.validation_epoch_end``.
    """
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and validate after each one.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate passed to the optimizer.
        model: module exposing ``training_step`` and ``epoch_end`` plus the
            validation hooks used by ``evaluate``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches.
        opt_func: optimizer constructor called as ``opt_func(params, lr)``.

    Returns:
        List with the validation result of every epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_idx in range(epochs):
        # Training phase: one optimizer step per batch
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()

        # Validation phase
        metrics = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, metrics)
        history.append(metrics)
    return history
# Metrics of the untrained model on the validation loader.
evaluate(model, val_loader)
Here you are specifying that the number of output classes should be 10:
# Second argument of the model's nn.Linear, i.e. the width of its output.
num_classes = 10
The view in your forward function flattens to a width that does not match the linear layer's input_size:
xb = xb.view(-1, 64*8*8) # a batch of 1000 images (3072 values each) is regrouped into 750 x 4096
out = self.linear(xb) # the layer is nn.Linear(input_size, num_classes), so it expects 1024 columns, not 4096
# input_size = 4*4*64 # 1024
# num_classes = 10
Modify it like this:
# Flatten per image so the batch dimension still matches the labels;
# view(-1, 64*4*4) would regroup 1000 images of 3072 values into 3000 rows.
xb = xb.view(xb.size(0), -1) # 1000 x 3072 for a batch of 1000 CIFAR-10 images
out = self.linear(xb) # needs self.linear = nn.Linear(3*32*32, num_classes): M1 1000x3072, M2 3072x10
# input_size = 3*32*32 # 3072
# num_classes = 10
I am writing a CNN to classify some images. When I run the training part of my code, the only output I receive is the printed "Training finished". This tells me that my code ran through to the end but did not actually compute anything useful - I get no errors and have not been able to identify the problem.
The input size of fc1 (the first fully connected layer) is supposed to be the flattened size of the previous layer's output.
Data
train_split = 0.70  # Fractions for train / valid; the remaining rows form the test set.
valid_split = 0.10

n_samples = len(data_df)
train_size = int(n_samples * train_split)
valid_size = int(n_samples * valid_split)
valid_end = train_size + valid_size  # first row index of the test slice

# Consecutive, non-overlapping row ranges of data_df.
train_frame = data_df[:train_size]
valid_frame = data_df[train_size:valid_end].reset_index(drop=True)
test_frame = data_df[valid_end:].reset_index(drop=True)

ins_dataset_train = ImageNet10(df=train_frame, transform=data_transform)
ins_dataset_valid = ImageNet10(df=valid_frame, transform=data_transform)
ins_dataset_test = ImageNet10(df=test_frame, transform=data_transform)
Data Loaders
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    ins_dataset_train, batch_size=16, shuffle=True, num_workers=2)

# Validation batches (also shuffled, as configured).
valid_loader = torch.utils.data.DataLoader(
    ins_dataset_valid, batch_size=16, shuffle=True, num_workers=2)

# Forward pass only so batch size can be larger; order is kept fixed.
test_loader = torch.utils.data.DataLoader(
    ins_dataset_test, batch_size=24, shuffle=False, num_workers=2)
Convolutional neural network
class ConvNet(nn.Module):
    """Three conv blocks followed by a two-layer classifier head.

    Each conv block is conv -> ReLU -> 2x2 max-pool -> dropout. The feature
    map is then pooled to 8x8 so that ``fc1`` (which expects 32*8*8 inputs)
    works for any sufficiently large image size; for inputs that already
    produce an 8x8 map (e.g. 84x84) this pooling is an identity.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        # Convolutions (no padding, so each one shrinks the map slightly)
        self.conv1 = nn.Conv2d(3, 16, (3, 3))
        self.conv2 = nn.Conv2d(16, 24, (4, 4))
        self.conv3 = nn.Conv2d(24, 32, (4, 4))

        # Parameter-free layers are shared by all three blocks; they were
        # previously assigned three times under the same attribute names.
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)

        # Guarantees the 8x8 spatial size assumed by fc1.
        self.to_8x8 = nn.AdaptiveAvgPool2d((8, 8))

        # Fully connected head
        self.fc1 = nn.Linear(32 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, num_classes)
        # Not applied in forward(): losses such as nn.CrossEntropyLoss expect
        # raw logits. Call `self.final(logits)` to obtain probabilities.
        self.final = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        The previous forward flattened the input image and fed it straight to
        ``fc1``, skipping every convolution and ``fc2``.
        """
        out = self.dropout(self.pool(self.relu(self.conv1(x))))
        out = self.dropout(self.pool(self.relu(self.conv2(out))))
        out = self.dropout(self.pool(self.relu(self.conv3(out))))
        out = self.to_8x8(out)
        # Flatten (batch, 32, 8, 8) -> (batch, 2048) for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.relu(self.fc1(out))
        out = self.fc2(out)
        return out
Training Model
def train_model_epochs(num_epochs):
    """Train the module-level ``net`` for ``num_epochs`` epochs.

    Uses the module-level ``train_loader``, ``optimizer`` and
    ``loss_function``. The running loss is printed every 1000 batches and
    once more at the end of an epoch for any batches not yet reported, so
    loaders shorter than 1000 batches still produce output.

    Args:
        num_epochs: number of passes over ``train_loader``.
    """
    net.train()
    for epoch in range(num_epochs):  # was hard-coded to range(10)
        running_loss = 0.0
        pending = 0  # batches accumulated since the last report
        # Iterate the training split; this loop previously read test_loader.
        for i, data in enumerate(train_loader, 0):
            images, labels = data
            print("Batch", i, "size:", len(images))

            optimizer.zero_grad()
            outputs = net(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            # `+=` accumulates; the former `=+` re-assigned the value each batch.
            running_loss += loss.item()
            pending += 1
            if i%1000 == 999:
                print('Epoch / Batch [%d / %d] - Loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0
                pending = 0
        if pending:
            print('Epoch / Batch [%d / %d] - Loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / pending))
    print("Training finished")