This is my model:
class BiLSTM(nn.Module):
def __init__(self):
super(BiLSTM, self).__init__()
self.hidden_size = 128
drp = 0.2
n_classes = len(le.classes_)
self.embedding = nn.Embedding(max_features, embed_size)
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
self.linear = nn.Linear(self.hidden_size*4 , 128)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(drp)
self.out = nn.Linear(128, n_classes)
def forward(self, x):
#rint(x.size())
h_embedding = self.embedding(x)
_embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
h_lstm, _ = self.lstm(h_embedding)
avg_pool = torch.mean(h_lstm, 1)
max_pool, _ = torch.max(h_lstm, 1)
conc = torch.cat(( avg_pool, max_pool), 1)
conc = self.relu(self.linear(conc))
conc = self.dropout(conc)
out = self.out(conc)
return out
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean',)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()
# Load train and test in CUDA Memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()
# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)
# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
train_loss = []
valid_loss = []
for epoch in range(n_epochs):
start_time = time.time()
# Set model to train configuration
model.train()
avg_loss = 0.
for i, (x_batch, y_batch) in enumerate(train_loader):
# Predict/Forward Pass
y_pred = model(x_batch)
# Compute loss
loss = loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_loss += loss.item() / len(train_loader)
#acc =n-avg_loss
# Set model to validation configuration
model.eval()
avg_val_loss = 0.
val_preds = np.zeros((len(x_cv),len(le.classes_)))
for i, (x_batch, y_batch) in enumerate(valid_loader):
y_pred = model(x_batch).detach()
avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
#val_accuracy = n- avg_val_loss
# keep/store predictions
val_preds[i * batch_size:(i+1) * batch_size] =F.softmax(y_pred).cpu().numpy()
val_preds=val_preds*5000
# Check Accuracy
val_accuracy = sum(val_preds.argmax(axis=1)==test_y)/len(test_y)
train_loss.append(avg_loss)
valid_loss.append(avg_val_loss)
elapsed_time = time.time() - start_time
print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
epoch + 1, n_epochs , avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy this is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy, The size of the dataset is 3000
[1]: https://i.stack.imgur.com/NbK92.png [output looks like this][1]
[1]: https://i.stack.imgur.com/ll12J.png [Data Looks Like this][1]
Related
This is my code, and image of training accuracy
# !curl -L -s -o '/content/ESC-50-master-16k.tar' 'https://drive.google.com/uc?id=1hFt-qarD_Ihjb3jW7pd3tje2pcY5tfuJ&confirm=t'
# !tar -xvf "ESC-50-master-16k.tar"
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
import numpy as np
import pandas as pd
from glob import glob
Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load ESC-50 Data & Preprocess
metadata = pd.read_csv("./ESC-50-master-16k/meta/esc50.csv")
wav_list = sorted(glob("./ESC-50-master-16k/resample/\*.wav"))
def spec_to_image(spec, eps=1e-6):
mean = spec.mean()
std = spec.std()
spec_norm = (spec - mean) / (std + eps)
spec_min, spec_max = spec_norm.min(), spec_norm.max()
spec_scaled = 255 \* (spec_norm - spec_min) / (spec_max - spec_min)
spec_scaled = spec_scaled.astype(np.uint8)
return spec_scaled
class esc50dataset(Dataset):
def __init__(self, wavlist, metadata):
self.labels = np.array(metadata.target).astype(int)
self.audio = \[\]
for f in wavlist:
wav, sr = librosa.load(f, sr=None)
spec=librosa.feature.melspectrogram(wav, sr=sr, n_fft=1024, hop_length=640, n_mels=126) # \[126,126\]
spec_db=librosa.power_to_db(spec,top_db=80)
spec_image = np.expand_dims(spec_to_image(spec_db), axis=0)
self.audio.append(spec_image.tolist())
self.audio = np.array(self.audio)
def __len__(self):
return len(self.audio)
def __getitem__(self, idx):
return self.audio\[idx\], self.labels\[idx\]
dataset = esc50dataset(wav_list, metadata)
features = dataset\[:\]\[0\]
labels = dataset\[:\]\[1\]
train test split
train_size = 0.7
val_size = 0.5
`make train set`
split_id = int(len(features) \* train_size) # 2000 \* 0.7 = 1400
train_features, remain_features = features\[:split_id\], features\[split_id:\] # train_features = 1400, remain_features = 600
train_labels, remain_labels = labels\[:split_id\], labels\[split_id:\] # train_labels = 1400, remain_labels = 600
`make val and test set`
split_val_id = int(len(remain_features) \* val_size) # 600 \* 0.5 = 300
val_features, test_features = remain_features\[:split_val_id\], remain_features\[split_val_id:\] # val_features = 300, test_features = 300
val_labels, test_labels = remain_labels\[:split_val_id\], remain_labels\[split_val_id:\] # val_labels = 300, test_labels = 300
`define batch size`
batch_size = 32
`create tensor datasets`
train_set = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
valid_set = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_labels))
test_set = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))
`create dataloaders`
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size) # 1400 / 32
val_loader = DataLoader(valid_set, shuffle=True, batch_size=batch_size) # 300 / 32
test_loader = DataLoader(test_set, shuffle=True, batch_size=batch_size) # 300 / 32
`Model Hyperparameter`
in_channel = features\[0\].shape\[0\] # in_channel = 1
max_pool_kernel = 3
hidden_size = 256
hidden_layers = 10
output_classes = labels.max() + 1 # output_classes = 50
dropout = 0.25
learning_rate = 0.001
num_epochs = 10
Model
class CRNN(nn.Module):
def __init__(self, in_channel, hidden_size, hidden_layers, output_classes, dropout):
super(CRNN, self).__init__()
self.hidden_layers = hidden_layers
self.hidden_size = hidden_size
self.CNN_layer1 = nn.Sequential(
nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 32, 42, 42
)
self.CNN_layer2 = nn.Sequential(
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 64, 14, 14
)
self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=hidden_layers, batch_first=True)
self.dropout = nn.Dropout(dropout)
self.fc = nn.Linear(hidden_size, output_classes)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = x.float()
# print("start : {}".format(x))
x = self.CNN_layer1(x) # 1, 126, 126 -> 32, 42, 42
# print("after CNN_1 : {}".format(x))
x = self.CNN_layer2(x) # 32, 42, 42 -> 64, 14, 14
# print("after CNN_2 : {}".format(x))
x = x.reshape(x.size(0), -1, 64) # 64, 14, 14 -> 12544 , x.size(0) = Batchsize = 32
# print("after reshape : {}".format(x))
h0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.size([10, 32(batch_size), 512])
c0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.size([10, 32(batch_size), 512])
x, _ = self.lstm(x, (h0, c0))
# print("after lstm : {}".format(x))
x = self.dropout(x)
# print("after dropout : {}".format(x))
x = self.fc(x[:,-1,:])
# print("after fc : {}".format(x))
x = self.sigmoid(x)
# print("after sigmoid : {}".format(x))
return x
model initialization
model = CRNN(in_channel, hidden_size, hidden_layers, output_classes, dropout)
Criterion & Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Train
from tqdm import tqdm
tqdm.pandas()
tr_loss = \[\]
tr_acc = \[\]
v_loss = \[\]
v_acc = \[\]
best_valid_loss = torch.inf
best_epoch = 0
model = model.to(device)
epochloop = tqdm(range(num_epochs), position=0, desc='Training', leave=True)
for epoch in epochloop:
model.train()
train_loss = 0
train_acc = 0
epochloop.set_postfix_str(f'Training epoch {epoch}/{len(epochloop)}') # visualize
for idx, (audio, label) in enumerate(train_loader):
audio, label = audio.to(device), label.to(device)
out = model(audio)
\_, pred = torch.max(out.data, 1)
# acc
train_acc += (pred==label).sum()
# loss
optimizer.zero_grad()
loss = criterion(out, label)
train_loss += loss.item()
loss.backward()
optimizer.step()
Validation
model.eval()
val_loss = 0
val_acc = 0
with torch.no_grad():
for idx, (audio, label) in enumerate(val_loader):
epochloop.set_postfix_str(f'Validation batch {idx}/{len(val_loader)}')
audio, label = audio.to(device), label.to(device)
# forward pass
out = model(audio)
_, pred = torch.max(out.data, 1)
# acc
val_acc += (pred==label).sum()
# loss
loss = criterion(out, label)
val_loss += loss.item()
model.train()
save model if validation loss decrease
if val_loss / len(valid_set) \<= best_valid_loss:
best_valid_loss = val_loss / len(val_loader)
best_epoch = epoch
torch.save(model.state_dict(), "LSTM_epoch\_{}.pth".format(epoch))
print epoch loss & accuracy
print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss / len(train_loader):.3f} Train Acc: {train_acc / len(train_set) \* 100}% | Val Loss: {val_loss / len(val_loader):.3f} Val Acc: {val_acc / len(valid_set) \* 100}%')
tr_loss.append(train_loss / len(train_loader))
tr_acc.append(train_acc.cpu().numpy() / len(train_set) \* 100)
v_loss.append(val_loss / len(val_loader))
v_acc.append(val_acc.cpu().numpy() / len(valid_set) \* 100)
my training accuracy
Please tell me why my training accuracy is too low..
By your dataset, I'm inferring you are trying to identify sound data, and you are applying a CRNN. If you go to the repo Have you seen the repo for this dataset you will find a table with the accuracy for each method, the CRNN has about 60% accuracy, this is not so great since other methods achieve more than 95%.
You might need to change your model to get better results. Check the table you will find also some models with code
A bit difficult to give you an answer without reviewing your whole code, and re-running the model. Given this situation, I would suggest you create a Github repo, with appropriate the requirements.txt file for your code.
Even more helpful, and more likely to be answered would be if your code is in a jupyter notebook, and you can show the results of your loss curve and epochs. Your results could be more understandable shown like the output of tensorboard.
Hope this helps you to get some answers
I am building a model to classify news (AG news dataset). The vocab size ~33k with custom embedding layer. I have run this for 20 epochs but the loss and accuracy (1.3 and 26% respec.) is almost constant even at the end of 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
class NewsClassifier(nn.Module):
def __init__(self, vocab_weights = None, rnn_type = 'LSTM', vocab_size = len(vocab.vocab), n_classes = 4, embed_size = 300, rnn_units = 512, \
n_layers = 2, bi_dir = True, rnn_drop = 0.0, padding_index = vocab['<unk>']):
super().__init__()
self.rnn_units = rnn_units
self.n_classes = n_classes
self.rnn_type = rnn_type
if vocab_weights:
self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
else:
self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx = padding_index)
if rnn_type == 'LSTM':
self.rnn = nn.LSTM(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
elif rnn_type == 'GRU':
self.rnn = nn.GRU(embed_size, rnn_units, num_layers = n_layers, bidirectional = bi_dir, dropout = rnn_drop)
else:
raise NotImplementError
self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)
def forward(self, data, lens):
x_embed = self.embedding(data) # (padded_lens, batch_size, embed_dim)
x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted = False) #packing sequences and passing to RNN unit
if self.rnn_type == 'LSTM':
output_packed, (hidden,cell) = self.rnn(x_packed) #output is packed and cannot be fed to linear layers
else:
output_packed, hidden = self.rnn(x_packed) #For GRU there is only hidden state
#Though n number of layers are stacked the output is always 1
output_padded, _ = pad_packed_sequence(output_packed) #output is padded to be fed to linear layer (padded_lens, batch size, hidden_units)
#Picking only the last output --> equivalent to reutrn_sequences = False in Keras
out_reduced = torch.cat((output_padded[-1, :, : self.rnn_units], output_padded[-1, :, self.rnn_units :]), 1)
return self.fc(out_reduced)
model = NewsClassifier()
print(f'The total number of trainable parameters are : {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
My training function is:
def train(model, iterator = trainDataloader, optimizer = optimizer, loss_fn = criterion):
e_loss = e_acc = i = 0
model.train()
for inputs, leng, labels in iterator:
inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
optimizer.zero_grad()
preds = model(inputs, leng).squeeze(1)
loss = loss_fn(preds, labels.long())
acc = accuracy(preds, labels)
loss.backward()
optimizer.step()
e_loss += loss.item()
e_acc += acc.item()
i += 1
return e_loss/i, e_acc/i
def predict(model, iterator = testDataloader, loss_fn = criterion):
e_loss = e_acc = i = 0
model.eval()
with torch.no_grad():
for inputs, leng, labels in iterator:
inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
preds = model(inputs, leng).squeeze(1)
loss = loss_fn(preds, labels.long())
acc = accuracy(preds, labels)
e_loss += loss.item()
e_acc += acc.item()
i += 1
return e_loss/i, e_acc/i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
start_time = time.time()
train_loss, train_acc = train(model)
valid_loss, valid_acc = predict(model)
end_time = time.time()
epoch_mins, epoch_secs = epoch_time(start_time, end_time)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), 'tut1-model.pt')
print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
As a Pytorch newbie (coming from tensorflow), I am unsure of how to implement Early Stopping. My research has led me discover that pytorch does not have a native way to so this. I have also discovered torchsample, but am unable to install it in my conda environment for whatever reason. Is there a simple way to go about applying early stopping without it? Here is my current setup:
class RegressionDataset(Dataset):
def __init__(self, X_data, y_data):
self.X_data = X_data
self.y_data = y_data
def __getitem__(self, index):
return self.X_data[index], self.y_data[index]
def __len__(self):
return len(self.X_data)
train_dataset = RegressionDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
val_dataset = RegressionDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float())
test_dataset = RegressionDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())
# Model Params
EPOCHS = 100
BATCH_SIZE = 1000
LEARNING_RATE = 0.001
NUM_FEATURES = np.shape(X_test)[1]
# Initialize Dataloader
train_loader = DataLoader(dataset = train_dataset, batch_size=BATCH_SIZE, shuffle = True)
val_loader = DataLoader(dataset = val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset = test_dataset, batch_size=BATCH_SIZE)
# Define Neural Network Architecture
class MultipleRegression(nn.Module):
def __init__(self, num_features):
super(MultipleRegression, self).__init__()
# Define architecture
self.layer_1 = nn.Linear(num_features, 16)
self.layer_2 = nn.Linear(16, 32)
self.layer_3 = nn.Linear(32, 25)
self.layer_4 = nn.Linear(25, 20)
self.layer_5 = nn.Linear(20, 16)
self.layer_out = nn.Linear(16, 1)
self.relu = nn.ReLU() # ReLU applied to all layers
# Initialize weights and biases
nn.init.xavier_uniform_(self.layer_1.weight)
nn.init.zeros_(self.layer_1.bias)
nn.init.xavier_uniform_(self.layer_2.weight)
nn.init.zeros_(self.layer_2.bias)
nn.init.xavier_uniform_(self.layer_3.weight)
nn.init.zeros_(self.layer_3.bias)
nn.init.xavier_uniform_(self.layer_4.weight)
nn.init.zeros_(self.layer_4.bias)
nn.init.xavier_uniform_(self.layer_5.weight)
nn.init.zeros_(self.layer_5.bias)
nn.init.xavier_uniform_(self.layer_out.weight)
nn.init.zeros_(self.layer_out.bias)
def forward(self, inputs):
x = self.relu(self.layer_1(inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.relu(self.layer_4(x))
x = self.relu(self.layer_5(x))
x = self.layer_out(x)
return(x)
def predict(self, test_inputs):
x = self.relu(self.layer_1(test_inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.relu(self.layer_4(x))
x = self.relu(self.layer_5(x))
x = self.layer_out(x)
return(x)
# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = MultipleRegression(NUM_FEATURES)
model.to(device)
print(model)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)
# define dictionary to store loss/epochs for training and validation
loss_stats = {
"train": [],
"val": []
}
# begin training
print("Begin Training")
for e in tqdm(range(1, EPOCHS+1)):
# Training
train_epoch_loss = 0
model.train()
for X_train_batch, y_train_batch in train_loader:
X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
optimizer.zero_grad()
y_train_pred = model(X_train_batch)
train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
train_loss.backward()
optimizer.step()
train_epoch_loss += train_loss.item()
# validation
with torch.no_grad():
val_epoch_loss = 0
model.eval()
for X_val_batch, y_val_batch in val_loader:
X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
y_val_pred = model(X_val_batch)
val_loss = criterion(y_val_pred, y_val_batch.unsqueeze(1))
val_epoch_loss += val_loss.item()
loss_stats["train"].append(train_epoch_loss/len(train_loader))
loss_stats["val"].append(val_epoch_loss/len(val_loader))
print(f"Epoch {e}: \ Train loss: {train_epoch_loss/len(train_loader):.5f} \ Val loss: {val_epoch_loss/len(val_loader):.5f}")
# Visualize loss and accuracy
train_val_loss_df = pd.DataFrame.from_dict(loss_stats).reset_index().melt(id_vars=["index"]).rename(columns = {"index":"epochs"})
plt.figure()
sns.lineplot(data = train_val_loss_df, x = "epochs", y = "value", hue = "variable").set_title("Train-Val Loss/Epoch")
# Test model
y_pred_list = []
with torch.no_grad():
model.eval()
for X_batch, _ in test_loader:
X_batch = X_batch.to(device)
y_test_pred = model(X_batch)
y_pred_list.append(y_test_pred.cpu().numpy())
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_pred_list = [item for sublist in y_pred_list for item in sublist]
y_pred_list = np.array(y_pred_list)
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :", mse)
print("R^2 :", r_square)
A basic way to do this is to keep track of the best validation loss obtained so far.
You can have a variable best_loss = 0 initialized before your loop over epochs (or you could do other things like best loss per epoch, etc.).
After each validation pass then do:
if val_loss > best_loss:
best_loss = val_loss
# At this point also save a snapshot of the current model
torch.save(model, 'my_model_best_loss.pth')
Then, if the best_loss does not improve significantly after some number of training steps, or by the end of the epoch, or if it val_loss gets worse, break out of the loop and terminate the training there.
For implementing algorithms like early stopping (and your training loop in general) you may find it easier to give PyTorch Lightning a try (no affiliation, but it's much easier than trying to roll everything by hand).
I am trying to create a logistic model by using CIFAR10 data in PyTorch. After running the model for evaluation I run into an error :
RuntimeError: size mismatch, m1: [750 x 4096], m2: [1024 x 10] at C:\w\1\s\tmp_conda_3.7_100118\conda\conda-bld\pytorch_1579082551706\work\aten\src\TH/generic/THTensorMath.cpp:136
It seems like input_size is creating a problem, I dont know I am new to this. Please let me know what changes should I make in order to overcome this error.
These are the hyperparameters:
batch_size = 100
learning_rate = 0.001
# Other constants
input_size = 4*4*64
num_classes = 10
This is the cell that downloads and splits the dataset into train, validation and test.
transform = torchvision.transforms.Compose(
[torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
testset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=False,download=False, transform=transform)
trainvalset = torchvision.datasets.CIFAR10(root='D:\PyTorch\cifar-10-python', train=True,download=False, transform=transform)
trainset, valset = torch.utils.data.random_split(trainvalset, [45000, 5000]) # 10% for validation
train_loader = torch.utils.data.DataLoader(trainset, batch_size=50, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False)
val_loader = torch.utils.data.DataLoader(valset, batch_size=1000, shuffle=False)
This is the architecture of my model.
class CifarModel(nn.Module):
def __init__(self):
super().__init__()
self.linear = nn.Linear(input_size, num_classes)
def forward(self, xb):
xb = xb.view(-1, 64*8*8)
#xb = xb.reshape(-1, 784)
print(xb.shape)
out = self.linear(xb)
return out
def training_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
return loss
def validation_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
acc = accuracy(out, labels) # Calculate accuracy
return {'val_loss': loss.detach(), 'val_acc': acc.detach()}
def validation_epoch_end(self, outputs):
batch_losses = [x['val_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean() # Combine losses
batch_accs = [x['val_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean() # Combine accuracies
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))
model = CifarModel()
def accuracy(outputs, labels):
_, preds = torch.max(outputs, dim=1)
return torch.tensor(torch.sum(preds == labels).item() / len(preds))
def evaluate(model, val_loader):
outputs = [model.validation_step(batch) for batch in val_loader]
return model.validation_epoch_end(outputs)
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
history = []
optimizer = opt_func(model.parameters(), lr)
for epoch in range(epochs):
# Training Phase
for batch in train_loader:
loss = model.training_step(batch)
loss.backward()
optimizer.step()
optimizer.zero_grad()
# Validation phase
result = evaluate(model, val_loader)
model.epoch_end(epoch, result)
history.append(result)
return history
evaluate(model, val_loader)
Here you are specifying that the number of output classes should be 10:
num_classes = 10
Your forward function does not reflect this:
xb = xb.view(-1, 64*8*8) # you get 750x4096
out = self.linear(xb) # here an input of
# input_size to linear layer = 4*4*64 # 1024
# num_classes = 10
Modify it like this:
xb = xb.view(-1, 64*4*4) # you get 750x1024
out = self.linear(xb) # M1 750x1024 M2 1024x10:
# input_size = 4*4*64 # 1024
# num_classes = 10
I have a time series data looking something like this:
I am trying to model this with a sequence to sequence RNN in pytorch. It trains well and I can see the loss going down. But on testing it gives the same out put irrespective of the input.
My Model:
class RNNModel(nn.Module):
def __init__(self, predictor_size, hidden_size, num_layers, dropout = 0.3, output_size=83):
super(RNNModel, self).__init__()
self.drop = nn.Dropout(dropout)
self.rnn = nn.GRU(predictor_size, hidden_size, num_layers=num_layers, dropout = dropout)
self.decoder = nn.Linear(hidden_size, output_size)
self.init_weights()
self.hidden_size = hidden_size
self.num_layers = num_layers
def init_weights(self):
initrange = 0.1
self.decoder.bias.data.fill_(0)
self.decoder.weight.data.uniform_(-initrange, initrange)
def forward(self, input, hidden):
output, hidden = self.rnn(input, hidden)
output = self.drop(output)
decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
return Variable(weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
Train Method:
def train(data_source, lr):
# turn on training mode that enables dropout
model.train()
total_loss = 0
hidden = model.init_hidden(bs_train)
optimizer = optim.Adam(model.parameters(), lr = lr)
for batch, i in enumerate(range(0, data_source.size(0) - 1, bptt_size)):
data, targets = get_batch(data_source, i)
# Starting each batch, we detach the hidden state from how it was previously produced
# so that model doesen't ry to backprop to all the way start of the dataset
# unrolling of the graph will go from the last iteration to the first iteration
hidden = Variable(hidden.data)
if cuda.is_available():
hidden = hidden.cuda()
optimizer.zero_grad()
output, hidden = model(data, hidden)
loss = criterion(output, targets)
loss.backward()
# clip_grad_norm to prevent gradient explosion
torch.nn.utils.clip_grad_norm(model.parameters(), clip)
optimizer.step()
total_loss += len(data) * loss.data
# return accumulated loss for all the iterations
return total_loss[0] / len(data_source)
Evaluation Method:
def evaluate(data_source):
# turn on evaluation to disable dropout
model.eval()
model.train(False)
total_loss = 0
hidden = model.init_hidden(bs_valid)
for i in range(0, data_source.size(0) - 1, bptt_size):
data, targets = get_batch(data_source, i, evaluation = True)
if cuda.is_available():
hidden = hidden.cuda()
output, hidden = model(data, hidden)
total_loss += len(data) * criterion(output, targets).data
hidden = Variable(hidden.data)
return total_loss[0]/len(data_source)
Training Loop:
best_val_loss = None
best_epoch = 0
def run(epochs, lr):
val_losses = []
num_epochs = []
global best_val_loss
global best_epoch
for epoch in range(0, epochs):
train_loss = train(train_set, lr)
val_loss = evaluate(test_set)
num_epochs.append(epoch)
val_losses.append(val_loss)
print("Train Loss: ", train_loss, " Validation Loss: ", val_loss)
if not best_val_loss or val_loss < best_val_loss:
best_val_loss = val_loss
torch.save(model.state_dict(), "./4.model.pth")
best_epoch = epoch
return num_epochs, val_losses
Loss with epochs:
Getting the output:
model = RNNModel(predictor_size, hidden_size, num_layers, dropout_pct, output_size)
model.load_state_dict(torch.load("./4.model.pth"))
if cuda.is_available():
model.cuda()
model.eval()
model.train(False)
hidden = model.init_hidden(1)
inp = torch.Tensor(var[105])
input = Variable(inp.contiguous().view(1,1,predictor_size), volatile=True)
if cuda.is_available():
input.data = input.data.cuda()
output, hidden = model(input, hidden)
op = output.squeeze().data.cpu()
print(op)
Here I always get the same output irrespective of datapoint I give as input. Can somebody please tell me what I am doing wrong.