This is my code, along with an image of my training accuracy.
# !curl -L -s -o '/content/ESC-50-master-16k.tar' 'https://drive.google.com/uc?id=1hFt-qarD_Ihjb3jW7pd3tje2pcY5tfuJ&confirm=t'
# !tar -xvf "ESC-50-master-16k.tar"
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
import numpy as np
import pandas as pd
from glob import glob
Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load ESC-50 Data & Preprocess
metadata = pd.read_csv("./ESC-50-master-16k/meta/esc50.csv")
wav_list = sorted(glob("./ESC-50-master-16k/resample/*.wav"))
def spec_to_image(spec, eps=1e-6):
mean = spec.mean()
std = spec.std()
spec_norm = (spec - mean) / (std + eps)
spec_min, spec_max = spec_norm.min(), spec_norm.max()
spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
spec_scaled = spec_scaled.astype(np.uint8)
return spec_scaled
class esc50dataset(Dataset):
def __init__(self, wavlist, metadata):
self.labels = np.array(metadata.target).astype(int)
self.audio = []
for f in wavlist:
wav, sr = librosa.load(f, sr=None)
spec = librosa.feature.melspectrogram(wav, sr=sr, n_fft=1024, hop_length=640, n_mels=126)  # [126, 126]
spec_db = librosa.power_to_db(spec, top_db=80)
spec_image = np.expand_dims(spec_to_image(spec_db), axis=0)
self.audio.append(spec_image.tolist())
self.audio = np.array(self.audio)
def __len__(self):
return len(self.audio)
def __getitem__(self, idx):
return self.audio[idx], self.labels[idx]
dataset = esc50dataset(wav_list, metadata)
features = dataset[:][0]
labels = dataset[:][1]
train test split
train_size = 0.7
val_size = 0.5
make train set
split_id = int(len(features) * train_size) # 2000 * 0.7 = 1400
train_features, remain_features = features[:split_id], features[split_id:] # train_features = 1400, remain_features = 600
train_labels, remain_labels = labels[:split_id], labels[split_id:] # train_labels = 1400, remain_labels = 600
make val and test set
split_val_id = int(len(remain_features) * val_size) # 600 * 0.5 = 300
val_features, test_features = remain_features[:split_val_id], remain_features[split_val_id:] # val_features = 300, test_features = 300
val_labels, test_labels = remain_labels[:split_val_id], remain_labels[split_val_id:] # val_labels = 300, test_labels = 300
define batch size
batch_size = 32
create tensor datasets
train_set = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
valid_set = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_labels))
test_set = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))
create dataloaders
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size) # 1400 / 32
val_loader = DataLoader(valid_set, shuffle=True, batch_size=batch_size) # 300 / 32
test_loader = DataLoader(test_set, shuffle=True, batch_size=batch_size) # 300 / 32
Model Hyperparameters
in_channel = features[0].shape[0] # in_channel = 1
max_pool_kernel = 3
hidden_size = 256
hidden_layers = 10
output_classes = labels.max() + 1 # output_classes = 50
dropout = 0.25
learning_rate = 0.001
num_epochs = 10
Model
class CRNN(nn.Module):
def __init__(self, in_channel, hidden_size, hidden_layers, output_classes, dropout):
super(CRNN, self).__init__()
self.hidden_layers = hidden_layers
self.hidden_size = hidden_size
self.CNN_layer1 = nn.Sequential(
nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 32, 42, 42
)
self.CNN_layer2 = nn.Sequential(
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 64, 14, 14
)
self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=hidden_layers, batch_first=True)
self.dropout = nn.Dropout(dropout)
self.fc = nn.Linear(hidden_size, output_classes)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = x.float()
# print("start : {}".format(x))
x = self.CNN_layer1(x) # 1, 126, 126 -> 32, 42, 42
# print("after CNN_1 : {}".format(x))
x = self.CNN_layer2(x) # 32, 42, 42 -> 64, 14, 14
# print("after CNN_2 : {}".format(x))
x = x.reshape(x.size(0), -1, 64) # 64, 14, 14 -> 12544 , x.size(0) = Batchsize = 32
# print("after reshape : {}".format(x))
h0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.Size([10, 32(batch_size), 256])
c0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.Size([10, 32(batch_size), 256])
x, _ = self.lstm(x, (h0, c0))
# print("after lstm : {}".format(x))
x = self.dropout(x)
# print("after dropout : {}".format(x))
x = self.fc(x[:,-1,:])
# print("after fc : {}".format(x))
x = self.sigmoid(x)
# print("after sigmoid : {}".format(x))
return x
model initialization
model = CRNN(in_channel, hidden_size, hidden_layers, output_classes, dropout)
Criterion & Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Train
from tqdm import tqdm
tqdm.pandas()
tr_loss = []
tr_acc = []
v_loss = []
v_acc = []
best_valid_loss = torch.inf
best_epoch = 0
model = model.to(device)
epochloop = tqdm(range(num_epochs), position=0, desc='Training', leave=True)
for epoch in epochloop:
model.train()
train_loss = 0
train_acc = 0
epochloop.set_postfix_str(f'Training epoch {epoch}/{len(epochloop)}') # visualize
for idx, (audio, label) in enumerate(train_loader):
audio, label = audio.to(device), label.to(device)
out = model(audio)
_, pred = torch.max(out.data, 1)
# acc
train_acc += (pred==label).sum()
# loss
optimizer.zero_grad()
loss = criterion(out, label)
train_loss += loss.item()
loss.backward()
optimizer.step()
Validation
model.eval()
val_loss = 0
val_acc = 0
with torch.no_grad():
for idx, (audio, label) in enumerate(val_loader):
epochloop.set_postfix_str(f'Validation batch {idx}/{len(val_loader)}')
audio, label = audio.to(device), label.to(device)
# forward pass
out = model(audio)
_, pred = torch.max(out.data, 1)
# acc
val_acc += (pred==label).sum()
# loss
loss = criterion(out, label)
val_loss += loss.item()
model.train()
save model if validation loss decreases
if val_loss / len(valid_set) <= best_valid_loss:
best_valid_loss = val_loss / len(val_loader)
best_epoch = epoch
torch.save(model.state_dict(), "LSTM_epoch_{}.pth".format(epoch))
print epoch loss & accuracy
print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss / len(train_loader):.3f} Train Acc: {train_acc / len(train_set) * 100}% | Val Loss: {val_loss / len(val_loader):.3f} Val Acc: {val_acc / len(valid_set) * 100}%')
tr_loss.append(train_loss / len(train_loader))
tr_acc.append(train_acc.cpu().numpy() / len(train_set) * 100)
v_loss.append(val_loss / len(val_loader))
v_acc.append(val_acc.cpu().numpy() / len(valid_set) * 100)
my training accuracy (image)
Please tell me why my training accuracy is so low.
Judging by your dataset, you are trying to classify sounds, and you are applying a CRNN. If you look at the repository for this dataset, you will find a table with the accuracy of each method: the CRNN reaches about 60% accuracy, which is not great, since other methods achieve more than 95%.
You might need to change your model to get better results; the same table also lists several models with code.
It is a bit difficult to give you an answer without reviewing your whole code and re-running the model. Given this situation, I would suggest you create a GitHub repo with an appropriate requirements.txt file for your code.
Even more helpful, and more likely to get answered, would be to put your code in a Jupyter notebook and show your loss curve across epochs; results presented like TensorBoard output are much easier to interpret.
Hope this helps you get some answers.
I am trying to train a neural network. I have two classes. Precision and Recall for one of the classes equals 0 all the time.
Here's the code for the neural network.
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet,self).__init__()
self.conv1 = nn.Sequential(
nn.Conv1d(
in_channels=1,
out_channels=200,
kernel_size=4,
stride=3,
padding = 0)
,nn.ReLU()
)
self.maxpool = nn.MaxPool1d(2)
random_input = torch.rand(1, 1, 1500 , requires_grad=False)
random_input = self.conv1(random_input)
random_input = self.maxpool(random_input)
maxpool_out = random_input.view(1, -1).shape[1]
self.fc1 = nn.Sequential(
nn.Linear(
in_features= maxpool_out,
out_features=200
),
nn.Dropout(p=0.05),
nn.ReLU()
)
self.fc2 = nn.Sequential(
nn.Linear(
in_features=200,
out_features=100
),
nn.Dropout(p=0.05),
nn.ReLU()
)
self.fc3 = nn.Sequential(
nn.Linear(
in_features=100,
out_features=50
),
nn.Dropout(p=0.05),
nn.ReLU()
)
self.lastlayer = nn.Linear(
in_features=50,
out_features=1
)
def forward(self,x):
#adding 1 dimention
x = x.unsqueeze(1)
#conv layers
x = self.conv1(x)
x = self.maxpool(x)
#flatten
x = x.reshape(x.shape[0], -1)
#3fc
x = self.fc1(x)
x = self.fc2(x)
x = self.fc3(x)
#output
x = self.lastlayer(x)
return x
Here's the training loop:
def binary_acc(y_pred, y_test):
y_pred_tag = torch.round(torch.sigmoid(y_pred))
correct_results_sum = (y_pred_tag == y_test).sum().float()
acc = correct_results_sum/y_test.shape[0]
acc = torch.round(acc * 100)
return acc
def Training(model, train_loader, criterion, optimizer, epochs):
train_losses = []
Acc =[]
for epoch in range(epochs):
epoch_accuracy = 0
train_loss = 0
total_pcaps = 0
model.train()
for elem in train_loader:
pcap_byte = elem['feature'].to(device)
labels = elem['label'].to(device)
optimizer.zero_grad()
outputs = model(pcap_byte)
loss = criterion(outputs, labels.unsqueeze(1).float())
loss.backward()
# Updating parameters
optimizer.step()
total_pcaps += labels.size(0)
acc = binary_acc(outputs, labels.unsqueeze(1).float())
train_loss += loss.item()
epoch_accuracy += acc.item()
Acc.append(epoch_accuracy)
average_loss = train_loss / len(train_loader)
train_losses.append(train_loss)
print('epoch %d, train_loss: %.3f' % (epoch + 1, average_loss))
After training, the precision and recall of one of the classes equals 0, while for the other class precision = 1 and recall = 0.9.
Is there something wrong with the data? I self-collected the data and I can't understand whether the problem is with the data or something is wrong in my code.
The problem was with the data. I was using undersampling in the pre-processing step. I deleted that part and the model performed well.
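For anyone hitting the same symptom (precision and recall stuck at 0 for one class), a quick sanity check is to count the labels that actually reach the training loop, before and after any resampling step. The sketch below is illustrative, not from the original post; it assumes a loader whose batches are either dicts with a 'label' key (as in the question) or plain (features, labels) tuples.
from collections import Counter
import torch
from torch.utils.data import DataLoader, TensorDataset
def label_distribution(loader):
    """Count how many samples of each class a DataLoader actually yields."""
    counts = Counter()
    for batch in loader:
        # the question's loader yields dicts with a 'label' key;
        # adapt this line if yours yields (features, labels) tuples instead
        labels = batch['label'] if isinstance(batch, dict) else batch[1]
        counts.update(labels.view(-1).tolist())
    return counts
# tiny synthetic example: 90 samples of class 0, only 10 of class 1
features = torch.randn(100, 1500)
labels = torch.cat([torch.zeros(90), torch.ones(10)]).long()
loader = DataLoader(TensorDataset(features, labels), batch_size=16)
print(label_distribution(loader))  # e.g. Counter({0: 90, 1: 10})
A heavily skewed count like this (or one class missing entirely after undersampling) is usually enough to explain a classifier that never predicts the minority class.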
This is my first time training a model myself, and I have a serious problem.
I know this problem has already been discussed here.
https://discuss.pytorch.org/t/why-the-train-loss-and-train-acc-never-change/101334
But I still can't fix it.
My code has changed slightly compared to the original one, but the problems are the same as described below.
Problem:
The code runs, but the train loss and train accuracy never change:
train_loss = 0.69, train_acc = 0.5.
I think the model is not being trained, but I can't find my mistake.
I have tried every solution I could find: changing the learning rate, reset_parameters, normalization, and so on.
Maybe it is the preprocessing? The images are grayscale.
COVER = 1
STEGO = 0
class CustomImageDataset(Dataset):
# [
# (path_0, label_0),
# ...
# (path_n, label_n)
# ] tuple
def __init__(self, img_dir): #training_dataset & testing_dataset
self.data = []
self.readData(img_dir + '/cover_png', COVER)
self.readData(img_dir + '/stego_png', STEGO)
print(self.data)
def readData(self, path, label):
file_names = os.listdir(path)
for file_name in file_names:
file_path = path + '/' + file_name
self.data.append((file_path, label))
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
img_path, label = self.data[idx]
image = read_image(img_path)
return image, label
def gaussian1(x):
mean = torch.mean(x)
std = torch.std(x)
return torch.exp(-((x-mean)**2)/(torch.std(x))**2)
def gaussian2(x):
mean = torch.mean(x)
std = torch.std(x)
return 0.5 * torch.exp(-((x-mean)**2)/(torch.std(x))**2)
KV = torch.tensor([[-1,2,-2,2,-1],
[2,-6,8,-6,2],
[-2,8,-12,8,-2],
[2,-6,8,-6,2],
[-1,2,-2,2,-1]])/12.
KV = KV.view(1,1,5,5).to(device=device, dtype=torch.float)
KV = torch.autograd.Variable(KV, requires_grad=False)
class GNCNN(nn.Module):
def __init__(self):
super(GNCNN, self).__init__()
self.gaussian1 = gaussian1
self.gaussian2 = gaussian2
self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0, bias=True)
self.avg_pool1 = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
self.avg_pool2 =nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
self.avg_pool3 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
self.conv4 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0, bias=True)
self.avg_pool4 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
self.conv5 = nn.Conv2d(16, 16, kernel_size=5, stride=1, padding=0, bias=True)
self.avg_pool5 = nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
self.fc1 = nn.Linear(16*4*4, 128)
self.fc2 = nn.Linear(128, 128)
self.fc3 = nn.Linear(128, 2)
self.reset_parameters()
def forward(self, x):
prep = F.conv2d(x, KV, padding=2) # preprocessing
out = self.avg_pool1(gaussian1(self.conv1(prep)))
out = self.avg_pool2(gaussian2(self.conv2(out)))
out = self.avg_pool3(gaussian2(self.conv3(out)))
out = self.avg_pool4(gaussian2(self.conv4(out)))
out = self.avg_pool5(gaussian2(self.conv5(out)))
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
def reset_parameters(self):
for mod in self.modules():
if isinstance(mod, nn.Conv2d):
nn.init.xavier_uniform_(self.conv1.weight)
elif isinstance(mod, nn.Linear):
nn.init.kaiming_normal_(mod.weight.data)
def accuracy(outputs, labels):
_, argmax = torch.max(outputs, 1)
return (labels == argmax.squeeze()).float().mean()
def default_loader(path):
try:
img = Image.open(path)
return img.convert('RGB')
except:
print("Cannot read image: {}".format(path))
def train_model(model, criterion, optimizer, num_epochs, batch_size, use_gpu):
since = time.time()
best_model_wts = model.state_dict()
best_acc = 0.0
val_acc_history = []
val_loss_history = []
is_best = False
for epoch in range(num_epochs):
begin_time = time.time()
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
count_batch = 0
epoch_loss = 0.
epoch_accuracy = 0.
running_loss = 0.
running_accuracy = 0.
val_loss = 0.
val_accuracy = 0.
test_loss = 0.
test_accuracy = 0.
for inputs, labels in train_dataloader:
# print(inputs[0].shape)
# for data in train_loader:
# # count_batch += 1
# inputs, labels = data
inputs = inputs.float()
if use_gpu:
inputs = Variable(inputs.cuda())
labels = Variable(labels.cuda())
else:
inputs, labels = Variable(inputs), Variable(labels)
optimizer.zero_grad()
outputs = model(inputs)
###
accuracys = accuracy(outputs, labels).item()
print("accuracy = ", accuracys)
running_accuracy += accuracys
epoch_accuracy += accuracys
###
loss = criterion(outputs, labels)
running_loss += loss.item()
epoch_loss += loss.item()
loss.backward()
optimizer.step()
print("epch loss = ", epoch_loss)
epoch_loss /= (train_sizes/batch_size)
train_loss_history.append(epoch_loss)
print("epch acc = ", epoch_accuracy)
#epoch_accuracy /= train_sizes
epoch_accuracy /= (train_sizes / batch_size)
train_acc_history.append(epoch_accuracy)
print('\nTrain: Epoch [{}] Loss: {:.4f} Acc: {:.4f}%'.format(epoch, epoch_loss, 100*epoch_accuracy))
print('\nTrain: Epoch [{}] Loss: {:.4f}'.format(epoch, epoch_loss))
if __name__ == '__main__':
use_gpu = torch.cuda.is_available()
batch_size = 10
learning_rate = 0.001
# atransforms = transforms.Compose([
# transforms.Resize((256, 256)),
# transforms.Grayscale(1),
# # transforms.RandomResizedCrop(224),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# transforms.Normalize([0.485, ], [0.229, ])
# ])
training_dataset = CustomImageDataset('/content/gdrive/MyDrive/Topic_Research/toy_BOSSBase/train')
testing_dataset = CustomImageDataset('/content/gdrive/MyDrive/Topic_Research/toy_BOSSBase/test')
train_sizes = len(training_dataset)
print(train_sizes)
train_dataloader = DataLoader(training_dataset, batch_size, shuffle=True)
test_dataloader = DataLoader(testing_dataset, batch_size, shuffle=True)
model = GNCNN()
################
#model.reset_parameters()
if use_gpu:
model = model.cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9)
train_acc_history = []
train_loss_history = []
model = train_model(model=model, criterion=criterion, optimizer=optimizer,
num_epochs=10, batch_size=batch_size, use_gpu=use_gpu)
We are using the BOSSbase_1.01 dataset (HUGO algorithm, 0.4 bpp), with an 8000-image train set and a 2000-image validation set, epochs = 50, but we always get train_acc = 50% and train_loss = 0.69 (our result, image).
I made a CNN model for emotion recognition on 5 emotions. I wanted to test it on a single image to get the individual class predictions for each emotion.
Evaluating the model works, but I can't seem to find how to make a prediction with a single image. How can I do that?
The Model
def conv_block(in_channels, out_channels, pool=False):
layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ELU(inplace=True)]
if pool: layers.append(nn.MaxPool2d(2))
return nn.Sequential(*layers)
class ResNet(ImageClassificationBase):
def __init__(self, in_channels, num_classes):
super().__init__()
self.conv1 = conv_block(in_channels, 128)
self.conv2 = conv_block(128, 128, pool=True)
self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
self.drop1 = nn.Dropout(0.5)
self.conv3 = conv_block(128, 256)
self.conv4 = conv_block(256, 256, pool=True)
self.res2 = nn.Sequential(conv_block(256, 256), conv_block(256, 256))
self.drop2 = nn.Dropout(0.5)
self.conv5 = conv_block(256, 512)
self.conv6 = conv_block(512, 512, pool=True)
self.res3 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
self.drop3 = nn.Dropout(0.5)
self.classifier = nn.Sequential(nn.MaxPool2d(6),
nn.Flatten(),
nn.Linear(512, num_classes))
def forward(self, xb):
out = self.conv1(xb)
out = self.conv2(out)
out = self.res1(out) + out
out = self.drop1(out)
out = self.conv3(out)
out = self.conv4(out)
out = self.res2(out) + out
out = self.drop2(out)
out = self.conv5(out)
out = self.conv6(out)
out = self.res3(out) + out
out = self.drop3(out)
out = self.classifier(out)
return out
The fit_one_cycle function is called to train the model
@torch.no_grad()
def evaluate(model, val_loader):
model.eval()
outputs = [model.validation_step(batch) for batch in val_loader]
return model.validation_epoch_end(outputs)
def get_lr(optimizer):
for param_group in optimizer.param_groups:
return param_group['lr']
def fit_one_cycle(epochs, max_lr, model, train_loader, val_loader,
weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD):
torch.cuda.empty_cache()
history = []
# Set up custom optimizer with weight decay
optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
# Set up one-cycle learning rate scheduler
sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs,
steps_per_epoch=len(train_loader))
for epoch in range(epochs):
# Training Phase
model.train()
train_losses = []
lrs = []
for batch in train_loader:
loss = model.training_step(batch)
train_losses.append(loss)
loss.backward()
# Gradient clipping
if grad_clip:
nn.utils.clip_grad_value_(model.parameters(), grad_clip)
optimizer.step()
optimizer.zero_grad()
# Record & update learning rate
lrs.append(get_lr(optimizer))
sched.step()
# Validation phase
result = evaluate(model, val_loader)
result['train_loss'] = torch.stack(train_losses).mean().item()
result['lrs'] = lrs
model.epoch_end(epoch, result)
history.append(result)
return history
This returns the accuracy and loss; I want to change this so it returns prediction percentages for each class.
def accuracy(outputs, labels):
_, preds = torch.max(outputs, dim=1)
return torch.tensor(torch.sum(preds == labels).item() / len(preds))
class ImageClassificationBase(nn.Module):
def training_step(self, batch):
images, labels = batch
out = self(images)
loss = F.cross_entropy(out, labels)
return loss
def validation_step(self, batch):
images, labels = batch
out = self(images)
loss = F.cross_entropy(out, labels)
acc = accuracy(out, labels)
return {'val_loss': loss, 'val_acc': acc}
def validation_epoch_end(self, outputs):
batch_losses = [x['val_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean()
batch_accs = [x['val_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean()
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], last_lr: {:.5f}, train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
epoch, result['lrs'][-1], result['train_loss'], result['val_loss'], result['val_acc']))
Evaluating the model works, but I can't seem to find how to make a prediction with a single image. How can I do that?
Simply, if you have a single image, make sure to:
add an extra batch dimension of size 1 at the beginning
use CHW format instead of HWC (or specify that within PyTorch, check out how to do that here)
For example:
my_model = CNN(...)
random_image = torch.randn(1, 3, 100, 100) # batch of 1, 3 channels, 100x100 img
output = my_model(random_image) # forward pass on the single (batched) image
BTW. Your accuracy could be written a little simpler like this:
def accuracy(outputs, labels):
preds = torch.argmax(outputs, dim=1)
return torch.sum(preds == labels) / len(preds)
Getting class probability
Similar to argmax, you can use softmax, which transforms logits (the unnormalized scores output by your network) into probabilities:
def probability(outputs):
return torch.nn.functional.softmax(outputs, dim=1)
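Putting the pieces together, here is a minimal end-to-end sketch of predicting on one image. It assumes `model` is the trained ResNet from the question and that training used 48x48 grayscale crops; the file path and the transform are illustrative placeholders, so adjust them to whatever preprocessing your training pipeline actually used.
import torch
from PIL import Image
from torchvision import transforms
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# must mirror the training preprocessing (assumed here: 48x48 grayscale)
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),  # HWC uint8 -> CHW float in [0, 1]
])
model = model.to(device)
model.eval()  # disable dropout, use BatchNorm running stats
img = Image.open('some_face.png')  # hypothetical path
x = transform(img).unsqueeze(0).to(device)  # add the batch dimension: (1, C, H, W)
with torch.no_grad():
    logits = model(x)
    probs = torch.nn.functional.softmax(logits, dim=1)[0]  # per-class probabilities
for cls, p in enumerate(probs):
    print(f'class {cls}: {p.item() * 100:.2f}%')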
I set my model and data to the same device,
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
and I also do this:
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
but the error still exists. After around 5000 iterations, the error occurs.
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
The following is the whole training code. I hope you can help. Thanks!
import torch
import os
import torchvision.transforms as transforms
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader
captcha_list = list('0123456789abcdefghijklmnopqrstuvwxyz_')
captcha_length = 6
# Convert captcha text into a one-hot vector
def text2vec(text):
vector = torch.zeros((captcha_length, len(captcha_list)))
text_len = len(text)
if text_len > captcha_length:
raise ValueError("验证码超过6位啦!")
for i in range(text_len):
vector[i,captcha_list.index(text[i])] = 1
return vector
# Convert a captcha vector back to text
def vec2text(vec):
label = torch.nn.functional.softmax(vec, dim =1)
vec = torch.argmax(label, dim=1)
for v in vec:
text_list = [captcha_list[v] for v in vec]
return ''.join(text_list)
# Load all images and vectorize the captcha labels
def make_dataset(data_path):
img_names = os.listdir(data_path)
samples = []
for img_name in img_names:
img_path = data_path+img_name
target_str = img_name.split('_')[0].lower()
samples.append((img_path, target_str))
return samples
class CaptchaData(Dataset):
def __init__(self, data_path, transform=None):
super(Dataset, self).__init__()
self.transform = transform
self.samples = make_dataset(data_path)
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
img_path, target = self.samples[index]
target = text2vec(target)
target = target.view(1, -1)[0]
img = Image.open(img_path)
img = img.resize((140,44))
img = img.convert('RGB') # convert img to RGB
if self.transform is not None:
img = self.transform(img)
return img, target
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# First layer
# nn.Sequential: adds the modules inside to the network in order
self.layer1 = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1), # 3 channels -> 16 channels, image: 44*140
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2) # image: 22*70
)
# Second layer
self.layer2 = nn.Sequential(
nn.Conv2d(16, 64, kernel_size=3), # 16 channels -> 64 channels, image: 20*68
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2) # image: 10*34
)
# Third layer
self.layer3 = nn.Sequential(
nn.Conv2d(64, 128, kernel_size=3), # 64 channels -> 128 channels, image: 8*32
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2) # image: 4*16
)
# Fourth layer (fully connected)
self.fc1 = nn.Sequential(
nn.Linear(4*16*128, 1024),
nn.Dropout(0.2), # drop 20% of the neuron
nn.ReLU()
)
# Fifth layer
self.fc2 = nn.Linear(1024, 6*37) # 6: captcha length, 37: size of the character list
# Forward pass
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
net = Net()
def calculat_acc(output, target):
output, target = output.view(-1, len(captcha_list)), target.view(-1, len(captcha_list)) # every 37 values correspond to one character
output = nn.functional.softmax(output, dim=1)
output = torch.argmax(output, dim=1)
target = torch.argmax(target, dim=1)
output, target = output.view(-1, captcha_length), target.view(-1, captcha_length) # every 6 characters form one captcha
c = 0
for i, j in zip(target, output):
if torch.equal(i, j):
c += 1
acc = c / output.size()[0] * 100
return acc
def train(epoch_nums):
# Prepare the data
transform = transforms.Compose([transforms.ToTensor()]) # no data augmentation or normalization
train_dataset = CaptchaData('./sougou_com_Trains/', transform=transform)
train_data_loader = DataLoader(train_dataset, batch_size=32, num_workers=0, shuffle=True, drop_last=True)
test_data = CaptchaData('./sougou_com_Trains/', transform=transform)
test_data_loader = DataLoader(test_data, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
# Select the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Current device:', device)
net.to(device)
criterion = nn.MultiLabelSoftMarginLoss() # loss function
optimizer = torch.optim.Adam(net.parameters(), lr=0.001) # optimizer
# Load a saved model if one exists
model_path = './module_build/model.pth'
if os.path.exists(model_path):
print('Loading saved model')
checkpoint = torch.load(model_path)
net.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Start training
i = 1
for epoch in range(epoch_nums):
running_loss = 0.0
net.train() # set the network to training mode
for data in train_data_loader:
if i%100==0:
print(i)
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device) # move the data to the selected device
# zero the gradients on every iteration
optimizer.zero_grad()
# key steps
# forward pass
outputs = net(inputs)
# compute the loss
loss = criterion(outputs, labels)
# backward pass
loss.backward()
# update the parameters
optimizer.step()
running_loss += loss.item()
if i % 2000 == 0:
acc = calculat_acc(outputs, labels)
print('Iteration %s training accuracy: %.3f %%, loss: %.3f' % (i, acc, running_loss/2000))
running_loss = 0
# save the model
torch.save({
'model_state_dict':net.state_dict(),
'optimizer_state_dict':optimizer.state_dict(),
},model_path)
i += 1
# At the end of each epoch, compute the accuracy on the test set
net.eval() # evaluation mode
with torch.no_grad():
for inputs, labels in test_data_loader:
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('Test set accuracy: %.3f %%' % (acc))
break # only test one batch
# Update the learning rate every 5 epochs
if epoch % 5 == 4:
for p in optimizer.param_groups:
p['lr'] *= 0.9
train(10)
In the evaluation part, do this:
net.eval() # evaluation mode
with torch.no_grad():
for inputs, labels in test_data_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('Test set accuracy: %.3f %%' % (acc))
break # only test one batch
This will work, since you didn't move your data from the test loader to the device.