The loss function is MSE, and it is not decreasing (2.32 to 2.24). Is the problem the AE architecture, or the way I train the AE? After 100 epochs the loss doesn't change. Is it that the input data (200, 1, 52) can't be compressed? Should I increase the compressed size (200, 16) by changing the encoder architecture?
# Standard Torch Packages
import torch
from sklearn.model_selection import train_test_split
from torch.nn import functional as F
from torch import optim
from os.path import join, exists
from os import mkdir
import numpy as np
import glob
import matplotlib.pyplot as plt
# Import own functions
from ae_model import AE
from learning import EarlyStopping, ReduceLROnPlateau, LSIZE
# Define parameters
num_epochs = 50
rollout_path = "data/rollouts/rollout_*.npz"
logdir = "data/"
X = []
for x in glob.glob(rollout_path):
data_point = np.load(x, allow_pickle=True)
X.append(data_point)
train_loader, test_loader = train_test_split(X, test_size=0.2, shuffle=False)
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
print(device)
model = AE(LSIZE).to(device)
#optimizer = optim.Adam(model.parameters())
optimizer = torch.optim.Adam(model.parameters(),
lr=1e-1,
weight_decay=1e-8)
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=5)
earlystopping = EarlyStopping("min", patience=3)
# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()
def train(epoch):
""" One training epoch """
model.train()
train_loss = []
#train_data_points = 0
for batch_idx, rollout in enumerate(train_loader):
data = torch.tensor(rollout["data"]).to(device)
train_data_points = len(data) * len(train_loader)
#recon_batch, mu, logvar = model(data)
recon_batch, _ = model(data)
loss = loss_function(recon_batch, data)
optimizer.zero_grad()
loss.backward()
#train_loss += loss.item()
optimizer.step()
plot_train_data.append(loss.item())
# if batch_idx % 20 == 0:
# print(
# "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
# epoch,
# batch_idx * len(data),
# train_data_points,
# 100.0 * batch_idx / len(train_loader),
# loss.item() / len(data),
# )
# )
# train_loss /= train_data_points
# print(
# "====> Epoch: {} Average loss: {:.4f}".format(
# epoch, train_loss / train_data_points
# )
print(
"====> Epoch: {} batchId: {} Average loss: {:.4f}".format(
epoch, batch_idx, loss.item()
))
# )
return train_loss
def test():
model.eval()
test_loss = 0
test_data_points = 0
with torch.no_grad():
for rollout in test_loader:
data = torch.tensor(rollout["data"]).to(device)
test_data_points = len(data) * len(test_loader)
#recon_batch, mu, logvar = model(data)
recon_batch, _ = model(data)
test_loss += loss_function(recon_batch, data).item()
test_loss /= test_data_points
print("====> Test set loss: {:.4f}".format(test_loss))
return test_loss
def save_checkpoint(state, is_best, filename, best_filename):
""" Save state in filename. Also save in best_filename if is_best. """
torch.save(state, filename)
if is_best:
torch.save(state, best_filename)
# check ae dir exists, if not, create it
ae_dir = join(logdir, "ae_gpu_run_false")
if not exists(ae_dir):
mkdir(ae_dir)
reload_file = join(ae_dir, "best.tar")
noreload = False
if not noreload and exists(reload_file):
state = torch.load(reload_file)
print(
"Reloading model at epoch {}"
", with test error {}".format(state["epoch"], state["precision"])
)
model.load_state_dict(state["state_dict"])
optimizer.load_state_dict(state["optimizer"])
scheduler.load_state_dict(state["scheduler"])
earlystopping.load_state_dict(state["earlystopping"])
cur_best = None
plot_train_data = []
plot_test_data = []
for epoch in range(1, num_epochs + 1):
#plot_train_data.append(train(epoch))
train(epoch)
test_loss = test()
scheduler.step(test_loss)
earlystopping.step(test_loss)
# checkpointing
best_filename = join(ae_dir, "best.tar")
filename = join(ae_dir, "checkpoint.tar")
is_best = not cur_best or test_loss < cur_best
if is_best:
cur_best = test_loss
save_checkpoint(
{
"epoch": epoch,
"state_dict": model.state_dict(),
"precision": test_loss,
"optimizer": optimizer.state_dict(),
"scheduler": scheduler.state_dict(),
"earlystopping": earlystopping.state_dict(),
},
is_best,
filename,
best_filename,
)
if earlystopping.stop:
print("End of Training because of early stopping at epoch {}".format(epoch))
break
test_plot_path = join(ae_dir, "test_fig.png")
# legend_strings = []
plt.title("AE Training and Test")
#plt.xlabel("Epochs")
#plt.ylabel("MSE losses")
#plt.plot(plot_test_data)
# legend_strings.append('Test')
#plt.legend('Test')
#plt.savefig(test_plot_path)
#plt.close()
#train_plot_path = join(ae_dir, "train_fig.png")
#plt.title("AE ")
#plt.xlabel("Epochs")
#plt.ylabel("MSE Loss")
plt.plot(plot_train_data)
# legend_strings.append('Train')
#plt.legend('Train')
plt.xticks(range(0, len(plot_train_data), 75))
plt.savefig(test_plot_path)
plt.close()
Below are the encoder and decoder models:
import torch
import torch.nn as nn
import torch.nn.functional as F
reduced_size = 22
class Decoder(nn.Module):
""" VAE decoder """
def __init__(self, latent_size):
super(Decoder, self).__init__()
self.latent_size = latent_size
self.fc1 = nn.Linear(latent_size, reduced_size)
self.deconv1 = nn.ConvTranspose1d(16, 32, 1, stride=1)
self.deconv2 = nn.ConvTranspose1d(32, 52, 1, stride=1)
def forward(self, x): # pylint: disable=arguments-differ
x = x.unsqueeze(2)
x = F.relu(self.deconv1(x))
x = torch.sigmoid(self.deconv2(x))
x = x.view(x.size(0), x.size(2), x.size(1))
return x
class Encoder(nn.Module): # pylint: disable=too-many-instance-attributes
""" VAE encoder """
def __init__(self, latent_size):
super(Encoder, self).__init__()
# input shape (200, 1, 52)
# batch_size, in_channel, len_channel
self.latent_size = latent_size
self.conv1 = nn.Conv1d(52, 32, 1, stride=1)
self.conv2 = nn.Conv1d(32, 16, 1, stride=1)
# output shape (200, 1, x)
self.fc_mu = nn.Linear(reduced_size, latent_size)
def forward(self, x): # pylint: disable=arguments-differ
x = x.view(x.size(0), x.size(2), x.size(1))
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = x.view(x.size(0), -1)
return x
class AE(nn.Module):
""" Variational Autoencoder """
def __init__(self, latent_size):
super(AE, self).__init__()
self.encoder = Encoder(latent_size)
self.decoder = Decoder(latent_size)
def forward(self, x): # pylint: disable=arguments-differ
x = x.unsqueeze(1)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
decoded = decoded.squeeze(1)
return decoded, encoded
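For context, a minimal single-batch overfit check (a sketch on my side; it assumes the rollouts X, device, and loss_function defined in the training script above, and casts the batch to float) can separate an architecture limit from an optimizer problem. Note that lr=1e-1 is far above the common Adam default of 1e-3:
# Sketch: try to overfit a single batch with a conventional Adam learning rate.
# If this loss drives toward zero, the (200, 16) bottleneck is large enough and
# the lr=1e-1 / scheduler settings are the likelier culprit.
probe_model = AE(LSIZE).to(device)
probe_optimizer = torch.optim.Adam(probe_model.parameters(), lr=1e-3)
batch = torch.tensor(X[0]["data"]).float().to(device)  # .float() is an assumption
for step in range(500):
    recon, _ = probe_model(batch)
    loss = loss_function(recon, batch)
    probe_optimizer.zero_grad()
    loss.backward()
    probe_optimizer.step()
    if step % 100 == 0:
        print(step, loss.item())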
This is my code, and an image of my training accuracy is shown below.
# !curl -L -s -o '/content/ESC-50-master-16k.tar' 'https://drive.google.com/uc?id=1hFt-qarD_Ihjb3jW7pd3tje2pcY5tfuJ&confirm=t'
# !tar -xvf "ESC-50-master-16k.tar"
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
import numpy as np
import pandas as pd
from glob import glob
Device Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load ESC-50 Data & Preprocess
metadata = pd.read_csv("./ESC-50-master-16k/meta/esc50.csv")
wav_list = sorted(glob("./ESC-50-master-16k/resample/*.wav"))
def spec_to_image(spec, eps=1e-6):
mean = spec.mean()
std = spec.std()
spec_norm = (spec - mean) / (std + eps)
spec_min, spec_max = spec_norm.min(), spec_norm.max()
spec_scaled = 255 * (spec_norm - spec_min) / (spec_max - spec_min)
spec_scaled = spec_scaled.astype(np.uint8)
return spec_scaled
class esc50dataset(Dataset):
def __init__(self, wavlist, metadata):
self.labels = np.array(metadata.target).astype(int)
self.audio = []
for f in wavlist:
wav, sr = librosa.load(f, sr=None)
spec=librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=1024, hop_length=640, n_mels=126) # [126, 126]
spec_db=librosa.power_to_db(spec,top_db=80)
spec_image = np.expand_dims(spec_to_image(spec_db), axis=0)
self.audio.append(spec_image.tolist())
self.audio = np.array(self.audio)
def __len__(self):
return len(self.audio)
def __getitem__(self, idx):
return self.audio[idx], self.labels[idx]
dataset = esc50dataset(wav_list, metadata)
features = dataset[:][0]
labels = dataset[:][1]
Train test split
train_size = 0.7
val_size = 0.5
Make train set
split_id = int(len(features) * train_size) # 2000 * 0.7 = 1400
train_features, remain_features = features[:split_id], features[split_id:] # train_features = 1400, remain_features = 600
train_labels, remain_labels = labels[:split_id], labels[split_id:] # train_labels = 1400, remain_labels = 600
Make val and test set
split_val_id = int(len(remain_features) * val_size) # 600 * 0.5 = 300
val_features, test_features = remain_features[:split_val_id], remain_features[split_val_id:] # val_features = 300, test_features = 300
val_labels, test_labels = remain_labels[:split_val_id], remain_labels[split_val_id:] # val_labels = 300, test_labels = 300
Define batch size
batch_size = 32
Create tensor datasets
train_set = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_labels))
valid_set = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_labels))
test_set = TensorDataset(torch.from_numpy(test_features), torch.from_numpy(test_labels))
Create dataloaders
train_loader = DataLoader(train_set, shuffle=True, batch_size=batch_size) # 1400 / 32
val_loader = DataLoader(valid_set, shuffle=True, batch_size=batch_size) # 300 / 32
test_loader = DataLoader(test_set, shuffle=True, batch_size=batch_size) # 300 / 32
Model hyperparameters
in_channel = features[0].shape[0] # in_channel = 1
max_pool_kernel = 3
hidden_size = 256
hidden_layers = 10
output_classes = labels.max() + 1 # output_classes = 50
dropout = 0.25
learning_rate = 0.001
num_epochs = 10
Model
class CRNN(nn.Module):
def __init__(self, in_channel, hidden_size, hidden_layers, output_classes, dropout):
super(CRNN, self).__init__()
self.hidden_layers = hidden_layers
self.hidden_size = hidden_size
self.CNN_layer1 = nn.Sequential(
nn.Conv2d(in_channels=in_channel, out_channels=32, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=32),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 32, 42, 42
)
self.CNN_layer2 = nn.Sequential(
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
nn.BatchNorm2d(num_features=64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=max_pool_kernel), # 64, 14, 14
)
self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=hidden_layers, batch_first=True)
self.dropout = nn.Dropout(dropout)
self.fc = nn.Linear(hidden_size, output_classes)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = x.float()
# print("start : {}".format(x))
x = self.CNN_layer1(x) # 1, 126, 126 -> 32, 42, 42
# print("after CNN_1 : {}".format(x))
x = self.CNN_layer2(x) # 32, 42, 42 -> 64, 14, 14
# print("after CNN_2 : {}".format(x))
x = x.reshape(x.size(0), -1, 64) # 64, 14, 14 -> 12544 , x.size(0) = Batchsize = 32
# print("after reshape : {}".format(x))
h0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.size([10, 32(batch_size), 512])
c0 = torch.zeros(self.hidden_layers, x.size(0), self.hidden_size).to(device) # torch.size([10, 32(batch_size), 512])
x, _ = self.lstm(x, (h0, c0))
# print("after lstm : {}".format(x))
x = self.dropout(x)
# print("after dropout : {}".format(x))
x = self.fc(x[:,-1,:])
# print("after fc : {}".format(x))
x = self.sigmoid(x)
# print("after sigmoid : {}".format(x))
return x
Model Initialization
model = CRNN(in_channel, hidden_size, hidden_layers, output_classes, dropout)
Criterion & Optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
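# Note (a possible issue to check; an observation, not from the original post):
# nn.CrossEntropyLoss applies log-softmax to raw logits internally, so the final
# self.sigmoid in CRNN.forward squashes the logits into (0, 1) and can stall
# learning; returning the self.fc output directly is the usual pattern here.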
Train
from tqdm import tqdm
tqdm.pandas()
tr_loss = []
tr_acc = []
v_loss = []
v_acc = []
best_valid_loss = torch.inf
best_epoch = 0
model = model.to(device)
epochloop = tqdm(range(num_epochs), position=0, desc='Training', leave=True)
for epoch in epochloop:
model.train()
train_loss = 0
train_acc = 0
epochloop.set_postfix_str(f'Training epoch {epoch}/{len(epochloop)}') # visualize
for idx, (audio, label) in enumerate(train_loader):
audio, label = audio.to(device), label.to(device)
out = model(audio)
_, pred = torch.max(out.data, 1)
# acc
train_acc += (pred==label).sum()
# loss
optimizer.zero_grad()
loss = criterion(out, label)
train_loss += loss.item()
loss.backward()
optimizer.step()
Validation
model.eval()
val_loss = 0
val_acc = 0
with torch.no_grad():
for idx, (audio, label) in enumerate(val_loader):
epochloop.set_postfix_str(f'Validation batch {idx}/{len(val_loader)}')
audio, label = audio.to(device), label.to(device)
# forward pass
out = model(audio)
_, pred = torch.max(out.data, 1)
# acc
val_acc += (pred==label).sum()
# loss
loss = criterion(out, label)
val_loss += loss.item()
model.train()
Save model if validation loss decreases
if val_loss / len(val_loader) <= best_valid_loss: # same denominator as the stored value below
best_valid_loss = val_loss / len(val_loader)
best_epoch = epoch
torch.save(model.state_dict(), "LSTM_epoch_{}.pth".format(epoch))
Print epoch loss & accuracy
print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss / len(train_loader):.3f} Train Acc: {train_acc / len(train_set) * 100}% | Val Loss: {val_loss / len(val_loader):.3f} Val Acc: {val_acc / len(valid_set) * 100}%')
tr_loss.append(train_loss / len(train_loader))
tr_acc.append(train_acc.cpu().numpy() / len(train_set) * 100)
v_loss.append(val_loss / len(val_loader))
v_acc.append(val_acc.cpu().numpy() / len(valid_set) * 100)
[Image: my training accuracy plot]
Please tell me why my training accuracy is so low.
From your dataset, I infer you are trying to identify sound data, and you are applying a CRNN. If you go to the repo for this dataset, you will find a table with the accuracy of each method; the CRNN reaches about 60% accuracy, which is not great, since other methods achieve more than 95%.
You might need to change your model to get better results. Check the table; you will also find some models with code.
It is a bit difficult to give you an answer without reviewing your whole code and re-running the model. Given this situation, I would suggest you create a GitHub repo with an appropriate requirements.txt file for your code.
Even more helpful, and more likely to be answered, would be putting your code in a Jupyter notebook and showing your loss curve across epochs. Your results would be more understandable shown like the output of TensorBoard.
Hope this helps you get some answers.
I am trying to train my BERT model on the CoNLL-2003 dataset, so I wrote the following classes:
class DataSequence(torch.utils.data.Dataset):
def __init__(self, df):
lb = [i.split() for i in df['labels'].values.tolist()]
txt = df['text'].values.tolist()
self.texts = [tokenizer(str(i),
padding='max_length', max_length = 512, truncation=True, return_tensors="pt") for i in txt]
self.labels = [align_label(i,j) for i,j in zip(txt, lb)]
def __len__(self):
return len(self.labels)
def get_batch_data(self, idx):
return self.texts[idx]
def get_batch_labels(self, idx):
return torch.LongTensor(self.labels[idx])
def __getitem__(self, idx):
batch_data = self.get_batch_data(idx)
batch_labels = self.get_batch_labels(idx)
return batch_data, batch_labels
from transformers import BertForTokenClassification
class BertModel(torch.nn.Module):
def __init__(self):
super(BertModel, self).__init__()
self.bert = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=len(unique_labels))
def forward(self, input_id, mask, label):
output = self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)
return output
And for training
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD
from tqdm import tqdm
def train_loop(model, df_train, df_val):
train_dataset = DataSequence(df_train)
val_dataset = DataSequence(df_val)
train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, num_workers=4, batch_size=1)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
optimizer = SGD(model.parameters(), lr=LEARNING_RATE)
if use_cuda:
model = model.cuda()
best_acc = 0
best_loss = 1000
for epoch_num in range(EPOCHS):
total_acc_train = 0
total_loss_train = 0
model.train()
for train_data, train_label in tqdm(train_dataloader):
train_label = train_label[0].to(device)
mask = train_data['attention_mask'][0].to(device)
input_id = train_data['input_ids'][0].to(device)
optimizer.zero_grad()
loss, logits = model(input_id, mask, train_label)
logits_clean = logits[0][train_label != -100]
label_clean = train_label[train_label != -100]
predictions = logits_clean.argmax(dim=1)
acc = (predictions == label_clean).float().mean()
total_acc_train += acc
total_loss_train += loss.item()
loss.backward()
optimizer.step()
model.eval()
total_acc_val = 0
total_loss_val = 0
for val_data, val_label in val_dataloader:
val_label = val_label[0].to(device)
mask = val_data['attention_mask'][0].to(device)
input_id = val_data['input_ids'][0].to(device)
loss, logits = model(input_id, mask, val_label)
logits_clean = logits[0][val_label != -100]
label_clean = val_label[val_label != -100]
predictions = logits_clean.argmax(dim=1)
acc = (predictions == label_clean).float().mean()
total_acc_val += acc
total_loss_val += loss.item()
val_accuracy = total_acc_val / len(df_val)
val_loss = total_loss_val / len(df_val)
print(
f'Epochs: {epoch_num + 1} | Loss: {total_loss_train / len(df_train): .3f} | Accuracy: {total_acc_train / len(df_train): .3f} | Val_Loss: {total_loss_val / len(df_val): .3f} | Accuracy: {total_acc_val / len(df_val): .3f}')
LEARNING_RATE = 1e-2
EPOCHS = 5
model = BertModel()
train_loop(model, df_train, df_val)
The training thereby does not start, and this error persists while running in Jupyter.
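One detail that may be worth checking (an assumption on my part, since the actual error message is not shown): DataLoader worker processes are a common reason training never starts under Jupyter, especially on Windows or macOS. A sketch of the same loaders with in-process loading:
train_dataloader = DataLoader(train_dataset, num_workers=0, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, num_workers=0, batch_size=1)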
I am trying to build a BERT model for text classification with the help of this code [https://towardsdatascience.com/bert-text-classification-using-pytorch-723dfb8b6b5b]. My dataset contains two columns (label, text).
The labels can take three values (0, 1, 2). The code runs without any error, but all values of the confusion matrix are 0. Is there something wrong with my code?
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torchtext.data import Field, TabularDataset, BucketIterator, Iterator
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
torch.manual_seed(42)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True, fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('label', label_field), ('text', text_field)]
CLASSIFICATION_REPORT = "classification_report.jsonl"
train, valid, test = TabularDataset.splits(path='', train='train.csv', validation='validate.csv', test='test.csv', format='CSV', fields=fields, skip_header=True)
train_iter = BucketIterator(train, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=16, sort_key=lambda x: len(x.text), device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=16, device=device, train=False, shuffle=False, sort=False)
class BERT(nn.Module):
def __init__(self):
super(BERT, self).__init__()
options_name = "bert-base-uncased"
self.encoder = BertForSequenceClassification.from_pretrained(options_name, num_labels = 3)
def forward(self, text, label):
loss, text_fea = self.encoder(text, labels=label)[:2]
return loss, text_fea
def train(model, optimizer, criterion=nn.BCELoss(), train_loader=train_iter, valid_loader=valid_iter, num_epochs=5, eval_every=len(train_iter) // 2, file_path=None, best_valid_loss=float("Inf")):
running_loss = 0.0
valid_running_loss = 0.0
global_step = 0
train_loss_list = []
valid_loss_list = []
global_steps_list = []
model.train()
for epoch in range(num_epochs):
for (label, text), _ in train_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
loss, _ = output
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.item()
global_step += 1
if global_step % eval_every == 0:
model.eval()
with torch.no_grad():
for (label, text), _ in valid_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
loss, _ = output
valid_running_loss += loss.item()
average_train_loss = running_loss / eval_every
average_valid_loss = valid_running_loss / len(valid_loader)
train_loss_list.append(average_train_loss)
valid_loss_list.append(average_valid_loss)
global_steps_list.append(global_step)
# resetting running values
running_loss = 0.0
valid_running_loss = 0.0
model.train()
# print progress
print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'.format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader), average_train_loss, average_valid_loss))
if best_valid_loss > average_valid_loss:
best_valid_loss = average_valid_loss
print('Finished Training!')
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model=model, optimizer=optimizer)
def evaluate(model, test_loader):
y_pred = []
y_true = []
model.eval()
with torch.no_grad():
for (label, text), _ in test_loader:
label = label.type(torch.LongTensor)
label = label.to(device)
text = text.type(torch.LongTensor)
text = text.to(device)
output = model(text, label)
_, output = output
y_pred.extend(torch.argmax(output, 2).tolist())
y_true.extend(label.tolist())
print('Classification Report:')
print(classification_report(y_true, y_pred, labels=[0,1,2], digits=4))
best_model = BERT().to(device)
evaluate(best_model, test_iter)
You are using criterion = nn.BCELoss(), binary cross-entropy, for a multi-class classification problem ("the labels can have three values (0, 1, 2)"). Use a loss function suitable for multi-class classification.
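For example, a minimal sketch with placeholder logits and integer targets:
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()    # expects raw logits and integer class targets
logits = torch.randn(16, 3)          # (batch, num_classes=3) raw scores
labels = torch.randint(0, 3, (16,))  # class indices in {0, 1, 2}
loss = criterion(logits, labels)
Note that BertForSequenceClassification already computes a cross-entropy loss internally when labels are passed, which is why the criterion argument in the train function above is never actually used.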
I set my model and data to the same device,
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
and I also do this:
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
but the error still exists. Once training reaches around 5,000 iterations, the error occurs:
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
The following is the whole training code; I hope you can answer it. Thanks!
import torch
import os
import torchvision.transforms as transforms
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader
captcha_list = list('0123456789abcdefghijklmnopqrstuvwxyz_')
captcha_length = 6
# Convert captcha text to a one-hot vector
def text2vec(text):
vector = torch.zeros((captcha_length, len(captcha_list)))
text_len = len(text)
if text_len > captcha_length:
raise ValueError("Captcha is longer than 6 characters!")
for i in range(text_len):
vector[i, captcha_list.index(text[i])] = 1
return vector
# Convert a captcha vector back to text
def vec2text(vec):
label = torch.nn.functional.softmax(vec, dim=1)
vec = torch.argmax(label, dim=1)
text_list = [captcha_list[v] for v in vec] # one character per position
return ''.join(text_list)
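# For instance, a quick round-trip check of the two helpers above
# (illustrative only; not part of the original training script):
#   v = text2vec('abc123')   # (6, 37) one-hot matrix
#   print(vec2text(v))       # -> 'abc123'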
# Load all images and vectorize the captcha labels
def make_dataset(data_path):
img_names = os.listdir(data_path)
samples = []
for img_name in img_names:
img_path = data_path+img_name
target_str = img_name.split('_')[0].lower()
samples.append((img_path, target_str))
return samples
class CaptchaData(Dataset):
def __init__(self, data_path, transform=None):
super(CaptchaData, self).__init__()
self.transform = transform
self.samples = make_dataset(data_path)
def __len__(self):
return len(self.samples)
def __getitem__(self, index):
img_path, target = self.samples[index]
target = text2vec(target)
target = target.view(1, -1)[0]
img = Image.open(img_path)
img = img.resize((140,44))
img = img.convert('RGB') # convert the image to RGB
if self.transform is not None:
img = self.transform(img)
return img, target
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# First convolutional block
# nn.Sequential: adds the inner modules to the network in order
self.layer1 = nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1), # 3 channels -> 16 channels, image: 44*140
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2) # image: 22*70
)
# Second convolutional block
self.layer2 = nn.Sequential(
nn.Conv2d(16, 64, kernel_size=3), # 16 channels -> 64 channels, image: 20*68
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2) # image: 10*34
)
# Third convolutional block
self.layer3 = nn.Sequential(
nn.Conv2d(64, 128, kernel_size=3), # 64 channels -> 128 channels, image: 8*32
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2) # image: 4*16
)
# Fourth block: fully connected
self.fc1 = nn.Sequential(
nn.Linear(4*16*128, 1024),
nn.Dropout(0.2), # drop 20% of the neuron
nn.ReLU()
)
# Fifth block: output layer
self.fc2 = nn.Linear(1024, 6*37) # 6: captcha length, 37: size of the character list
# Forward pass
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.fc2(x)
return x
net = Net()
def calculat_acc(output, target):
output, target = output.view(-1, len(captcha_list)), target.view(-1, len(captcha_list)) # every 37 values encode one character
output = nn.functional.softmax(output, dim=1)
output = torch.argmax(output, dim=1)
target = torch.argmax(target, dim=1)
output, target = output.view(-1, captcha_length), target.view(-1, captcha_length) # every 6 characters form one captcha
c = 0
for i, j in zip(target, output):
if torch.equal(i, j):
c += 1
acc = c / output.size()[0] * 100
return acc
def train(epoch_nums):
# Data preparation
transform = transforms.Compose([transforms.ToTensor()]) # no data augmentation or normalization
train_dataset = CaptchaData('./sougou_com_Trains/', transform=transform)
train_data_loader = DataLoader(train_dataset, batch_size=32, num_workers=0, shuffle=True, drop_last=True)
test_data = CaptchaData('./sougou_com_Trains/', transform=transform)
test_data_loader = DataLoader(test_data, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
# Select the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Current device:', device)
net.to(device)
criterion = nn.MultiLabelSoftMarginLoss() # loss function
optimizer = torch.optim.Adam(net.parameters(), lr=0.001) # optimizer
# Load a saved model if one exists
model_path = './module_build/model.pth'
if os.path.exists(model_path):
print('Loading model')
checkpoint = torch.load(model_path)
net.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Start training
i = 1
for epoch in range(epoch_nums):
running_loss = 0.0
net.train() # switch the network to training mode
for data in train_data_loader:
if i%100==0:
print(i)
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device) # send the data to the chosen device
# zero the gradients on every iteration
optimizer.zero_grad()
# Key steps
# Forward pass
outputs = net(inputs)
# Compute the loss
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Update the parameters
optimizer.step()
running_loss += loss.item()
if i % 2000 == 0:
acc = calculat_acc(outputs, labels)
print('Iteration %s training accuracy: %.3f %%, loss: %.3f' % (i, acc, running_loss/2000))
running_loss = 0
# Save the model
torch.save({
'model_state_dict':net.state_dict(),
'optimizer_state_dict':optimizer.state_dict(),
},model_path)
i += 1
# At the end of each epoch, evaluate accuracy on the test set
net.eval() # evaluation mode
with torch.no_grad():
for inputs, labels in test_data_loader:
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('Test set accuracy: %.3f %%' % (acc))
break # only evaluate one batch
# Decay the learning rate every 5 epochs
if epoch % 5 == 4:
for p in optimizer.param_groups:
p['lr'] *= 0.9
train(10)
In the evaluation part, do this:
net.eval() # evaluation mode
with torch.no_grad():
for inputs, labels in test_data_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = net(inputs)
acc = calculat_acc(outputs, labels)
print('Test set accuracy: %.3f %%' % (acc))
break # only evaluate one batch
This should work, since you didn't move your data from the test loader to the device.
I am new to PyTorch and have written the following code. I am using one neural network for the encoding part, followed by some processing of the encoded data, and then another network for decoding this data.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import numpy as np
import itertools
import datetime
class Encoder(nn.Module):
def __init__(self):
super(Encoder, self).__init__()
self.fc1 = nn.Linear(4, 32, bias=False)
self.fc2 = nn.Linear(32, 16, bias=False)
self.fc3 = nn.Linear(16, 7, bias=False)
def forward(self, x):
x = self.fc1(x)
x = torch.tanh(x)
x = self.fc2(x)
x = torch.tanh(x)
x = self.fc3(x)
output = torch.tanh(x)
return output
def channel(codeword, snr_db, device):
snr_value = 10 ** (snr_db / 10)
h_real = torch.normal(mean=0, std=1, size=(codeword.shape[0], 1)) * torch.sqrt(torch.as_tensor(1/2))
h_imag = torch.normal(mean=0, std=1, size=(codeword.shape[0], 1)) * torch.sqrt(torch.as_tensor(1/2))
h_real_t = h_real.repeat(1, codeword.shape[1]).to(device)
h_imag_t = h_imag.repeat(1, codeword.shape[1]).to(device)
noise_real = torch.normal(mean=0, std=1, size=codeword.shape) * torch.sqrt(torch.as_tensor(1/(2*snr_value)))
noise_imag = torch.normal(mean=0, std=1, size=codeword.shape) * torch.sqrt(torch.as_tensor(1/(2*snr_value)))
noise_real = noise_real.to(device)
noise_imag = noise_imag.to(device)
faded_cw_real = torch.mul(h_real_t, codeword) + noise_real
faded_cw_imag = torch.mul(h_imag_t, codeword) + noise_imag
return torch.cat([faded_cw_real[:, :, None], faded_cw_imag[:, :, None]], dim=2), h_real, h_imag
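# For reference, per example the channel above implements flat fading:
#   y = h * x + n, with h_real, h_imag ~ N(0, 1/2) (unit-power fading)
#   and n_real, n_imag ~ N(0, 1/(2 * snr_value)), snr_value = 10**(snr_db/10);
# it returns the real/imaginary parts stacked along a new last dimension,
# together with the fading coefficients h_real and h_imag.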
class Decoder(nn.Module):
def __init__(self):
super(Decoder, self).__init__()
self.bigru = nn.GRU(input_size=7, hidden_size=200, num_layers=20, bidirectional=True)
self.fc0 = nn.Linear(4, 1)
self.fc1 = nn.Linear(400, 4)
def forward(self, x):
x, states = self.bigru(x)
output = torch.squeeze(self.fc0(torch.transpose(x, 2, 1)))
output = torch.tanh(output)
output = self.fc1(output)
output = torch.tanh(output)
# output = torch.softmax(output, dim=0)
return output
def train(args, model1, model2, device, optimizer, epoch, snr):
model1.train()
model2.train()
count = 1000
for i in range(count):
data = np.array([list(i) for i in itertools.product([-1, 1], repeat=4)])
p = np.random.permutation(16)
# p = np.random.randint(low=0, high=16, size=(16,))
train_data = data[p]
data_one_hot = np.eye(16)
truth = data_one_hot[p]
# truth = torch.as_tensor(truth).to(device).float() # Uncomment this for BCE loss
train_data = torch.as_tensor(train_data).float()
train_data = train_data.to(device)
# optimizer1.zero_grad()
optimizer.zero_grad()
output = model1(train_data)
output = output.to(device)
ch_out, h_r, h_i = channel(output, snr, device)
h_r = torch.as_tensor(h_r[:, :, None].repeat(1, 7, 1)).to(device)
h_i = torch.as_tensor(h_i[:, :, None].repeat(1, 7, 1)).to(device)
dec_ip = torch.cat([ch_out, h_r, h_i], 2)
dec_ip = torch.transpose(dec_ip, 2, 1)
hat = model2(torch.as_tensor(dec_ip).float())
loss_d = F.mse_loss(hat, train_data)
# loss_d = F.binary_cross_entropy(hat, truth)
loss_d.backward()
optimizer.step()
if i % 10 == 0:
# print(f"Train epoch: {epoch}, Batch: {i}, Encoder Loss: {loss_e.item()}, SNR: {snr}")
print(f"Train epoch: {epoch}, Batch: {i}, Decoder Loss: {loss_d.item()}, SNR: {snr}")
def main():
epochs = 14
learning_rate = 1
learning_rate_step = 0.7
no_cuda = False
log_interval = 10
use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
enc_model = Encoder().to(device)
dec_model = Decoder().to(device)
optimizer = optim.Adam(list(dec_model.parameters())+list(enc_model.parameters()), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=1, gamma=learning_rate_step)
for epoch in range(1, epochs+1):
snr = 20 - 20 * epoch / epochs
train(log_interval, enc_model, dec_model, device, optimizer, epoch, snr)
scheduler.step()
if __name__ == "__main__":
main()
However, when I run this, the output is:
Train epoch: x, Batch: y, Decoder Loss: 2.0, SNR: z
where x, y and z depend on the iteration. The decoder loss is stuck at 2.0. In fact, the loss starts at less than 2 and then gets stuck at 2.0.
Is there something simply wrong with the code?
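For comparison, a more conventional optimizer setup (a sketch, not a verified fix) would start Adam near its usual default instead of learning_rate = 1; a step that large can saturate the tanh units early, which is consistent with a loss that freezes:
# Sketch: same joint optimizer, conventional Adam learning rate (assumed value).
optimizer = optim.Adam(
    list(dec_model.parameters()) + list(enc_model.parameters()),
    lr=1e-3,  # instead of learning_rate = 1
)
scheduler = StepLR(optimizer, step_size=1, gamma=learning_rate_step)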