I am working on a multiclass image classification problem that has 4k labeled images. Currently, I am using cross-validation.However, I want to try nested-cross-validation for the problem in pytorch but I couldn't find it.
Is it possible to use nested-cross-validation in pytorch? if so, how?
I built a simple pipeline below. Is it correct to implement nested-cross-validation like this.
import torch
from torch.utils.data import DataLoader,SubsetRandomSampler
from sklearn.model_selection import KFold
from torchvision import datasets
input_size = (256,3,224,244)
target_size = (256,)
class Dataset(datasets.VisionDataset):
def __init__(self):
super().__init__(self)
self.images = torch.rand(input_size).float()
self.targets = torch.randint(0,3,target_size)
def __getitem__(self, index: int) -> any:
return self.images[index],self.targets[index]
def __len__(self) -> int:
return len(self.images)
class BasicModel(torch.nn.Module):
def __init__(self) -> None:
super(BasicModel,self).__init__()
self.conv = torch.nn.Conv2d(3,16,kernel_size=(5,5))
self.adp = torch.nn.AdaptiveAvgPool2d(1)
self.linear = torch.nn.Linear(16,3)
def forward(self,x):
x = self.conv(x)
x = self.adp(x)
x = x.view(x.size(0),-1)
x = self.linear(x)
return x
data_ids = [*range(input_size[0])]
data = Dataset()
model = BasicModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
k_fold = 5
kfold_test = KFold(n_splits=k_fold, shuffle=True)
num_epochs = 2
for test_fold,(remain_ids,test_ids) in enumerate(kfold_test.split(data_ids)):
test_sampler = SubsetRandomSampler(test_ids)
testLoader = DataLoader(data,sampler=test_sampler,batch_size=2)
kfold_val = KFold(n_splits=k_fold-1, shuffle=True)
for epoch in range(num_epochs):
for val_fold,(train_ids,val_ids) in enumerate(kfold_val.split(remain_ids)):
train_sampler = SubsetRandomSampler(train_ids)
trainLoader = DataLoader(data,sampler=train_sampler,batch_size=2)
val_sampler = SubsetRandomSampler(val_ids)
valLoader = DataLoader(data,sampler=val_sampler,batch_size=2)
model.train()
for image,target in trainLoader:
with torch.cuda.amp.autocast():
output = model(image)
loss = criterion(output,target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"train loss:{loss.item()}")
model.eval()
with torch.no_grad():
for image,target in valLoader:
output = model(image)
loss = criterion(output,target)
print(f"test loss:{loss.item()}")
model.eval()
with torch.no_grad():
for image,target in testLoader:
output = model(image)
loss = criterion(output,target)
print(f"test loss:{loss.item()}")
Related
while running this code for my dataset , I am getting errors. my data head looks like this
fridge_temperature temp_condition label
0 13.10 high 0
1 8.65 high 0
2 2.00 low 0
3 4.80 low 0
4 10.70 high 0
and this the shape of my data (587076, 3)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
# --------------- Dataset ---------------
class StudentsPerformanceDataset(Dataset):
"""Students Performance dataset."""
def __init__(self, csv_file):
"""Initializes instance of class StudentsPerformanceDataset.
Args:
csv_file (str): Path to the csv file with the students data.
"""
df = pd.read_csv("Z:/new_file.csv")
# Drop the column you want to remove
df = df.drop('date_time', axis=1)
df = df.drop('type', axis=1)
print(df.head())
print(df.shape)
# Grouping variable names
self.categorical = ["temp_condition"]
self.target = "label"
# One-hot encoding of categorical variables
self.students_frame = pd.get_dummies(df, prefix=self.categorical)
# Save target and predictors
self.X = self.students_frame.drop(self.target, axis=1)
self.y = self.students_frame[self.target]
def __len__(self):
return len(self.students_frame)
def __getitem__(self, idx):
# Convert idx from tensor to list due to pandas bug (that arises when using pytorch's random_split)
if isinstance(idx, torch.Tensor):
idx = idx.tolist()
return [self.X.iloc[idx].values, self.y[idx]]
# --------------- Model ---------------
class Net(nn.Module):
def __init__(self, D_in, H=15, D_out=1):
super().__init__()
self.fc1 = nn.Linear(D_in, H)
self.fc2 = nn.Linear(H, D_out)
self.relu = nn.ReLU()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x.squeeze()
""" class Net(nn.Module):
def __init__(self, D_in, H=15, D_out=1):
super().__init__()
self.fc1 = nn.Linear(D_in, H)
self.fc2 = nn.Linear(H, D_out)
self.relu = nn.ReLU()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x.squeeze() """
# --------------- Training ---------------
def train(csv_file, n_epochs=100):
"""Trains the model.
Args:
csv_file (str): Absolute path of the dataset used for training.
n_epochs (int): Number of epochs to train.
"""
# Load dataset
dataset = StudentsPerformanceDataset(csv_file)
# Split into training and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = random_split(dataset, [train_size, test_size])
# Dataloaders
trainloader = DataLoader(trainset, batch_size=200, shuffle=True)
testloader = DataLoader(testset, batch_size=200, shuffle=False)
# Use gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Define the model
# Define the model
D_in, H = 3, 15
net = Net(D_in, H).to(device)
#D_in, H = 19, 15
#net = Net(D_in, H).to(device)
# Loss function
criterion = nn.MSELoss()
# Optimizer
optimizer = optim.Adam(net.parameters(), weight_decay=0.0001)
# Train the net
loss_per_iter = []
loss_per_batch = []
for epoch in range(n_epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(trainloader):
inputs = inputs.to(device)
labels = labels.to(device)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward + backward + optimize
outputs = net(inputs.float())
loss = criterion(outputs, labels.float())
loss.backward()
optimizer.step()
# Save loss to plot
running_loss += loss.item()
loss_per_iter.append(loss.item())
loss_per_batch.append(running_loss / (i + 1))
running_loss = 0.0
# Comparing training to test
dataiter = iter(testloader)
inputs, labels = dataiter.next()
inputs = inputs.to(device)
labels = labels.to(device)
outputs = net(inputs.float())
print("Root mean squared error")
print("Training:", np.sqrt(loss_per_batch[-1]))
print("Test", np.sqrt(criterion(labels.float(), outputs).detach().cpu().numpy()))
# Plot training loss curve
plt.plot(np.arange(len(loss_per_iter)), loss_per_iter, "-", alpha=0.5, label="Loss per epoch")
plt.plot(np.arange(len(loss_per_iter), step=4) + 3, loss_per_batch, ".-", label="Loss per mini-batch")
plt.xlabel("Number of epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()
if __name__ == "__main__":
import os
import sys
import argparse
# By default, read csv file in the same directory as this script
csv_file = os.path.join(sys.path[0], "Z:/new_file.csv")
# Parsing arguments
parser = argparse.ArgumentParser()
parser.add_argument("--file", "-f", nargs="?", const=csv_file, default=csv_file,
help="Dataset file used for training")
parser.add_argument("--epochs", "-e", type=int, nargs="?", default=100, help="Number of epochs to train")
args = parser.parse_args()
# Call the main function of the script
train(args.file, args.epochs)
I am getting this error return F.linear(input, self.weight,
self.bias) RuntimeError: mat1 and mat2 shapes cannot be multiplied
(200x7 and 3x15)
I'm currently trying to train a Recurrent Neural Network with PyTorch and I am having trouble managing the DataLoader. Let's start from the beginning.
import matplotlib.pyplot as plt
import numpy as np
import torch
T = 50 #period
t = 300 #time
timeStep = np.linspace(0,t,300)
mu = 0
sigma = np.sqrt(0.001)
x1 = []
x2 = []
for s in timeStep:
eps1 = np.random.randn(1)*sigma+mu
eps2 = np.random.randn(1)*sigma+mu
x1.append(np.cos(2*s*np.pi/T)+eps1)
x2.append(np.sin(4*s*np.pi/T)+eps2)
from torch import nn
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader
class Data(torch.utils.data.Dataset):
def __init__(self):
for sample in range(10):
self.X = torch.from_numpy(np.stack([x1, x2], axis=1).reshape([-1, 2, 2])).float()
self.Y = torch.from_numpy(np.append(np.delete(self.X, 0, axis=0), self.X[1].reshape([1, 2, 2]), axis=0)).float()
print(self.X.shape,self.Y.shape)
def __len__(self):
return len(self.X)
def __getitem__(self, index):
feature = self.X[index]
label = self.Y[index]
return feature, label
dataset = Data()
At this point, dataset.X.shape,dataset.Y.shape gives [150,2,2], [150,2,2]. So up until here, that's what I need to get so no problem. (I get 2 samples of 150 time series data from a 300 data time series).
from torch.autograd import Variable
from typing import Tuple
class Recurrent(nn.Module):
def __init__(self, hidden_dim: int = 20):
super().__init__()
self.hidden_dim: int = hidden_dim
self.hidden: Tuple[Variable, Variable] = self.init_hidden()
self.rnn = nn.LSTM(2, self.hidden_dim)
self.fc = nn.Sequential(
nn.ReLU(),
nn.Linear(self.hidden_dim, 2)
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x, hidden = self.rnn(x, self.hidden)
self.hidden = (Variable(hidden[0].data), Variable(hidden[1].data))
x = self.fc(x)
return x
def init_hidden(self) -> Tuple[Variable, Variable]:
return (
Variable(torch.zeros(1, 2, self.hidden_dim)),
Variable(torch.zeros(1, 2, self.hidden_dim))
)
def fit(model, dataset, batch_size=2, epochs = 100, loss_print_per_epoch = 10):
def _w(worker_id):
np.random.seed(np.random.get_state()[1][0] + worker_id)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = LambdaLR(optimizer, lr_lambda=lambda _e: 0.97 ** _e)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
def _train_model(_m, _d):
_m.train()
train_batch_loss = []
for x, y in _d:
optimizer.zero_grad()
output = _m(x)
print(output.shape)
loss = criterion(output, y)
loss.backward(retain_graph=True)
optimizer.step()
train_batch_loss.append(loss.item())
return _m, np.mean(train_batch_loss)
for epoch in range(epochs+1):
model, train_loss = _train_model(model, train_loader)
if epoch % loss_print_per_epoch == 0:
print(f'epoch: {epoch}/{epochs} loss: {train_loss} lr: {scheduler.get_last_lr()[0]}')
scheduler.step()
return model
model = fit(model=Recurrent(), dataset=dataset, batch_size=2, epochs=100)
When I iterate through the DataLoader with my for loop, this is where the problem comes up. output.shape should be [150,2,2] for batch size = 2 and [150,1,2] for batch size = 1. But using the print in the for loop gives me a shape of [2,2,2] and I have no idea why. If anyone could help me understand what is going on here, it would be a great help.
As a Pytorch newbie (coming from tensorflow), I am unsure of how to implement Early Stopping. My research has led me discover that pytorch does not have a native way to so this. I have also discovered torchsample, but am unable to install it in my conda environment for whatever reason. Is there a simple way to go about applying early stopping without it? Here is my current setup:
class RegressionDataset(Dataset):
def __init__(self, X_data, y_data):
self.X_data = X_data
self.y_data = y_data
def __getitem__(self, index):
return self.X_data[index], self.y_data[index]
def __len__(self):
return len(self.X_data)
train_dataset = RegressionDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
val_dataset = RegressionDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float())
test_dataset = RegressionDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())
# Model Params
EPOCHS = 100
BATCH_SIZE = 1000
LEARNING_RATE = 0.001
NUM_FEATURES = np.shape(X_test)[1]
# Initialize Dataloader
train_loader = DataLoader(dataset = train_dataset, batch_size=BATCH_SIZE, shuffle = True)
val_loader = DataLoader(dataset = val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset = test_dataset, batch_size=BATCH_SIZE)
# Define Neural Network Architecture
class MultipleRegression(nn.Module):
def __init__(self, num_features):
super(MultipleRegression, self).__init__()
# Define architecture
self.layer_1 = nn.Linear(num_features, 16)
self.layer_2 = nn.Linear(16, 32)
self.layer_3 = nn.Linear(32, 25)
self.layer_4 = nn.Linear(25, 20)
self.layer_5 = nn.Linear(20, 16)
self.layer_out = nn.Linear(16, 1)
self.relu = nn.ReLU() # ReLU applied to all layers
# Initialize weights and biases
nn.init.xavier_uniform_(self.layer_1.weight)
nn.init.zeros_(self.layer_1.bias)
nn.init.xavier_uniform_(self.layer_2.weight)
nn.init.zeros_(self.layer_2.bias)
nn.init.xavier_uniform_(self.layer_3.weight)
nn.init.zeros_(self.layer_3.bias)
nn.init.xavier_uniform_(self.layer_4.weight)
nn.init.zeros_(self.layer_4.bias)
nn.init.xavier_uniform_(self.layer_5.weight)
nn.init.zeros_(self.layer_5.bias)
nn.init.xavier_uniform_(self.layer_out.weight)
nn.init.zeros_(self.layer_out.bias)
def forward(self, inputs):
x = self.relu(self.layer_1(inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.relu(self.layer_4(x))
x = self.relu(self.layer_5(x))
x = self.layer_out(x)
return(x)
def predict(self, test_inputs):
x = self.relu(self.layer_1(test_inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.relu(self.layer_4(x))
x = self.relu(self.layer_5(x))
x = self.layer_out(x)
return(x)
# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = MultipleRegression(NUM_FEATURES)
model.to(device)
print(model)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)
# define dictionary to store loss/epochs for training and validation
loss_stats = {
"train": [],
"val": []
}
# begin training
print("Begin Training")
for e in tqdm(range(1, EPOCHS+1)):
# Training
train_epoch_loss = 0
model.train()
for X_train_batch, y_train_batch in train_loader:
X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
optimizer.zero_grad()
y_train_pred = model(X_train_batch)
train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
train_loss.backward()
optimizer.step()
train_epoch_loss += train_loss.item()
# validation
with torch.no_grad():
val_epoch_loss = 0
model.eval()
for X_val_batch, y_val_batch in val_loader:
X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
y_val_pred = model(X_val_batch)
val_loss = criterion(y_val_pred, y_val_batch.unsqueeze(1))
val_epoch_loss += val_loss.item()
loss_stats["train"].append(train_epoch_loss/len(train_loader))
loss_stats["val"].append(val_epoch_loss/len(val_loader))
print(f"Epoch {e}: \ Train loss: {train_epoch_loss/len(train_loader):.5f} \ Val loss: {val_epoch_loss/len(val_loader):.5f}")
# Visualize loss and accuracy
train_val_loss_df = pd.DataFrame.from_dict(loss_stats).reset_index().melt(id_vars=["index"]).rename(columns = {"index":"epochs"})
plt.figure()
sns.lineplot(data = train_val_loss_df, x = "epochs", y = "value", hue = "variable").set_title("Train-Val Loss/Epoch")
# Test model
y_pred_list = []
with torch.no_grad():
model.eval()
for X_batch, _ in test_loader:
X_batch = X_batch.to(device)
y_test_pred = model(X_batch)
y_pred_list.append(y_test_pred.cpu().numpy())
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_pred_list = [item for sublist in y_pred_list for item in sublist]
y_pred_list = np.array(y_pred_list)
mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :", mse)
print("R^2 :", r_square)
A basic way to do this is to keep track of the best validation loss obtained so far.
You can have a variable best_loss = 0 initialized before your loop over epochs (or you could do other things like best loss per epoch, etc.).
After each validation pass then do:
if val_loss > best_loss:
best_loss = val_loss
# At this point also save a snapshot of the current model
torch.save(model, 'my_model_best_loss.pth')
Then, if the best_loss does not improve significantly after some number of training steps, or by the end of the epoch, or if it val_loss gets worse, break out of the loop and terminate the training there.
For implementing algorithms like early stopping (and your training loop in general) you may find it easier to give PyTorch Lightning a try (no affiliation, but it's much easier than trying to roll everything by hand).
I am a beginner looking to code an ANN in PyTorch for the task of prediction for a dynamic engineering system of a Free Piston Sterling Engine. The dataset consists of 6 inputs and 3 outputs, as shown below:
Dataset
I have a basic code which I believe should be able to accommodate for this task, however I believe there may be an issue with the labelling of the dataset, and the datatype used. I have tried converting to longtensor datatype but it has not helped.
I receive the following error when changing the output datatype to float32:
"expected scalar type Long but found Float."
and when I put it as int64, I receive:
"Target 85 is out of bounds."
Please take a look, and any advice would be very appreciated. I have included the code below:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
SEED = 4096
torch.manual_seed(SEED)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
file_path = "./Dynamics of Sterling Engine Data(1).csv"
df = pd.read_csv(
file_path,
header=None,
names=[
"Kdp(N/m)",
"Kpp(N/m)",
"Cdp(Ns/m)",
"Cl(Ns/m)",
"mdp(kg)",
"mpp(kg)",
"f(Hz)",
"γ(DP/PP)",
"α(°)",
],
)
n = len(df.index) # 55
shuffle_indices = np.random.permutation(n)
df = df.iloc[shuffle_indices]
x = df.iloc[:, :6].values.astype(np.float32)
y = df.iloc[:, -3].values.astype(np.float32)
mu = x.mean(axis=0)
span = x.max(axis=0) - x.min(axis=0)
def rescale(inputs):
return (inputs - mu) / span
x = rescale(x)
num_train = int(n * 0.82)
num_test = n - num_train
x_train = x[:num_train]
y_train = y[:num_train]
x_test = x[-num_test:]
y_test = y[-num_test:]
class NpDataset(Dataset):
def __init__(self, data, label):
assert len(data) == len(label)
self.data = torch.from_numpy(data)
self.label = torch.from_numpy(label)
def __getitem__(self, index):
return self.data[index], self.label[index]
def __len__(self):
return len(self.label)
train_dataset = NpDataset(x_train, y_train)
test_dataset = NpDataset(x_test, y_test)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)
device = torch.device("cpu")
print(device)
class SterlingNN(nn.Module):
def __init__(self):
super(SterlingNN, self).__init__()
# 6 input feautures per data point
self.fn1 = nn.Linear(6, 6) # 6 features, 6 nodes in hidden layer
self.fn2 = nn.Linear(6, 3) # 6 nodes in hidden layer, 3 outputs
def forward(self, x):
x = torch.sigmoid(self.fn1(x)) # sigmoid activation function
x = self.fn2(x)
return x
model = SterlingNN()
print(model.to(device))
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(
model.parameters(), lr=0.01, weight_decay=0.01
)
x, y = next(iter(train_dataloader))
x = x[:5].to(device)
score = model(x)
print(score)
def train():
model.train() # model into training mode and iteratate through data loader
for x, y in train_dataloader:
x = x.to(device)
y = y.to(device)
n = x.size(0)
optimiser.zero_grad()
score = model(x)
loss = loss_fn(score, y)
loss.backward()
optimiser.step()
predictions = score.max(1, keepdim=True)[1]
num_correct = predictions.eq(y.view_as(predictions)).sum().item()
acc = num_correct / n
return loss, acc
def evaluate():
model.eval()
with torch.no_grad():
for x, y in test_dataloader:
x = x.to(device)
y = y.to(device)
n = x.size(0)
score = model(x)
loss = loss_fn(score, y)
predictions = score.max(1, keepdim=True)[1]
num_correct = predictions.eq(y.view_as(predictions)).sum().item()
acc = num_correct / n
return loss, acc
max_epochs = 128
for epoch in range(max_epochs):
tr_loss, tr_acc = train()
eva_loss, eva_acc = evaluate()
print(
"[{epoch}/{max_epochs}] Train loss:{tr_loss:.4f} acc:{tr_acc*100:.2f}% - Test loss:{eva_loss:.4f} acc:{eva_acc*100:.2f}%".format()
)
I am trying to implement the code from a Pytorch beginner's tutorial. But I have written the code for loading the saved model in another Python file.
The FashionClassify file contains the code exactly as its in the tutorial.
Below is the code:
from FashionClassify import NeuralNetwork
from FashionClassify import test_data
import torch
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))
classes = [
"T-shirt/top", "Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot",
]
model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
pred = model(x)
predicted, actual = classes[pred[0].argmax(0)],classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')
However, when I run this, the entire training process starts again. Why is that so ?
OR
Is it an expected behavior ?
(I have gone through a couple of webpages and StackOverflow answers but couldn't find my problem)
FashionClassify file code:
import torch
from torch import nn
from torch.utils.data import DataLoader # wraps an iterable around dataset
from torchvision import datasets # stores samples and their label
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib as plt
training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor(), )
test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor(), )
batch_size = 64
train_dataLoader = DataLoader(training_data, batch_size=batch_size)
test_dataLoader = DataLoader(test_data, batch_size=batch_size)
for X, y in test_dataLoader:
print('Shape of X [N,C,H,W]:', X.size())
print('Shape of y:', y.shape, y.dtype)
break
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
# to define a NN, we inherit a class from nn.Module
class NeuralNetwork(nn.Module):
def __init__(self):
# will specify how data will proceed in the forward pass
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28 * 28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to(device)
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
for batch, (X,y) in enumerate(dataloader):
X,y = X.to(device), y.to(device)
#compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
#backprop
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch%100 ==0:
loss,current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model):
size = len(dataloader.dataset)
model.eval()
test_loss, correct = 0,0
with torch.no_grad():
for X, y in dataloader:
X,y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= size
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
epochs = 5
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataLoader, model, loss_fn, optimizer)
test(test_dataLoader, model)
print("Done!")
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
That's what happens when you import another file. All the code gets rerun.
Instead, in your training file:
class FancyNetwork(nn.Module):
[...]
def train():
[train code]
if __name__ == "__main__":
train()
Now when you run this file train() will get called, but when you import this file in another one, train won't get called automatically.