'int' object has no attribute 'size'" - python

F.nll_loss: I am getting
AttributeError: 'int' object has no attribute 'size'
when I try to run this code. I also get a snippet of the module code.
raise ValueError('Expected 2 or more dimensions (got {})'.format(dim))
if input.size(0) != target.size(0):
    raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
                     .format(input.size(0), target.size(0)))
import torch
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pylab as plt
# Load both MNIST splits, converting every image to a tensor.
train_dataset = datasets.MNIST(
    root='',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test_dataset = datasets.MNIST(
    root='',
    download=True,
    train=False,
    transform=transforms.Compose([transforms.ToTensor()]),
)

batch_size = 10
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
# NOTE: this rebinds `test_dataset` from the Dataset to a DataLoader over it.
test_dataset = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=True)
class Net(nn.Module):
    """Fully connected classifier for flattened 28x28 images.

    Three hidden layers of 64 ReLU units feed a 10-way output layer whose
    scores are returned as log-probabilities.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return per-class log-probabilities for ``x`` of shape (N, 784)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Third hidden layer is fc3 (not a second pass through fc2).
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
# Smoke test: push one random 28x28 input through an untrained network.
x = torch.rand((28, 28)).view(-1, 28 * 28)  # flatten to a single 784-vector row
net = Net()
out = net(x)
out
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
EPOCHS = 3

for epoch in range(EPOCHS):
    # NOTE: iterating the Dataset itself yields one (image, int label) pair at
    # a time, so F.nll_loss receives a plain int as target -- this is the
    # AttributeError this post asks about. Iterating `train_loader` instead
    # yields batched label tensors.
    for data in train_dataset:
        x, y = data
        net.zero_grad()
        x = x.view(-1, 28 * 28)  # flatten each image to 784 features
        output = net(x)
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)  # loss of the last item processed in this epoch

Just change the for loop from:
for data in train_dataset:
to
for data in train_loader:

Related

Nested Cross Validation in PyTorch

I am working on a multiclass image classification problem with 4k labeled images. Currently, I am using cross-validation. However, I want to try nested cross-validation for this problem in PyTorch, but I couldn't find an example of it.
Is it possible to use nested-cross-validation in pytorch? if so, how?
I built a simple pipeline below. Is it correct to implement nested-cross-validation like this.
import torch
from torch.utils.data import DataLoader,SubsetRandomSampler
from sklearn.model_selection import KFold
from torchvision import datasets
input_size = (256, 3, 224, 244)  # shape of the random image tensor
target_size = (256,)  # one label per image


class Dataset(datasets.VisionDataset):
    """Synthetic dataset of random images with random labels in {0, 1, 2}."""

    def __init__(self):
        # VisionDataset's first parameter is the root directory, not the
        # instance; nothing is read from disk here, so an empty root is used.
        super().__init__(root="")
        self.images = torch.rand(input_size).float()
        self.targets = torch.randint(0, 3, target_size)

    def __getitem__(self, index: int) -> tuple:
        """Return the ``(image, target)`` pair stored at ``index``."""
        return self.images[index], self.targets[index]

    def __len__(self) -> int:
        """Return the number of stored images."""
        return len(self.images)
class BasicModel(torch.nn.Module):
    """Small CNN: 5x5 convolution, global average pooling, 3-way linear head."""

    def __init__(self) -> None:
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 16, kernel_size=(5, 5))
        self.adp = torch.nn.AdaptiveAvgPool2d(1)
        self.linear = torch.nn.Linear(16, 3)

    def forward(self, x):
        """Map a batch of 3-channel images to a ``(batch, 3)`` score tensor."""
        x = self.conv(x)
        x = self.adp(x)  # -> (batch, 16, 1, 1)
        x = x.view(x.size(0), -1)  # -> (batch, 16)
        x = self.linear(x)
        return x
data_ids = [*range(input_size[0])]
data = Dataset()
model = BasicModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
k_fold = 5
kfold_test = KFold(n_splits=k_fold, shuffle=True)
num_epochs = 2

# NOTE(review): `model` and `optimizer` are shared by every fold, so later
# folds start from weights already trained on earlier folds' data.

# Outer loop: hold one fold out as the test set.
for test_fold, (remain_ids, test_ids) in enumerate(kfold_test.split(data_ids)):
    test_sampler = SubsetRandomSampler(test_ids)
    testLoader = DataLoader(data, sampler=test_sampler, batch_size=2)

    kfold_val = KFold(n_splits=k_fold - 1, shuffle=True)
    for epoch in range(num_epochs):
        # Inner loop: split the remaining samples into train / validation.
        for val_fold, (train_pos, val_pos) in enumerate(kfold_val.split(remain_ids)):
            # KFold.split yields positions *within* `remain_ids`, not the
            # dataset ids themselves; map them back so that no held-out test
            # sample is drawn for training or validation.
            train_ids = remain_ids[train_pos]
            val_ids = remain_ids[val_pos]

            train_sampler = SubsetRandomSampler(train_ids)
            trainLoader = DataLoader(data, sampler=train_sampler, batch_size=2)
            val_sampler = SubsetRandomSampler(val_ids)
            valLoader = DataLoader(data, sampler=val_sampler, batch_size=2)

            model.train()
            for image, target in trainLoader:
                with torch.cuda.amp.autocast():
                    output = model(image)
                    loss = criterion(output, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                print(f"train loss:{loss.item()}")

            model.eval()
            with torch.no_grad():
                for image, target in valLoader:
                    output = model(image)
                    loss = criterion(output, target)
                    # Labelled "val" so it is distinguishable from the
                    # held-out test loss printed below.
                    print(f"val loss:{loss.item()}")

    # Evaluate on the held-out test fold.
    model.eval()
    with torch.no_grad():
        for image, target in testLoader:
            output = model(image)
            loss = criterion(output, target)
            print(f"test loss:{loss.item()}")

Pytorch crashes with error IndexError: Target 32 is out of bounds

I am attempting to train a model using CIFAR-100 dataset, on CPU.
But, I get an error:
Traceback (most recent call last):
File "recog.py", line 68, in <module>
loss = criterion(outputs, labels)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py", line 1152, in forward
label_smoothing=self.label_smoothing)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py", line 2846, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
IndexError: Target 32 is out of bounds.
I took a snippet from here and modified it a little.
Code:
import torch
import torchvision
import torchvision.transforms as transforms
# Convert images to tensors and normalise each of the three channels
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 4

trainset = torchvision.datasets.CIFAR100(
    root='./dataone', train=True, download=True, transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2,
)

testset = torchvision.datasets.CIFAR100(
    root='./dataone', train=False, download=True, transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2,
)

classes = (
    'aquatic mammals',
    'fish',
    'flowers',
    'food containers',
    'fruit and vegetables',
    'household electrical devices',
    'household furniture',
    'insects',
    'large carnivores',
    'large man-made outdoor things',
    'large natural outdoor scenes',
    'large omnivores and herbivores',
    'medium-sized mammals',
    'non-insect invertebrates',
    'people',
    'reptiles',
    'small mammals',
    'trees',
    'vehicles 1',
    'vehicles 2',
)
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Two conv/pool stages followed by three fully connected layers."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        # NOTE: only 10 outputs. The IndexError this post asks about comes
        # from using this 10-way head with a dataset that has 100 classes.
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class scores for a batch of 3-channel images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
import torch.optim as optim

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
Also, the out-of-bounds target number is different on every run. I use Python 3.9 with the latest PyTorch.
When I do the same thing with CIFAR-10, it works perfectly. I'm stuck.
Please help.
Your model only predicts 10 classes. CIFAR100 has 100 classes.
Change
self.fc3 = nn.Linear(84, 10)
to
self.fc3 = nn.Linear(84, 100)

I should have a shape [150,2,2] but get a shape of [2,2,2] with DataLoader

I'm currently trying to train a Recurrent Neural Network with PyTorch and I am having trouble managing the DataLoader. Let's start from the beginning.
import matplotlib.pyplot as plt
import numpy as np
import torch
T = 50  # period
t = 300  # time
timeStep = np.linspace(0, t, 300)
mu = 0
sigma = np.sqrt(0.001)

# Two noisy periodic series sampled at every time step; each element is a
# length-1 array because the noise term is drawn with randn(1).
x1 = []
x2 = []
for s in timeStep:
    eps1 = np.random.randn(1) * sigma + mu
    eps2 = np.random.randn(1) * sigma + mu
    x1.append(np.cos(2 * s * np.pi / T) + eps1)
    x2.append(np.sin(4 * s * np.pi / T) + eps2)
from torch import nn
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader
class Data(torch.utils.data.Dataset):
    """Dataset built from the module-level ``x1`` / ``x2`` series.

    ``X`` holds the stacked series reshaped to ``(-1, 2, 2)``; ``Y`` is ``X``
    shifted forward by one sample.
    """

    def __init__(self):
        # Built once: repeating this in a loop would only recreate the same
        # tensors from the same inputs.
        series = np.stack([x1, x2], axis=1)
        self.X = torch.from_numpy(series.reshape([-1, 2, 2])).float()
        # Drop the first sample and pad the end with X[1] so Y keeps X's length.
        self.Y = torch.cat([self.X[1:], self.X[1].reshape([1, 2, 2])], dim=0)
        print(self.X.shape, self.Y.shape)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair at ``index``."""
        feature = self.X[index]
        label = self.Y[index]
        return feature, label


dataset = Data()
At this point, dataset.X.shape,dataset.Y.shape gives [150,2,2], [150,2,2]. So up until here, that's what I need to get so no problem. (I get 2 samples of 150 time series data from a 300 data time series).
from torch.autograd import Variable
from typing import Tuple
class Recurrent(nn.Module):
    """Single-layer LSTM followed by a ReLU + linear read-out to 2 features.

    The LSTM state is stored on the module between calls and detached after
    every forward pass. The initial state is created for a batch size of 2.
    """

    def __init__(self, hidden_dim: int = 20):
        super().__init__()
        self.hidden_dim: int = hidden_dim
        self.hidden: Tuple[torch.Tensor, torch.Tensor] = self.init_hidden()
        self.rnn = nn.LSTM(2, self.hidden_dim)
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the LSTM and the read-out head.

        ``nn.LSTM`` is built with its default ``batch_first=False``, so ``x``
        is read as ``(seq_len, batch, 2)`` and the output has the same
        leading two dimensions as the input.
        """
        x, hidden = self.rnn(x, self.hidden)
        # Keep the state values but cut the graph so the next call does not
        # backpropagate into this one.
        self.hidden = (hidden[0].detach(), hidden[1].detach())
        x = self.fc(x)
        return x

    def init_hidden(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return a zero ``(h_0, c_0)`` pair of shape ``(1, 2, hidden_dim)``."""
        return (
            torch.zeros(1, 2, self.hidden_dim),
            torch.zeros(1, 2, self.hidden_dim),
        )
def fit(model, dataset, batch_size=2, epochs=100, loss_print_per_epoch=10):
    """Train ``model`` on ``dataset`` with MSE loss and Adam.

    The learning rate starts at 0.01 and is multiplied by 0.97 per epoch.
    Note that ``epochs + 1`` passes over the data are made (epochs 0..epochs).

    Args:
        model: module to train; called as ``model(x)`` on each batch.
        dataset: dataset yielding ``(x, y)`` pairs.
        batch_size: batch size passed to the DataLoader.
        epochs: index of the last epoch to run.
        loss_print_per_epoch: print the mean epoch loss every this many epochs.

    Returns:
        The same ``model`` object after training.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda _e: 0.97 ** _e)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    def _train_model(_m, _d):
        """Run one pass over ``_d``; return the model and the mean batch loss."""
        _m.train()
        train_batch_loss = []
        for x, y in _d:
            optimizer.zero_grad()
            output = _m(x)
            print(output.shape)
            loss = criterion(output, y)
            loss.backward(retain_graph=True)
            optimizer.step()
            train_batch_loss.append(loss.item())
        return _m, np.mean(train_batch_loss)

    for epoch in range(epochs + 1):
        model, train_loss = _train_model(model, train_loader)
        if epoch % loss_print_per_epoch == 0:
            print(f'epoch: {epoch}/{epochs} loss: {train_loss} lr: {scheduler.get_last_lr()[0]}')
        scheduler.step()
    return model
# Train a fresh Recurrent model on `dataset`, two samples per batch.
model = fit(
    model=Recurrent(),
    dataset=dataset,
    batch_size=2,
    epochs=100,
)
When I iterate through the DataLoader with my for loop, this is where the problem comes up. output.shape should be [150,2,2] for batch size = 2 and [150,1,2] for batch size = 1. But using the print in the for loop gives me a shape of [2,2,2] and I have no idea why. If anyone could help me understand what is going on here, it would be a great help.

AttributeError in torch_geometric.transforms

I have a problem that I cannot understand: even though the module 'torch_geometric.transforms' has an attribute 'AddTrainValTestMask' according to the documentation, I cannot import it. I keep receiving the error AttributeError: module 'torch_geometric.transforms' has no attribute 'AddTrainValTestMask'
My Pytorch version is 1.7.1
I took the code from here
Minimum reproducible example:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv
dataset = 'Cora'
# Dataset files live in ../data/<name> relative to this script.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)

# NOTE: `T.AddTrainValTestMask` is the attribute the AttributeError in this
# post complains about.
transform = T.Compose([
    T.AddTrainValTestMask('train_rest', num_val=500, num_test=500),
    T.TargetIndegree(),
])

# `dataset` is rebound from the dataset name to the loaded Planetoid dataset.
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0]
class Net(torch.nn.Module):
    """Two SplineConv layers applied to the module-level ``data`` graph."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        """Return per-node log-probabilities; reads the global ``data``."""
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)
# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-3)
def train():
    """Run one optimisation step on the nodes selected by ``data.train_mask``."""
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()
def test():
    """Return ``[train_accuracy, test_accuracy]`` for the current model."""
    model.eval()
    log_probs, accs = model(), []
    for _, mask in data('train_mask', 'test_mask'):
        pred = log_probs[mask].max(1)[1]  # index of the highest log-probability
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
# Train for 200 epochs, reporting train/test accuracy after each one.
log = 'Epoch: {:03d}, Train: {:.4f}, Test: {:.4f}'
for epoch in range(1, 201):
    train()
    print(log.format(epoch, *test()))
Can anybody explain to me the problem?
It has been renamed to RandomNodeSplit in the latest version of torch_geometric. You can directly use RandomNodeSplit to replace it.

Load a saved NN model in different Python file

I am trying to implement the code from a Pytorch beginner's tutorial. But I have written the code for loading the saved model in another Python file.
The FashionClassify file contains the code exactly as its in the tutorial.
Below is the code:
from FashionClassify import NeuralNetwork
from FashionClassify import test_data
import torch
model = NeuralNetwork()
# map_location keeps loading working on a CPU-only machine even when the
# weights were saved from a model that lived on a GPU.
model.load_state_dict(torch.load("model.pth", map_location="cpu"))

classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

model.eval()
x, y = test_data[0]  # first test sample: (image, label)
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')
However, when I run this, the entire training process starts again. Why is that?
Or is this expected behavior?
(I have gone through a couple of webpages and StackOverflow answers but couldn't find my problem)
FashionClassify file code:
import torch
from torch import nn
from torch.utils.data import DataLoader # wraps an iterable around dataset
from torchvision import datasets # stores samples and their label
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib as plt
training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())

batch_size = 64
train_dataLoader = DataLoader(training_data, batch_size=batch_size)
test_dataLoader = DataLoader(test_data, batch_size=batch_size)

# Print the shapes of the first test batch only.
for X, y in test_dataLoader:
    print('Shape of X [N,C,H,W]:', X.size())
    print('Shape of y:', y.shape, y.dtype)
    break

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))
# to define a NN, we inherit a class from nn.Module
class NeuralNetwork(nn.Module):
    """Three-layer MLP producing 10 class logits from a flattened 28x28 input."""

    def __init__(self):
        # will specify how data will proceed in the forward pass
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        # No activation after the last Linear: the outputs are logits for
        # CrossEntropyLoss, and a ReLU there would clamp every negative
        # logit to zero. Parameter names (indices 0, 2, 4) are unaffected.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of unnormalised class scores."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# Loss and model; the optimizer is plain SGD over all model parameters.
loss_fn = nn.CrossEntropyLoss()
model = NeuralNetwork().to(device)
print(model)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader``.

    Prints the loss and the number of samples processed every 100 batches.
    Batches are moved to the module-level ``device``.
    """
    size = len(dataloader.dataset)
    # A preceding evaluation pass leaves the model in eval mode; switch back
    # so that layers such as dropout/batch-norm behave correctly.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Uses the module-level ``loss_fn`` and ``device``.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    # loss_fn already averages within a batch, so the sum of batch losses is
    # averaged over batches; accuracy is averaged over samples.
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
# NOTE: this runs at module top level, so it also executes whenever this
# file is imported from another script.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataLoader, model, loss_fn, optimizer)
    test(test_dataLoader, model)
print("Done!")

torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
That's what happens when you import another file. All the code gets rerun.
Instead, in your training file:
class FancyNetwork(nn.Module):
    [...]

def train():
    [train code]

if __name__ == "__main__":
    train()
Now when you run this file train() will get called, but when you import this file in another one, train won't get called automatically.

Categories