I have a problem that I cannot understand: even though the module 'torch_geometric.transforms' has an attribute 'AddTrainValTestMask' according to the documentation, I cannot use it. I keep receiving the error AttributeError: module 'torch_geometric.transforms' has no attribute 'AddTrainValTestMask'
My PyTorch version is 1.7.1.
I took the code from here
Minimum reproducible example:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv
# Name of the Planetoid dataset to load.
dataset = 'Cora'
transform = T.Compose([
    # NOTE(review): this is the call that raises the AttributeError described
    # in the question; on torch_geometric releases where the transform has
    # been renamed, it is exposed as RandomNodeSplit instead.
    T.AddTrainValTestMask('train_rest', num_val=500, num_test=500),
    T.TargetIndegree(),
])
# Store the data in ../data/<dataset> relative to this script.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
# `dataset` is rebound from the name string to the loaded dataset object.
dataset = Planetoid(path, dataset, transform=transform)
# Work with the first graph of the dataset.
data = dataset[0]
class Net(torch.nn.Module):
    """Two-layer SplineConv classifier operating on the module-level ``data``.

    Layer sizes are taken from the module-level ``dataset``
    (``num_features`` inputs, 16 hidden channels, ``num_classes`` outputs).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
        self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

    def forward(self):
        """Return log-softmax scores (over dim 1) for the global ``data``."""
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_attr)
        return F.log_softmax(x, dim=1)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move both the model parameters and the graph data to the chosen device.
model, data = Net().to(device), data.to(device)
# Adam with L2 regularisation applied through weight_decay.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-3)
def train():
    """Run one optimisation step using only the nodes in ``data.train_mask``."""
    model.train()
    optimizer.zero_grad()
    log_probs = model()
    loss = F.nll_loss(log_probs[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
@torch.no_grad()  # evaluation only: no autograd graph is needed
def test():
    """Return ``[train_accuracy, test_accuracy]`` for the current model."""
    model.eval()
    log_probs, accs = model(), []
    for _, mask in data('train_mask', 'test_mask'):
        # Index of the highest log-probability is the predicted class.
        pred = log_probs[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs
# Train for 200 epochs, reporting train/test accuracy after every epoch.
log = 'Epoch: {:03d}, Train: {:.4f}, Test: {:.4f}'
for epoch in range(1, 201):
    train()
    print(log.format(epoch, *test()))
Can anybody explain to me the problem?
It has been renamed to RandomNodeSplit in the latest version of torch_geometric. You can directly use RandomNodeSplit to replace it.
Related
I am working on a multiclass image classification problem that has 4k labeled images. Currently, I am using cross-validation. However, I want to try nested cross-validation for the problem in PyTorch, but I couldn't find an example of it.
Is it possible to use nested cross-validation in PyTorch? If so, how?
I built a simple pipeline below. Is it correct to implement nested cross-validation like this?
import torch
from torch.utils.data import DataLoader,SubsetRandomSampler
from sklearn.model_selection import KFold
from torchvision import datasets
# Shape of the synthetic image tensor: (num_samples, channels, height, width).
# NOTE(review): 244 may be a typo for 224 — confirm; it is harmless here
# because the data is random.
input_size = (256,3,224,244)
# One integer class label per sample.
target_size = (256,)
class Dataset(datasets.VisionDataset):
    """In-memory dataset of random images with random labels in ``{0, 1, 2}``.

    Tensor shapes come from the module-level ``input_size`` and
    ``target_size``.
    """

    def __init__(self):
        # VisionDataset expects a root *path* as its first argument; the
        # original passed the instance itself. Nothing is read from disk here,
        # so an empty root is used.
        super().__init__(root="")
        self.images = torch.rand(input_size).float()
        self.targets = torch.randint(0, 3, target_size)

    def __getitem__(self, index: int) -> tuple:
        """Return the ``(image, target)`` pair stored at ``index``."""
        return self.images[index], self.targets[index]

    def __len__(self) -> int:
        """Return the number of stored images."""
        return len(self.images)
class BasicModel(torch.nn.Module):
    """Small CNN: 5x5 conv -> global average pool -> 3-way linear classifier."""

    def __init__(self) -> None:
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 16, kernel_size=(5, 5))
        # Pooling to 1x1 makes the classifier independent of the input size.
        self.adp = torch.nn.AdaptiveAvgPool2d(1)
        self.linear = torch.nn.Linear(16, 3)

    def forward(self, x):
        """Map a ``(N, 3, H, W)`` batch to ``(N, 3)`` class logits."""
        x = self.conv(x)
        x = self.adp(x)
        # Flatten (N, 16, 1, 1) -> (N, 16) for the linear layer.
        x = x.view(x.size(0), -1)
        return self.linear(x)
# Nested cross-validation driver: an outer K-fold holds out a test split and
# an inner (K-1)-fold splits the remaining samples into train/validation.
data_ids = [*range(input_size[0])]
data = Dataset()
model = BasicModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
k_fold = 5
kfold_test = KFold(n_splits=k_fold, shuffle=True)
num_epochs = 2
# NOTE(review): the same model and optimizer are reused across every fold, so
# later folds start from weights that have already seen earlier folds' data.
# Re-create both per fold if independent fold estimates are required.
for test_fold, (remain_ids, test_ids) in enumerate(kfold_test.split(data_ids)):
    test_sampler = SubsetRandomSampler(test_ids)
    testLoader = DataLoader(data, sampler=test_sampler, batch_size=2)
    kfold_val = KFold(n_splits=k_fold - 1, shuffle=True)
    for epoch in range(num_epochs):
        for val_fold, (train_pos, val_pos) in enumerate(kfold_val.split(remain_ids)):
            # KFold.split yields positions *within* remain_ids, not dataset
            # ids. Map them back, otherwise held-out test samples leak into
            # the train/validation loaders.
            train_ids = remain_ids[train_pos]
            val_ids = remain_ids[val_pos]
            train_sampler = SubsetRandomSampler(train_ids)
            trainLoader = DataLoader(data, sampler=train_sampler, batch_size=2)
            val_sampler = SubsetRandomSampler(val_ids)
            valLoader = DataLoader(data, sampler=val_sampler, batch_size=2)
            model.train()
            for image, target in trainLoader:
                with torch.cuda.amp.autocast():
                    output = model(image)
                    loss = criterion(output, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                print(f"train loss:{loss.item()}")
            model.eval()
            with torch.no_grad():
                for image, target in valLoader:
                    output = model(image)
                    loss = criterion(output, target)
                    # This is the validation split; the original printed it
                    # under the "test loss" label.
                    print(f"val loss:{loss.item()}")
    # Final evaluation on the outer held-out fold.
    model.eval()
    with torch.no_grad():
        for image, target in testLoader:
            output = model(image)
            loss = criterion(output, target)
            print(f"test loss:{loss.item()}")
While running this code on my dataset, I am getting errors. The head of my data looks like this:
fridge_temperature temp_condition label
0 13.10 high 0
1 8.65 high 0
2 2.00 low 0
3 4.80 low 0
4 10.70 high 0
and this is the shape of my data: (587076, 3)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
# --------------- Dataset ---------------
class StudentsPerformanceDataset(Dataset):
    """Tabular dataset read from a CSV file, with one-hot encoded categoricals."""

    def __init__(self, csv_file):
        """Initializes instance of class StudentsPerformanceDataset.

        Args:
            csv_file (str): Path to the csv file with the data.
        """
        # Read the file that was actually requested; the original ignored
        # ``csv_file`` and always opened a hard-coded path.
        df = pd.read_csv(csv_file)
        # Drop the columns that are not used as predictors.
        df = df.drop(columns=['date_time', 'type'])
        print(df.head())
        print(df.shape)
        # Grouping variable names
        self.categorical = ["temp_condition"]
        self.target = "label"
        # One-hot encoding of categorical variables. dtype=float keeps every
        # feature column numeric so a row converts to a float array rather
        # than an object array.
        self.students_frame = pd.get_dummies(
            df, prefix=self.categorical, dtype=float
        )
        # Save target and predictors
        self.X = self.students_frame.drop(self.target, axis=1)
        self.y = self.students_frame[self.target]

    def __len__(self):
        """Return the number of rows in the encoded frame."""
        return len(self.students_frame)

    def __getitem__(self, idx):
        """Return ``[features, target]`` for the row at position ``idx``."""
        # Convert idx from tensor to list due to pandas bug (that arises when
        # using pytorch's random_split)
        if isinstance(idx, torch.Tensor):
            idx = idx.tolist()
        # Index both frames positionally so features and target always refer
        # to the same row, whatever the frame's index labels are.
        return [self.X.iloc[idx].values, self.y.iloc[idx]]
# --------------- Model ---------------
class Net(nn.Module):
    """One-hidden-layer MLP regressor: Linear -> ReLU -> Linear."""

    def __init__(self, D_in, H=15, D_out=1):
        """
        Args:
            D_in (int): Number of input features.
            H (int): Hidden layer width.
            D_out (int): Number of outputs.
        """
        super().__init__()
        self.fc1 = nn.Linear(D_in, H)
        self.fc2 = nn.Linear(H, D_out)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return predictions; the trailing size-1 output dimension is dropped."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        # Squeeze only the last dimension: a bare squeeze() would also drop
        # the batch dimension for a single-row batch and then no longer match
        # the shape of the labels.
        return x.squeeze(-1)
# An earlier copy of the Net class kept as a bare string literal; it is an
# expression statement only and is never executed as code.
""" class Net(nn.Module):
def __init__(self, D_in, H=15, D_out=1):
super().__init__()
self.fc1 = nn.Linear(D_in, H)
self.fc2 = nn.Linear(H, D_out)
self.relu = nn.ReLU()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x.squeeze() """
# --------------- Training ---------------
def train(csv_file, n_epochs=100):
    """Trains the model.

    Args:
        csv_file (str): Absolute path of the dataset used for training.
        n_epochs (int): Number of epochs to train.
    """
    # Load dataset
    dataset = StudentsPerformanceDataset(csv_file)
    # Split into training and test
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    trainset, testset = random_split(dataset, [train_size, test_size])
    # Dataloaders
    trainloader = DataLoader(trainset, batch_size=200, shuffle=True)
    testloader = DataLoader(testset, batch_size=200, shuffle=False)
    # Use gpu if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Define the model
    # NOTE(review): D_in is hard-coded. The reported error
    # "(200x7 and 3x15)" means each batch row carries 7 features while the
    # first layer expects 3; D_in has to equal the number of columns in
    # dataset.X.
    D_in, H = 3, 15
    net = Net(D_in, H).to(device)
    # Loss function
    criterion = nn.MSELoss()
    # Optimizer
    optimizer = optim.Adam(net.parameters(), weight_decay=0.0001)
    # Train the net
    loss_per_iter = []   # one entry per mini-batch
    loss_per_epoch = []  # mean mini-batch loss, one entry per epoch
    batches_per_epoch = len(trainloader)
    for epoch in range(n_epochs):
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward + backward + optimize
            outputs = net(inputs.float())
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()
            # Save loss to plot
            running_loss += loss.item()
            loss_per_iter.append(loss.item())
        loss_per_epoch.append(running_loss / max(batches_per_epoch, 1))
    # Comparing training to test (first test batch only)
    # next(iterator) works on every PyTorch version; the iterator's .next()
    # method is no longer available in newer releases.
    inputs, labels = next(iter(testloader))
    inputs = inputs.to(device)
    labels = labels.to(device)
    with torch.no_grad():
        outputs = net(inputs.float())
        test_mse = criterion(outputs, labels.float()).item()
    print("Root mean squared error")
    print("Training:", np.sqrt(loss_per_epoch[-1]))
    print("Test", np.sqrt(test_mse))
    # Plot training loss curve
    plt.plot(np.arange(len(loss_per_iter)), loss_per_iter, "-", alpha=0.5,
             label="Loss per mini-batch")
    # Place each epoch mean at the index of that epoch's last mini-batch. The
    # original hard-coded 4 batches per epoch (step=4, +3), which makes the x
    # and y lengths differ for any other dataset size.
    epoch_x = (np.arange(len(loss_per_epoch)) + 1) * batches_per_epoch - 1
    plt.plot(epoch_x, loss_per_epoch, ".-", label="Loss per epoch")
    plt.xlabel("Number of iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()
if __name__ == "__main__":
    import os
    import sys
    import argparse

    # Default dataset location.
    # NOTE(review): on Windows the second component is an absolute drive path,
    # so os.path.join presumably discards sys.path[0] and this resolves to
    # "Z:/new_file.csv" rather than a file next to the script — confirm.
    csv_file = os.path.join(sys.path[0], "Z:/new_file.csv")
    # Parsing arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", nargs="?", const=csv_file, default=csv_file,
                        help="Dataset file used for training")
    parser.add_argument("--epochs", "-e", type=int, nargs="?", default=100,
                        help="Number of epochs to train")
    args = parser.parse_args()
    # Call the main function of the script
    train(args.file, args.epochs)
I am getting this error:
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (200x7 and 3x15)
I'm currently trying to train a Recurrent Neural Network with PyTorch and I am having trouble managing the DataLoader. Let's start from the beginning.
import matplotlib.pyplot as plt
import numpy as np
import torch
# Build two noisy periodic series sampled at 300 time steps.
T = 50  # period
t = 300  # time
timeStep = np.linspace(0, t, 300)
mu = 0
sigma = np.sqrt(0.001)
x1 = []
x2 = []
for s in timeStep:
    # Independent Gaussian noise (mean mu, std sigma) for each series; each
    # draw is a length-1 array, so x1/x2 are lists of shape-(1,) arrays.
    eps1 = np.random.randn(1) * sigma + mu
    eps2 = np.random.randn(1) * sigma + mu
    x1.append(np.cos(2 * s * np.pi / T) + eps1)
    x2.append(np.sin(4 * s * np.pi / T) + eps2)
from torch import nn
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader
class Data(torch.utils.data.Dataset):
    """Dataset built from the module-level ``x1``/``x2`` series.

    ``X`` holds the stacked series reshaped to ``(-1, 2, 2)``. ``Y`` is ``X``
    with its first entry removed and ``X[1]`` appended at the end, so both
    tensors have the same length.
    """

    def __init__(self):
        # The original wrapped this in a 10-iteration loop that recomputed the
        # same tensors each time; a single pass gives the same result.
        series = np.stack([x1, x2], axis=1).reshape([-1, 2, 2])
        self.X = torch.from_numpy(series).float()
        # Same values as delete-first-then-append-X[1], without converting the
        # tensor to numpy and back.
        self.Y = torch.cat([self.X[1:], self.X[1].reshape([1, 2, 2])], dim=0)
        print(self.X.shape, self.Y.shape)

    def __len__(self):
        """Return the number of entries along the first dimension of ``X``."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair at ``index``."""
        feature = self.X[index]
        label = self.Y[index]
        return feature, label
# Build the dataset once at module level; the constructor prints the X/Y shapes.
dataset = Data()
At this point, dataset.X.shape,dataset.Y.shape gives [150,2,2], [150,2,2]. So up until here, that's what I need to get so no problem. (I get 2 samples of 150 time series data from a 300 data time series).
from torch.autograd import Variable
from typing import Tuple
class Recurrent(nn.Module):
    """Single-layer LSTM followed by ReLU and a linear projection to 2 outputs.

    The LSTM state is kept on the instance and carried from one forward call
    to the next, detached from the autograd graph.
    """

    def __init__(self, hidden_dim: int = 20):
        super().__init__()
        self.hidden_dim: int = hidden_dim
        self.hidden: Tuple[torch.Tensor, torch.Tensor] = self.init_hidden()
        self.rnn = nn.LSTM(2, self.hidden_dim)
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 2)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the LSTM and the output head.

        The LSTM is created without ``batch_first``, so ``x`` is read as
        ``(seq_len, batch, 2)``; ``batch`` must be 2 to match the stored state.
        """
        x, hidden = self.rnn(x, self.hidden)
        # Keep the state values but cut the graph, so the next call does not
        # backpropagate into this one (replaces the deprecated
        # Variable(tensor.data) idiom).
        self.hidden = (hidden[0].detach(), hidden[1].detach())
        x = self.fc(x)
        return x

    def init_hidden(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return zero ``(h0, c0)`` of shape ``(1, 2, hidden_dim)`` each."""
        return (
            torch.zeros(1, 2, self.hidden_dim),
            torch.zeros(1, 2, self.hidden_dim),
        )
def fit(model, dataset, batch_size=2, epochs = 100, loss_print_per_epoch = 10):
    """Train ``model`` on ``dataset`` with MSE loss and return it.

    Args:
        model: Module to optimise.
        dataset: Dataset yielding ``(feature, label)`` pairs.
        batch_size (int): Mini-batch size for the DataLoader.
        epochs (int): Last epoch index; ``epochs + 1`` passes are run.
        loss_print_per_epoch (int): Print the mean loss every this many epochs.

    Returns:
        The trained model.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Multiplies the base learning rate by 0.97 ** epoch.
    scheduler = LambdaLR(optimizer, lr_lambda=lambda _e: 0.97 ** _e)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    def _train_model(_m, _d):
        """Run one pass over ``_d``; return the model and the mean batch loss."""
        _m.train()
        train_batch_loss = []
        for x, y in _d:
            optimizer.zero_grad()
            output = _m(x)
            print(output.shape)
            loss = criterion(output, y)
            # Each batch builds its own graph, so it does not have to be
            # retained after backward.
            loss.backward()
            optimizer.step()
            train_batch_loss.append(loss.item())
        return _m, np.mean(train_batch_loss)

    for epoch in range(epochs+1):
        model, train_loss = _train_model(model, train_loader)
        if epoch % loss_print_per_epoch == 0:
            print(f'epoch: {epoch}/{epochs} loss: {train_loss} lr: {scheduler.get_last_lr()[0]}')
        scheduler.step()
    return model
# Train a fresh Recurrent model on the dataset with mini-batches of 2 entries.
model = fit(model=Recurrent(), dataset=dataset, batch_size=2, epochs=100)
When I iterate through the DataLoader with my for loop, this is where the problem comes up. output.shape should be [150,2,2] for batch size = 2 and [150,1,2] for batch size = 1. But using the print in the for loop gives me a shape of [2,2,2] and I have no idea why. If anyone could help me understand what is going on here, it would be a great help.
I’m just getting started with pytorch. I am trying to do a simple binary classification project with the cats and dogs dataset. After much fumbling around, I was able to get the model to train, but I’m not getting the expected results.
First, the loss starts out way too low. To me, that seems to indicate I’m not measuring loss correctly.
Second, the model just predicts everything as 0.
I’m sure there are many mistakes here, but I would appreciate it if someone could take a look and let me know what I’m doing wrong. Thank you!
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import make_grid
from torchvision.utils import save_image
from sklearn.model_selection import train_test_split
import os
import numpy as np
from sklearn import preprocessing
import glob
import cv2
# Select the first CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
# Side length, in pixels, that images are resized to.
IMAGE_SIZE = 64
# Root folder that holds one sub-folder per class.
DATA_DIR = "C:\\Users\\user\\source\\repos\\pytorch-youtube\\data\\catsdogs\\PetImages\\"
# NOTE(review): not referenced by the visible code — confirm whether it is needed.
LABELS = ('cat', 'dog')
# custom dataset class
# expects the root folder to have sub folders with class names
# and pictures of classes inside folder
class CustomImageDataset(Dataset):
    """Image dataset whose class labels come from sub-folder names.

    Every ``*.jpg`` under ``DATA_DIR/<class name>/`` becomes one sample. Items
    are ``(image, class_id)`` with the image as a channel-first uint8 tensor
    and ``class_id`` as a tensor of shape ``(1,)``.
    """

    def __init__(self):
        self.imgs_path = DATA_DIR
        self.data = []
        for class_path in glob.glob(self.imgs_path + "*"):
            # basename/join instead of hand-written "\\" so the scan does not
            # depend on the platform's path separator.
            class_name = os.path.basename(class_path)
            for img_path in glob.glob(os.path.join(class_path, "*.jpg")):
                self.data.append([img_path, class_name])
        self.class_map = {"Dog": 0, "Cat": 1}
        self.img_dim = (IMAGE_SIZE, IMAGE_SIZE)

    def __len__(self):
        """Return the number of image files found."""
        return len(self.data)

    def _load_image(self, idx):
        """Return ``(resized image, class name)`` for the first readable file.

        Starts at ``idx`` and moves forward, wrapping around, so corrupt files
        are skipped without indexing past the end of the list.
        """
        total = len(self.data)
        for offset in range(total):
            img_path, class_name = self.data[(idx + offset) % total]
            # cv2.imread signals an unreadable file by returning None.
            img = cv2.imread(img_path)
            if img is not None:
                return cv2.resize(img, self.img_dim), class_name
        raise RuntimeError("no readable image found in the dataset")

    def __getitem__(self, idx):
        """Return ``(image tensor, class-id tensor)`` for sample ``idx``."""
        img, class_name = self._load_image(idx)
        class_id = self.class_map[class_name]
        # cv2 arrays are (height, width, channels); Conv2d expects channels
        # first, hence the permute.
        img_tensor = torch.from_numpy(img).permute(2, 0, 1)
        class_id = torch.tensor([class_id])
        return img_tensor, class_id
# as is, we aren't using these
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ConvertImageDtype(torch.float32),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)
dataset = CustomImageDataset()
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
# Pull one batch to inspect the feature/label tensors. The builtin next()
# works on every PyTorch version; the iterator's .next() method is no longer
# available in newer releases.
dataiter = iter(dataloader)
train_features, train_labels = next(dataiter)
class Net(nn.Module):
    """Small CNN for 3x64x64 images producing 2 class logits."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Spatial size for a 64x64 input: 64 -conv5-> 60 -pool-> 30
        # -conv5-> 26 -pool-> 13, so the flattened size is 16 * 13 * 13 = 2704.
        self.fc1 = nn.Linear(2704, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Map a ``(N, 3, 64, 64)`` float batch to ``(N, 2)`` logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
# net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        # this is the fix for "expected scalar type Byte but found Float"
        # this seems to completely destroy the features in the image to just white
        # NOTE(review): .float() only changes the dtype; presumably the values
        # stay in the 0-255 range and are never normalised — confirm.
        inputs = inputs.float()
        optimizer.zero_grad()
        outputs = net(inputs)
        # NOTE(review): labels has shape (batch, 1), so torch.max(labels, 1)[1]
        # is the arg-max index along a size-1 dimension, i.e. always 0. Every
        # target therefore becomes class 0, which matches the "predicts
        # everything as 0" symptom described in the question.
        loss = criterion(outputs, torch.max(labels,1)[1])
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.10f}')
            running_loss = 0.0
print("finished")
# save the model
PATH = './custom_trained_model_dogs_cats.pth'
torch.save(net.state_dict(), PATH)
It seems I was passing in the wrong thing to my loss function. I changed this line
loss = criterion(outputs, torch.max(labels,1)[1])
to this
loss = criterion(outputs, torch.max(labels,1)[0])
and everything seems to be working. I'm able to correctly classify the cats and dogs.
F.nll_loss: I am getting
AttributeError: 'int' object has no attribute 'size'
when I try to run this code. I also get a snippet of the module code.
raise ValueError('Expected 2 or more dimensions (got {})'.format(dim))
if input.size(0) != target.size(0):
raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
format(input.size(0), target.size(0)))
import torch
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pylab as plt
# MNIST training split, downloaded on demand; images are converted to tensors.
train_dataset = datasets.MNIST(root = '', train =True, download = True,
transform =transforms.Compose([transforms.ToTensor()]))
# MNIST test split with the same transform.
test_dataset = datasets.MNIST(root ='', download =True, train =False,
transform =transforms.Compose([transforms.ToTensor()]))
# Number of samples per mini-batch.
batch_size = 10
# Shuffled mini-batch loader over the training split.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle =True)
# NOTE(review): this rebinds `test_dataset` to a DataLoader; the name no longer
# refers to the dataset itself after this line.
test_dataset = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle =True)
class Net(nn.Module):
    """Fully connected classifier for flattened 28x28 images (10 classes)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Map ``(N, 784)`` inputs to ``(N, 10)`` log-probabilities."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # The original applied fc2 a second time here, leaving fc3 unused.
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
# Sanity check: push one random 28x28 "image" through an untrained network.
x = torch.rand((28, 28))
x = x.view(-1, 28*28)
net = Net()
out = net(x)
out
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
EPOCHS = 3
for epoch in range(EPOCHS):
    # NOTE(review): this iterates the raw dataset, which yields one
    # (image, label) pair at a time with the label as a plain Python int.
    # F.nll_loss then fails with "'int' object has no attribute 'size'";
    # iterating train_loader yields batched label tensors instead.
    for data in train_dataset:
        x, y = data
        net.zero_grad()
        x = x.view(-1, 28*28)
        output = net(x)
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    # Loss of the last processed sample in this epoch.
    print(loss)
Just change the for loop from:
for data in train_dataset:
to
for data in train_loader: