I am still grappling with PyTorch, having played with Keras for a while (which feels a lot more intuitive).
Anyway - I have the nn.linear model code below, which works fine for just one input feature, where:
inputDim = 1
I am now trying to expand the same code to include 2 features, and so I have included another column in my feature dataframe and also set:
inputDim = 2
However, when I run the code, I get the dreaded error:
RuntimeError: mat1 dim 1 must match mat2 dim 0
This error references line 63, which is:
outputs = model(inputs)
I have gone through several other posts here relating to this dimensionality error, but I still can't see what is wrong with my code. Any help would be appreciated.
The full code looks like this:
import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
# Use the GPU when torch reports one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load the price table; two columns are the features, one is the target.
df = pd.read_csv('Adjusted Close - BAC-UBS-WFC.csv')
x = df[['BAC', 'UBS']]
y = df['WFC']
# number_of_features = x.shape[1]
# print(number_of_features)
x_train = np.array(x, dtype=np.float32)
# NOTE(review): x has two columns, so reshape(-1, 1) stacks them into a single
# column of 2N rows; that no longer matches a model built with inputDim = 2.
x_train = x_train.reshape(-1, 1)
y_train = np.array(y, dtype=np.float32)
# Column vector of targets, shape (N, 1).
y_train = y_train.reshape(-1, 1)
class linearRegression(torch.nn.Module):
    """Linear regression model: one fully connected layer.

    Args:
        inputSize: number of features per sample.
        outputSize: number of values predicted per sample.
    """

    def __init__(self, inputSize, outputSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        """Apply the linear layer to ``x``.

        The last dimension of ``x`` must equal ``inputSize``; the result has
        ``outputSize`` in its last dimension.
        """
        return self.linear(x)
inputDim = 2
outputDim = 1
learningRate = 0.01
epochs = 500

# Model instantiation
model = linearRegression(inputDim, outputDim)
# Follow the device chosen at the top of the script instead of calling
# .cuda() unconditionally, so the script also runs on CPU-only machines.
model.to(device)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

# The full training set is used every epoch, so build the tensors once.
# inputs must have shape (N, inputDim) for the linear layer to accept it.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Model training
loss_series = []
for epoch in range(epochs):
    # Clear gradient buffers because we don't want any gradient from the
    # previous epoch to carry forward (gradients accumulate by default)
    optimizer.zero_grad()
    # get output from the model, given the inputs
    outputs = model(inputs)
    # get loss for the predicted output
    loss = criterion(outputs, labels)
    # get gradients w.r.t. the parameters
    loss.backward()
    # update parameters
    optimizer.step()
    loss_series.append(loss.item())
    print('epoch {}, loss {}'.format(epoch, loss.item()))

# Calculate predictions on training data
with torch.no_grad():  # we don't need gradients in the testing phase
    predicted = model(inputs).cpu().numpy()
General advice: For errors with dimension, it usually helps to print out dimensions at each step of the computation.
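Most likely, in this specific case, the mistake is in reshaping the input with x_train = x_train.reshape(-1, 1), which stacks the two feature columns into a single column.
Your input becomes (2N, 1), but the network expects (N, 2). Remove that reshape so that x_train keeps its (N, 2) shape.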
I'm new to PyTorch, and I have been stuck on this problem for a while. I have trained a CNN for classifying X-ray images. The images can be found on this Kaggle page: https://www.kaggle.com/prashant268/chest-xray-covid19-pneumonia/ .
I managed to get good accuracy on both the training and test data, but when I try to make predictions on new images I get the same (wrong) class for every image. Here's my model in detail.
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
import glob
import torch.nn.functional as F
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
from google.colab import drive
epochs = 20
batch_size = 128
learning_rate = 0.001

# Data transformation
transformer = transforms.Compose([
    # The CNN's first fully connected layer is sized for 224x224 inputs.
    transforms.Resize((224, 224)),
    # PIL image -> float tensor; Normalize only works on tensors.
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 per channel
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
# Dataset locations on the mounted Drive
train_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/train'
test_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/test'

# One ImageFolder per split; only the training split is shuffled
train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=transformer),
    batch_size=batch_size,
    shuffle=False,
)

# Class names are the sorted entry names found directly under the train folder
root = pathlib.Path(train_path)
classes = sorted(entry.name.split('/')[-1] for entry in root.iterdir())

# Count the image files of each split, one glob per supported extension
train_count = sum(
    len(glob.glob(train_path + pattern))
    for pattern in ('/**/*.jpg', '/**/*.png', '/**/*.jpeg')
)
test_count = sum(
    len(glob.glob(test_path + pattern))
    for pattern in ('/**/*.jpg', '/**/*.png', '/**/*.jpeg')
)
#Create the CNN
class CNN(nn.Module):
    """Two conv/pool stages followed by three fully connected layers.

    Spatial size after a convolution:
    nout = (width + 2*padding - kernel_size) / stride + 1.
    The flatten size 24 * 53 * 53 is hard-coded, so inputs must be
    3 x 224 x 224 images.

    Args:
        num_classes: size of the output layer. When omitted, the length of
            the module-level ``classes`` list is used.
    """

    def __init__(self, num_classes=None):
        # nn.Module has to be initialised before sub-modules can be assigned;
        # without this call the first `self.conv1 = ...` raises AttributeError.
        super().__init__()
        if num_classes is None:
            num_classes = len(classes)
        # input: [N, 3, 224, 224]
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
        # [N, 12, 220, 220]
        self.pool1 = nn.MaxPool2d(2, 2)  # reduces the images by a factor of 2
        # [N, 12, 110, 110]
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)
        # [N, 24, 106, 106]
        self.pool2 = nn.MaxPool2d(2, 2)
        # [N, 24, 53, 53] which becomes the input of the fully connected layer
        self.fc1 = nn.Linear(in_features=(24 * 53 * 53), out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        # final layer, output will be the number of classes
        self.fc3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Return raw class scores (logits) of shape [N, num_classes]."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Flatten per sample; keeping the batch dimension fixed means a wrong
        # input size fails in fc1 instead of silently changing the batch size.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Training the model
model = CNN()
# CrossEntropyLoss includes the softmax, so the model outputs raw scores
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
n_total_steps = len(train_loader)

for epoch in range(epochs):
    # Running counters for this epoch's training accuracy
    n_correct = 0
    n_samples = 0
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = loss_function(outputs, labels)

        # Backpropagation and optimization
        optimizer.zero_grad()  # empty gradients
        loss.backward()        # compute gradients of the loss
        optimizer.step()       # update the weights

        # Accuracy over the batches seen so far in this epoch
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        acc = 100.0 * n_correct / n_samples
        print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{n_total_steps}], Accuracy: {round(acc,2)} %, Loss: {loss.item():.4f}')
# Testing the model
model.eval()  # switch layers to inference behaviour before evaluating
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    # One slot per class (not updated in the code shown here)
    n_class_correct = [0 for _ in range(len(classes))]
    n_class_samples = [0 for _ in range(len(classes))]
    for images, labels in test_loader:
        outputs = model(images)
        # max returns (value, index); the index is the predicted class
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network: {acc} %')
# Only the weights are stored; loading requires building CNN() first and
# then calling load_state_dict on it.
torch.save(model.state_dict(),'/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/model.model')
For loading the model and trying to make predictions on new images, the code is as follows:
checkpoint = torch.load('/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/model.model')
model = CNN()
# Copy the saved weights into the new instance. Without this call the model
# keeps its random initial weights and its predictions are meaningless.
model.load_state_dict(checkpoint)
model.eval()

#Data Transformation (new images must be preprocessed like the training data)
transformer = transforms.Compose([
    transforms.Resize((224, 224)),  # the CNN is sized for 224x224 inputs
    transforms.ToTensor(),          # Normalize only works on tensors
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
#Making preidctions on new data
from PIL import Image
def prediction(img_path, transformer):
    """Return the predicted class name for the image stored at ``img_path``.

    Args:
        img_path: path of the image file to classify.
        transformer: callable that turns the RGB PIL image into the tensor
            fed to the model.

    Returns:
        The entry of the module-level ``classes`` list at the position of the
        largest model output.
    """
    # Close the file handle as soon as the pixel data has been converted
    with Image.open(img_path) as img:
        image = img.convert('RGB')
    image_tensor = transformer(image)
    # Add a leading batch dimension so the single image forms a batch of one
    image_tensor = image_tensor.unsqueeze(0)
    with torch.no_grad():  # inference only, no gradients needed
        output = model(image_tensor)
    index = output.argmax(dim=1).item()
    pred = classes[index]
    return pred
pred_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Test_images/Data/'
test_imgs = glob.glob(pred_path+'/*')
# NOTE(review): the loop body is missing from the source as shown; printing
# each file with its predicted class is the minimal body that uses prediction().
for i in test_imgs:
    print(f'{i}: {prediction(i, transformer)}')
I'm guessing the problem must be in the way that I am preprocessing the data, although I cannot find my mistake. Any help will be deeply appreciated, since I have been stuck on this for a while now.
P.S. I can share my notebook as well, if it is of any help.
Regarding your problem, I have a really good way to debug this to target where the problem most likely will be and so it will be really easy to fix your issue.
My debugging process relies on the fact that your CNN performs well on the test set. First, temporarily set your test loader's batch size to 1. Then, in your test loop, at the point where you count the correct predictions, run the following code:
#Your code
outputs = model(images) # Really only one image and 1 output.
#Altered Code:
correct = (predicted == labels).sum().item() # This will be either 1 or 0 since you have only one image per batch
# My new code:
if correct:
    # if value is 1 instead of 0 then turn value into a single image with no batch size
    single_correct_image = images.squeeze(0)
    # Undo Normalize(mean=0.5, std=0.5) so the pixel values are back in [0, 1];
    # ToPILImage would otherwise save distorted colours.
    single_correct_image = (single_correct_image * 0.5 + 0.5).clamp(0, 1)
    # Then convert tensor image into PIL image
    pil_image = transforms.ToPILImage()(single_correct_image)
    # Save the pil image; the path needs a file name with an extension so
    # that PIL can pick the output format.
    pil_image.save("/content/correct_image.png")
    #Terminate testing process. Ignore Value Error if it says terminating process
    raise ValueError("terminating process")
Now you have an image saved to disk that you know is correct in the test set. The next step would be to open such image and run it to your predict function. Couple of things can happen and thus give info about your situation
If your model returns the wrong answer, then the prediction code differs from the testing code in a way that matters. One uses torch.max and torch.sum; the other uses np.argmax. You can use print statements to debug what is going on there. Perhaps there is a conversion error, or the output's format is not what you expect.
If your code return the right answer then your model is just failing to predict on new images. I suggest running more trial cases with the above process.
For additional reference, if you still get very stuck to the point where you feel like you can't solve it, then I suggest using this notebook to guide and give some suggestions on what code to atleast inspect.
Sarthak Jain
I'm new to PyTorch and I'm trying to implement a simple CNN to recognize MNIST images.
I'm training the network using MSE Loss as loss function and SGD as optimizer. When I get to the training it gives me the following
warning: " UserWarning: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([64, 10])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size."
And then I get the following
error: "RuntimeError: The size of tensor a (10) must match the size of tensor b
(64) at non-singleton dimension 1".
I've tried to solve it using some solutions I've found in other questions but nothing seems to work. Here's the code of how I load the dataset:
# Convert images to tensors, then normalise the single channel with mean 0.5 / std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split, reshuffled every epoch
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Test split, kept in its stored order
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)
The code to define my network:
class Net(nn.Module):
    """Small CNN for 1 x 28 x 28 images with 10 output scores.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, reduce
    a 28x28 input to 12 feature maps of 4x4, which feed three linear layers.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 12, 5)
        # Fully connected layers
        self.fc1 = nn.Linear(12*4*4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, x):
        """Return raw scores of shape [N, 10] for a batch ``x`` of shape [N, 1, 28, 28]."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        # Flatten the 12 feature maps of 4x4 into one vector per sample
        x = x.reshape(-1, 12*4*4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x
And this is the training:
net = Net()
# NOTE(review): nn.MSELoss needs output and target of the same shape. With
# [64, 10] network outputs and [64] integer labels it raises the size-mismatch
# error; nn.CrossEntropyLoss is the criterion that accepts exactly these shapes.
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)
epochs = 3

for epoch in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        optimizer.zero_grad()  # gradients accumulate unless cleared
        output = net(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean loss over the batches of this epoch
    print(f"Training loss: {running_loss/len(trainloader)}")
print('Finished training')
print('Finished training')
Thank you!
The loss you're using (nn.MSELoss) is incorrect for this problem. You should use nn.CrossEntropyLoss.
Mean Squared Loss measures the mean squared error between input x and target y. Here the input and target naturally should be of the same shape.
Cross Entropy Loss computes the probability over the classes for each image. The output would be a matrix N x C and target would be a vector of size N. (N = batch size, C = number of classes)
Since your aim is to classify the image, this is what you'll want to use.
In your case, your network output will be a matrix of size 64 x 10 and target is a vector of size 64. Each row of the output matrix (after applying the softmax function) indicates the probability of that class after which the Cross entropy loss is computed. Pytorch's nn.CrossEntropyLoss combines both the softmax operation with the loss computation.
You can refer the documentation here for more info on how Pytorch computes losses.
I agree with @AshwinNair's advice. In addition, I changed the for loop in the train and eval sections as shown below, and it worked for me.
for i, (img, label) in enumerate(dataloader):
    # Move each batch to the target device before the forward pass
    img = img.to(device)
    label = label.to(device)
I'm learning how to use pytorch and I was able to get a grasp on the overall process of construction and execution of ML models. However, what I am not able to grasp is how to "format" or "reshape" the data before executing the model. I keep getting errors like:
RuntimeError: size mismatch, m1: [1 x 700], m2: [1 x 1] at c:\programdata\miniconda3\conda-bld\pytorch_1524543037166\work\aten\src\th\generic/THTensorMath.c:2033
Expected object of type Variable[torch.DoubleTensor] but found type Variable[torch.FloatTensor] for argument #1 ‘mat2’
So, I have a csv file named "train.csv" with attributes called 'x' and 'y' and there are 700 samples in it, I want to perform a simple linear regression on the data, and I parse data from it using pandas, how do I format or reshape the data such that it will execute smoothly? How does pytorch iterate through input data?
The recent code i executed is:
import torch
import torch.nn as nn
from torch.autograd import Variable
import pandas as pd
class Linear_Reg(nn.Module):
    """Linear regression as a single ``nn.Linear`` layer.

    Args:
        inp_sz: number of input features per sample.
        out_sz: number of outputs per sample.
    """

    def __init__(self, inp_sz, out_sz):
        super(Linear_Reg, self).__init__()
        self.linear = nn.Linear(inp_sz, out_sz)

    def forward(self, x):
        """Apply the layer; ``x`` must have ``inp_sz`` as its last dimension."""
        out = self.linear(x)
        return out
# Read the train/test CSV files; the 'x' and 'y' columns of each are used below.
train = pd.read_csv('C:\\Users\\hgstr\\Jupyter_Files\\Data_Sets\\linear_regression\\train.csv')
test = pd.read_csv('C:\\Users\\hgstr\\Jupyter_Files\\Data_Sets\\linear_regression\\test.csv')
# One tensor per column and split.
x_train = torch.Tensor(train['x'])
y_train = torch.Tensor(train['y'])
x_test = torch.Tensor(test['x'])
y_test = torch.Tensor(test['y'])
# x_train is already a Tensor at this point, so this re-wrap is redundant.
x_train = torch.Tensor(x_train)
# NOTE(review): view(1, -1) produces a single row holding every sample, i.e.
# one sample with as many features as there are rows; a model built with
# input size 1 needs one row per sample instead, e.g. view(-1, 1).
x_train = x_train.view(1,-1)
input_sz = 1
output_sz = 1
epochs = 60
learning_rate = 0.001

model = Linear_Reg(input_sz, output_sz)
crit = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), learning_rate)

for e in range(epochs):
    opt.zero_grad()   # gradients accumulate unless cleared
    out = model(x_train)
    loss = crit(out, y_train)
    loss.backward()   # compute gradients of the loss
    opt.step()        # update weight and bias
    # loss is a zero-dimensional tensor; item() extracts the Python number
    print('epoch {}, loss {}'.format(e, loss.item()))
And it gave out the following:
RuntimeError: size mismatch, m1: [1 x 700], m2: [1 x 1] at c:\programdata\miniconda3\conda-bld\pytorch_1524543037166\work\aten\src\th\generic/THTensorMath.c:2033
According to the error, I believe that your data is not correctly formatted. The tensor should have the form [700, 1] (batch x features), but yours is [1, 700] (features x batch). This makes the model 'think' that you are feeding it a single training entry with 700 features instead of 700 entries with only 1 feature each.
Reshaping the x_train variable should make the code work. Just remove the line x_train = x_train.view(1,-1).
Regarding the second error, it can be that after reading the .csv into a variable its type is Double (due to pd.read_csv) while in pytorch by default Tensors are created as floats. I think that casting your input data before feeding it to the model should be enough: model(x_train.float()) or specifying it in the Tensor creation part x_train = torch.FloatTensor(train['x']). Note that you should cast all the Tensors that are not Floats.
edit: This piece of code works for me
import torch
import torch.nn as nn
import pandas as pd
class Linear_Reg(nn.Module):
    """Linear regression as a single ``nn.Linear`` layer.

    Args:
        inp_sz: number of input features per sample.
        out_sz: number of outputs per sample.
    """

    def __init__(self, inp_sz, out_sz):
        super(Linear_Reg, self).__init__()
        self.linear = nn.Linear(inp_sz, out_sz)

    def forward(self, x):
        """Apply the layer; ``x`` must have ``inp_sz`` as its last dimension."""
        out = self.linear(x)
        return out
train = pd.read_csv('yourpath')
test = pd.read_csv('yourpath')

# One float32 column vector per CSV column. view(-1, 1) infers the number of
# rows, so the script does not depend on the files having 700 / 300 samples.
x_train = torch.tensor(train['x'].values, dtype=torch.float).view(-1, 1)
y_train = torch.tensor(train['y'].values, dtype=torch.float).view(-1, 1)
x_test = torch.tensor(test['x'].values, dtype=torch.float).view(-1, 1)
y_test = torch.tensor(test['y'].values, dtype=torch.float).view(-1, 1)

# ================================
input_sz = 1
output_sz = 1
epochs = 60
learning_rate = 0.001
# ================================

model = Linear_Reg(input_sz, output_sz)
crit = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), learning_rate)

for e in range(epochs):
    opt.zero_grad()   # gradients accumulate unless cleared
    out = model(x_train)
    loss = crit(out, y_train)
    loss.backward()   # compute gradients of the loss
    opt.step()        # update weight and bias
    # loss is a zero-dimensional tensor; item() extracts the Python number
    print('epoch {}, loss {}'.format(e, loss.item()))
I'm pretty new to tensorflow and trying to do some experiments with the Iris dataset. I created following model function (MWE):
def model_fn(features, labels, mode):
    """Estimator model function: one dense layer with 3 output units.

    Args:
        features: dict of feature tensors keyed by the names in FEATURE_NAMES.
        labels: label tensor passed to the softmax cross entropy.
        mode: estimator mode, forwarded unchanged to the EstimatorSpec.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the loss and the train op.
    """
    net = tf.feature_column.input_layer(
        features,
        [tf.feature_column.numeric_column(key=key) for key in FEATURE_NAMES],
    )
    logits = tf.layers.dense(inputs=net, units=3)
    # NOTE(review): this op yields one loss value per example, while
    # minimize() needs a scalar; wrapping it in tf.reduce_mean (or using
    # tf.losses.softmax_cross_entropy) provides one.
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    # NOTE(review): the arguments of this call are cut off in the source as
    # shown; loss plus the global step is the usual form — confirm.
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
Unfortunately I get the following error:
InvalidArgumentError: Input to reshape is a tensor with 256 values, but the requested shape has 1
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](softmax_cross_entropy_with_logits_sg, Reshape/shape)]]
Seems to be some problem with the shapes of the tensors. However both logits and labels have an equal shape of (256, 3) - as it is required by the documentation. Also both tensors have type float32.
Just for the sake of completeness, here is the input function for the estimator:
import pandas as pd
import tensorflow as tf
import numpy as np
IRIS_DATA = "data/iris.csv"
FEATURE_NAMES = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
CLASS_NAME = ["class"]
# Column names for the header-less CSV: the four features followed by the
# class column (COLUMNS was referenced below without being defined).
COLUMNS = FEATURE_NAMES + CLASS_NAME

# read dataset
iris = pd.read_csv(IRIS_DATA, header=None, names=COLUMNS)

# encode classes as integer category codes
iris["class"] = iris["class"].astype('category').cat.codes

# train test split: each row goes to the training set with probability 0.8
msk = np.random.rand(len(iris)) < 0.8
train = iris[msk]
test = iris[~msk]
def iris_input_fn(batch_size=256, mode="TRAIN"):
    """Return one batch of (features, labels) tensors for the estimator.

    Args:
        batch_size: number of rows per batch.
        mode: accepted for interface compatibility. NOTE(review): the visible
            code never reads it; the module-level ``train`` frame is always
            used.

    Returns:
        The ``get_next()`` tensors of a one-shot iterator over a shuffled,
        endlessly repeated, batched dataset: a dict of feature columns and
        one-hot float32 labels of depth 3.
    """

    def transform_data(data):
        # Cast to float32
        return data.astype(np.float32)

    def prepare_input(data=None):
        # Mean/std normalisation across all samples. The axis is made explicit
        # so the result does not depend on how np.mean/np.std dispatch on a
        # DataFrame; ddof=0 matches np.std's default.
        mu = data.mean(axis=0)
        sigma = data.std(axis=0, ddof=0)
        data = (data - mu) / sigma
        # Reduce over the raw array so each check yields a single boolean
        values = data.values
        has_nan = np.isnan(values).any()
        has_inf = np.isinf(values).any()
        if has_nan or has_inf:
            print('data is not well-formed : is_nan {n}, is_inf: {i}'.format(n=has_nan, i=has_inf))
        return transform_data(data)

    def load_data():
        # NOTE(review): the original return statement is truncated in the
        # source as shown and used an undefined name; splitting the frame by
        # FEATURE_NAMES / CLASS_NAME is a reconstruction — confirm.
        trn_all_data = train[FEATURE_NAMES]
        trn_all_labels = train[CLASS_NAME[0]]
        return (trn_all_data.astype(np.float32),
                trn_all_labels.astype(np.int32))

    data, labels = load_data()
    data = prepare_input(data)
    labels = tf.one_hot(labels, depth=3)
    labels = tf.cast(labels, tf.float32)
    dataset = tf.data.Dataset.from_tensor_slices((data.to_dict(orient="list"), labels))
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()
Dataset from UCI repo
Solved the problem by replacing the loss function from nn module:
loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
by the loss function of losses module
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
or by
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
loss which is fed to the minimize method of GradientDescentOptimizer needed to be a scalar. A single value for the whole batch.
Problem was, I computed the softmax cross entropy for each element in the batch, which resulted in a tensor containing 256 (batch size) cross entropy values, and tried to feed this in the minimize method. Therefore the error message
Input to reshape is a tensor with 256 values, but the requested shape has 1
I want to train, evaluate the accuracy and eventually predict with my model. This is my first time using high level APIs such as tf.estimator.
I'm getting a value error from estimator.train(train_input_fn):
'ValueError: features should be a dictionary of `Tensor's. Given type: '
I'm not sure what is going on here. My model is taking 3 inputs and producing a binary output from one neuron.
Before this error I was getting an error about the requested shape not equal to the actual shape, or something along those lines. I fixed it by reducing the batchSize down to 1, instead of 100. I'm sure this isn't going to do so well when it comes to training though.
Any ideas? Here's my code:
import tensorflow as tf
import numpy as np
import sys
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Testing/')
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Training/')
#other files
from TestDataNormaliser import *
from TrainDataNormaliser import *
# Hyper-parameters.
# NOTE(review): get_train_op() below uses its own learning rate of 1e-3, so
# this module-level value is not the one handed to the optimizer.
learning_rate = 0.01
# NOTE(review): trainingIteration and displayStep are not referenced in the
# code shown here.
trainingIteration = 15
batchSize = 1
displayStep = 2
#Layers using tf.layers
def get_logits(features):
    """Return the raw (pre-sigmoid) output of a 3-4-1 dense network.

    Args:
        features: input tensor fed to the first dense layer.

    Returns:
        The output of the final single-unit dense layer, with no activation.
    """
    l1 = tf.layers.dense(features, 3, activation=tf.nn.relu)
    l2 = tf.layers.dense(l1, 4, activation=tf.nn.relu)
    # No activation here: the sigmoid is applied inside the loss
    l3 = tf.layers.dense(l2, 1, activation=None)
    a = l3
    return a
#cost function
def get_loss(a, labels):
    """Return the mean sigmoid cross entropy between logits and labels.

    Args:
        a: logits tensor.
        labels: target tensor; presumably the same shape and float dtype as
            ``a`` — verify against the input pipeline.

    Returns:
        A scalar tensor. The per-element losses are averaged because the
        optimizer needs a single value for the whole batch; returning them
        unreduced only works by accident when the batch holds one element.
    """
    #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=labels)
    )
    #cross_entropy = tf.reduce_mean((l3 - y)**2)
    #cross_entropy = -tf.reduce_sum(y*tf.log(a))-tf.reduce_sum((1-y)*tf.log(1-a))
def get_train_op(loss, learning_rate=1e-3):
    """Return an RMSProp training op that minimises ``loss``.

    Args:
        loss: tensor to minimise.
        learning_rate: optimizer step size; defaults to the previously
            hard-coded 1e-3.

    Returns:
        The op created by ``minimize``; it also increments the global step.
    """
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    return optimizer.minimize(loss, global_step=tf.train.get_global_step())
def get_inputs(feature_data, label_data, batch_size, n_epochs=None, shuffle=True):
    """Build a tf.data pipeline and return its next (features, labels) batch.

    Args:
        feature_data: features, sliced along the first dimension.
        label_data: labels, sliced along the first dimension.
        batch_size: number of examples per batch.
        n_epochs: value passed to ``Dataset.repeat``; the default ``None``
            is passed through unchanged.
        shuffle: when true, shuffle with a buffer of ``len(feature_data)``.

    Returns:
        The ``(features, labels)`` tensors of a one-shot iterator.
    """
    dataset = tf.data.Dataset.from_tensor_slices(
        (feature_data, label_data))
    dataset = dataset.repeat(n_epochs)
    if shuffle:
        dataset = dataset.shuffle(len(feature_data))
    dataset = dataset.batch(batch_size)
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels
def model_fn(features, labels, mode):
    """Estimator model function for the binary classifier.

    Args:
        features: input tensor passed to get_logits.
        labels: target tensor passed to get_loss and the accuracy metric.
        mode: estimator mode, forwarded unchanged to the EstimatorSpec.

    Returns:
        A ``tf.estimator.EstimatorSpec`` with predictions, loss, train op and
        an 'Accuracy' evaluation metric.
    """
    a = get_logits(features)
    loss = get_loss(a, labels)
    train_op = get_train_op(loss)
    # A positive logit corresponds to a sigmoid output above 0.5
    predictions = tf.greater(a, 0)
    accuracy = tf.metrics.accuracy(labels, predictions)
    # NOTE(review): every argument except eval_metric_ops is cut off in the
    # source as shown; the values computed above are passed in — confirm.
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops={'Accuracy': accuracy},
    )
# Input function handed to the estimator for training.
# NOTE(review): the argument list of this get_inputs(...) call is missing from
# the source as shown; the statement is incomplete and has to be restored
# (feature data, label data, batch size) before this can run.
def train_input_fn():
return get_inputs(
# Input function handed to the estimator for evaluation.
# NOTE(review): the argument list of this get_inputs(...) call is missing from
# the source as shown; the statement is incomplete and has to be restored
# (feature data, label data, batch size) before this can run.
def eval_input_fn():
return get_inputs(
# Directory passed along with the estimator definition below.
model_dir = './savedModel'
# NOTE(review): feature_columns receives the model_fn function and the
# model_dir string, neither of which is a feature column. A custom model_fn is
# normally wired in through
# tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir).
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
#estimator.train(train_input_fn, max_steps=1)
Your problem is this line:
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
You need to set the feature_columns argument to an array of feature columns. A feature column tells the estimator about the data you're feeding it.
It looks like all your input data is numeric, so I'd call tf.feature_column.numeric_column to create your feature column(s). The documentation is here. For example, the following code creates a numeric feature column containing x-coordinates:
xcol = tf.feature_column.numeric_column('x')
If all your estimator needs are x-coordinates, then you could create the estimator with the following code:
estimator = tf.estimator.LinearRegressor(feature_columns=[xcol])