Pytorch - Problem with fine tune training from custom features and classes - python

The core of my problem is the fact that my features come from NumPy files (.npy).
Therefore I need the following class in my code
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50
import time
import copy
class MyDataSet(torch.utils.data.Dataset):
def __init__(self, x, y, transform=None):
super(MyDataSet, self).__init__()
# store the raw tensors
self._x = np.load(x)
self._y = np.load(y)
self.transform = transform
def __len__(self):
# a DataSet must know it size
return self._x.shape[0]
def __getitem__(self, index):
x = self._x[index, :]
y = self._y[index, :]
return x, y
To convert my NumPy files to DataLoaders I do the following. The code below seems to work (at least, no errors are returned)
#Transform dataset
transform = transforms.Compose([transforms.ToTensor()])
dataset = MyDataSet("train1-features.npy","train1-classes.npy",transform=transform)
dataloader = DataLoader(dataset, batch_size=32)
I am trying to fine-tune a RESNET-50 network in these data with 12 classes. Here is what I do
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
feature_extract = True
batch_size = 8
num_epochs = 15
num_classes=12
model_ft = resnet50(pretrained=True)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
if torch.cuda.is_available():
model_ft.cuda()
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
params_to_update = []
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
params_to_update.append(param)
print("\t",name)
else:
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
print("\t",name)
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()
Finally, here is the problematic training function
for epoch in range(num_epochs): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(dataloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
#transfer labels and inputs to cuda()
inputs,labels=inputs.cuda(), labels.cuda()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model_ft(inputs)
loss = loss_func(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
This returns me the following error once I execute the code:
Traceback (most recent call last):
File "train_my_data_example.py", line 89, in <module>
for i, data in enumerate(dataloader, 0):
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 557, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "train_my_data_example.py", line 29, in __getitem__
y = self._y[index, :]
IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed
The error is clearly the dataloader variable, so is this creation ok? I mean, I am loading NumPy data and transforming it to a data loader as below:
transform = transforms.Compose([transforms.ToTensor()])
dataset = MyDataSet("train1-features.npy","train1-classes.npy",transform=transform)
dataloader = DataLoader(dataset, batch_size=32)
Is there any error in my data loader or is the problem the training loop of Pytorch?
P.s: you can reproduce my code by downloading the classes and features here

You are trying to index the second axis of an array which only has a single dimension. Simply replace y = self._y[index, :] with y = self._y[index].
Actually when positioned last, : is not required as all dimensions are selected by default.

Related

How to train network on images of different sizes Pytorch

I am trying to feed the Neural network dataset of images and I am getting this error
I don't know what might be the cause as all the images have different sizes
I have also tried to change batch sizes and kernels but I had no success with this.
File "c:\Users\david\Desktop\cs_agent\main.py", line 49, in <module>
for i, data in enumerate(train_loader, 0):
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 530, in __next__
data = self._next_data()
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 570, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
return self.collate_fn(data)
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in default_collate
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in <listcomp>
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 138, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 300, 535] at entry 0 and [3, 1080, 1920] at entry 23
this is my main file
import numpy as np
import matplotlib.pyplot as plt
import torch
import dataset
import os
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision
import check_device
import neural_network
import torch.optim as optim
EPS = 1.e-7
LR=0.5
WEIGHT_DECAY=0.5
batch_size =50
#DATA LOADING ###################################################################################################################
test_dataset =dataset.csHeadBody(csv_file="images\\test_labels.csv",root_dir="images\\test")
train_dataset =dataset.csHeadBody(csv_file="images\\train_labels.csv",root_dir="images\\train")
train_loader =DataLoader(dataset =train_dataset,batch_size=batch_size,shuffle=True)
test_loader =DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=True)
#DATA LOADING ###################################################################################################################END
#NEURAL NET #####################################################################################################################################################
net=neural_network.Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
#NEURAL NET END ######################################################################################
for epoch in range(2): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
# get the inputs; data is a list of [inputs, labels]
print(data)
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
print('Finished Training')
and this is my dataset file
class csHeadBody(Dataset):
def __init__(self, csv_file, root_dir, transform=None, target_transform=None):
self.img_labels = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.root_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label
this is my neural network architecture
import torch.nn.functional as F
import torch.nn as nn
import torch
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 535, 535)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = torch.flatten(x, 1) # flatten all dimensions except batch
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
You need to adjust the parameters of your convolutional and linear layers. The first argument is the number of input channels (3 for standard RGB images in conv1), then the number of output channels and then the convolution kernel size. To clarify, I've used named arguments in the code below. The code works for images of a square input size of 224x224 pixels (standard imagenet size, adjust if needed). If you want image size agnostic code you could use something like global average pooling (mean of each channel in the last conv layer). The net below supports both:
class Net(nn.Module):
def __init__(self, use_global_average_pooling: bool = False):
super().__init__()
self.use_global_average_pooling = use_global_average_pooling
self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
self.pool = nn.MaxPool2d(kernel_size=(2, 2))
self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
if use_global_average_pooling:
self.fc_gap = nn.Linear(64, 10)
else:
self.fc_1 = nn.Linear(54 * 54 * 64, 84) # 54 img side times 64 out channels from conv2
self.fc_2 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x))) # img side: (224 - 2) // 2 = 111
x = self.pool(F.relu(self.conv2(x))) # img side: (111 - 2) // 2 = 54
if self.use_global_average_pooling:
# mean for global average pooling (mean over channel dimension)
x = x.mean(dim=(-1, -2))
x = F.relu(self.fc_gap(x))
else: # use all features
x = torch.flatten(x, 1)
x = F.relu(self.fc_1(x))
x = self.fc_2(x)
return x
Additionally, the torchvision.io.read_image function used in your Dataset returns an uint8 tensor with integer values from 0 to 255. You'll want floating point values for your network, so you have to divide the result by 255 to get values in the [0, 1] range. Furthermore, neural networks work best with normalized inputs (subtracting the mean and then dividing by the standard error of your training dataset). I've added normalization to the image transforms below. For convenience, it is using the imagenet mean and standard error, which should work fine if your images are similar to imagenet images (otherwise you can calculate them on your own images).
Note that the resizing might distort your images (doesn't keep the original aspect ratio). Often this is no problem, but if it is you might want to pad your images with a constant color (e.g. black) to resize them to the required dimensions (there are also transforms for this in the torchvision library).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
transforms = torchvision.transforms.Compose([
torchvision.transforms.Lambda(lambda x: x / 255.),
torchvision.transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
torchvision.transforms.Resize((224, 224)),
])
You might also need to adjust the code in your Dataset to load images as an RGB image (if they also have an alpha channel). This can be done like this:
image = read_image(img_path, mode=torchvision.io.image.ImageReadMode.RGB)
You can then initialise your Dataset using:
test_dataset = dataset.csHeadBody(csv_file="images\\test_labels.csv", root_dir="images\\test", transform=transforms)
train_dataset = dataset.csHeadBody(csv_file="images\\train_labels.csv", root_dir="images\\train", transform=transforms)
I haven't tested the code, let me know if it doesn't work!

Keras functional model produces subclassing error

I am trying to use the functional api to create an autoencoder in keras. Everything works fine, however, when I try to load the saved model it throws an error that relates to the Model subclassing api. It also throws an error related to autograph that I believe is not relevant to the model loading issue.
I am using tensorflow 2.1 from anaconda on windows 10 and running the code in Spyder 4.
My code with dummy data:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import load_model
import numpy as np
import h5py
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import time
import pickle
def build_autoencoder(width, height, depth, filters=[32], latentDim=64):
# initialize the input shape to be "channels last" along with
# the channels dimension itself
inputShape = (height, width, depth)
chanDim = -1
# define the input to the encoder
inputs = Input(shape=inputShape)
x = inputs
# loop over the number of filters
for f in filters:
# apply a CONV => RELU => BN operation
x = Conv2D(f, (3, 3), strides=2, padding="same")(x)
x = LeakyReLU(alpha=0.2)(x)
x = BatchNormalization(axis=chanDim)(x)
# flatten the network and then construct our latent vector
volumeSize = K.int_shape(x)
x = Flatten()(x)
latent = Dense(latentDim)(x)
# build the encoder model
encoder = Model(inputs, latent, name="encoder")
# start building the decoder model which will accept the
# output of the encoder as its inputs
latentInputs = Input(shape=(latentDim,))
x = Dense(np.prod(volumeSize[1:]))(latentInputs)
x = Reshape((volumeSize[1], volumeSize[2], volumeSize[3]))(x)
# loop over our number of filters again, but this time in
# reverse order
for f in filters[::-1]:
# apply a CONV_TRANSPOSE => RELU => BN operation
x = Conv2DTranspose(f, (3, 3), strides=2,
padding="same")(x)
x = LeakyReLU(alpha=0.2)(x)
x = BatchNormalization(axis=chanDim)(x)
# apply a single CONV_TRANSPOSE layer used to recover the
# original depth of the image
x = Conv2DTranspose(depth, (3, 3), padding="same")(x)
outputs = Activation("sigmoid")(x)
# build the decoder model
decoder = Model(latentInputs, outputs, name="decoder")
# our autoencoder is the encoder + decoder
autoencoder = Model(inputs, decoder(encoder(inputs)),
name="autoencoder")
# return a 3-tuple of the encoder, decoder, and autoencoder
return (encoder, decoder, autoencoder)
class DataGenerator(Sequence):
"""Generates data for Keras
Sequence based data generator. Suitable for building
data generator for training and prediction.
"""
def __init__(self, indexes, data_path, dataset_name,
to_fit=True, batch_size=16, dim=(256, 256),
n_channels=3, shuffle=True):
"""Initialization
:param num_samples: number of samples in dataset
:param data_path: path to data file location
:param dataset_name: name of datset in datafile
:param to_fit: True to return X and y, False to return X only
:param batch_size: batch size at each iteration
:param dim: tuple indicating image dimension
:param n_channels: number of image channels
:param shuffle: True to shuffle label indexes after every epoch
"""
self.indexes = np.sort(indexes)
self.data_path = data_path
self.dataset_name = dataset_name
self.to_fit = to_fit
self.batch_size = batch_size
self.dim = dim
self.n_channels = n_channels
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
"""Denotes the number of batches per epoch
:return: number of batches per epoch
"""
return int(np.floor(len(self.indexes) / self.batch_size))
def __getitem__(self, index):
"""Generate one batch of data
:param index: index of the batch
:return: X and y when fitting. X only when predicting
"""
# Generate indexes of the batch
indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
# Generate data
X = self._generate_X(indexes)
# normalise images
X = np.divide(X, 255.0)
if self.to_fit:
return X, X
else:
return X
def on_epoch_end(self):
"""Updates indexes after each epoch
"""
if self.shuffle == True:
np.random.shuffle(self.indexes)
def _generate_X(self, indexes):
"""Generates data containing batch_size images
:param list_IDs_temp: list of label ids to load
:return: batch of images
"""
# Generate data
with h5py.File(self.data_path, 'r') as f:
indexes = np.sort(indexes)
X = f[self.dataset_name][indexes, :, :, :]
return X
DATA_PATH = "simulation_data.hdf5"
DATA_NAME = "visual_obs"
EPOCHS = 3
BATCH = 16
DIM = [256, 256]
CHANNELS = 3
NUM_SAMPLES = 100
# Dummy data
with h5py.File('simulation_data.hdf5', 'w') as f:
vis_data = f.create_dataset('visual_obs', (NUM_SAMPLES, 256, 256, 3))
vis_data[:, :, :, :] = np.random.rand(NUM_SAMPLES, 256, 256, 3)
# construct training data generator and validation generator
number_train_samples = int(np.floor(NUM_SAMPLES*0.7))
number_val_samples = int(np.floor(NUM_SAMPLES*0.2))
indexes = np.arange(NUM_SAMPLES)
np.random.shuffle(indexes)
train_indexes = indexes[:number_train_samples]
val_indexes = indexes[number_train_samples:number_train_samples+number_val_samples]
test_indexes = indexes[number_train_samples+number_val_samples:]
train_generator = DataGenerator(train_indexes, DATA_PATH, DATA_NAME,
to_fit=True, batch_size=BATCH, dim=DIM,
n_channels=CHANNELS, shuffle=True)
val_generator = DataGenerator(val_indexes, DATA_PATH, DATA_NAME,
to_fit=True, batch_size=BATCH, dim=DIM,
n_channels=CHANNELS, shuffle=True)
# construct our convolutional autoencoder
(encoder, decoder, autoencoder) = build_autoencoder(*DIM, CHANNELS)
opt = Adam(lr=1e-3)
autoencoder.compile(loss="mse", optimizer=opt)
# train the convolutional autoencoder
H = autoencoder.fit(train_generator,
epochs=EPOCHS, validation_data = val_generator,
workers=4, use_multiprocessing=False)
ts = time.time()
autoencoder.save("model_test", save_format= "tf")
loaded_model = load_model("model_test")
The error:
WARNING: AutoGraph could not transform <function canonicalize_signatures.<locals>.signature_wrapper at 0x00000138CDBF2948> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause:
INFO:tensorflow:Assets written to: model_test\assets
Traceback (most recent call last):
File "C:\Users\seano\Thesis\test.py", line 190, in <module>
loaded_model = load_model("model_test")
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\save.py", line 150, in load_model
return saved_model_load.load(filepath, compile)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 89, in load
model = tf_load.load_internal(path, loader_cls=KerasObjectLoader)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\saved_model\load.py", line 552, in load_internal
export_dir)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 119, in __init__
self._finalize()
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\saving\saved_model\load.py", line 157, in _finalize
created_layers={layer.name: layer for layer in node.layers})
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 1903, in reconstruct_from_config
process_node(layer, node_data)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 1851, in process_node
output_tensors = layer(input_tensors, **kwargs)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py", line 773, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "C:\Users\seano\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\network.py", line 712, in call
raise NotImplementedError('When subclassing the `Model` class, you should'
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I think the root-cause of the issue is using DataGenerator(called as custom_objects) in your model.
When you have custom_objects in your model, then loading of the model is little different. You need to add custom_objects to the load_model as shown below
loaded_model = load_model('model_test',custom_objects={'DataGenerator':DataGenerator})

Model returns a Nan value

I was trying to build a neural network with 4 input nodes/ features and just one output feature(0/1). I wrote this code and it runs but while training the model returns NaN. I debugged too and weights and biases are fine until they go through the model.
From what I've searched so far, this could be a problem in the way I am passing the data.
My input data is : tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 1.5340e+00],
[1.5000e+01, 1.0000e-01, 2.4210e+00, 3.0000e+01],
[3.0000e+00, 2.2000e-01, 2.2000e-01, 4.5000e+01],
...,
[1.0000e+00, 2.0000e-02, 2.0000e-02, 1.5000e+01],
[6.0000e+00, 2.0000e-01, 2.0000e-01, 1.5000e+01],
[1.7000e+01, 5.2400e-01, 5.2400e-01, 2.0000e+00]], dtype=torch.float64)
import torch
from torchvision import datasets, transforms
import pandas as pd
import numpy as np
from torch.autograd import Variable
# Import tensor dataset & data loader
from torch.utils.data import TensorDataset, DataLoader
from torch import nn, optim
import torch.nn.functional as F
file = pd.read_csv('ks-projects-201801.csv')
array = np.array(file.values)
result = np.empty(len(array))
input_data = np.empty((len(array), 4))
for i in range(len(array)):
input_data[i] = np.array([array[i][10], array[i][12]/1000, array[i][13]/1000, array[i][14]/1000])
if array[i][9] == 'successful':
result[i] = 1
else:
result[i] = 0
input_node = Variable(torch.from_numpy(input_data))
output = torch.from_numpy(result)
print(input_node)
print(output)
train_ds = TensorDataset(input_node.squeeze(), output.squeeze())
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
This is the actual model and training
model = nn.Linear(4, 1)
print(model.weight)
print(model.bias)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.003)
epochs = 5
model = model.double()
for e in range(epochs):
running_loss = 0
for xb, yb in train_dl:
optimizer.zero_grad()
res = model(xb)
loss = criterion(res, yb)
loss.backward()
optimizer.step()
running_loss += loss.item()
else:
print(f"model : {loss}")
This prints out model: nan for every epoch and terminates. I am very new to pytorch and I'm not sure how to handle this problem.
If you see NaN's in loss try gradient clipping and data normalisation. Normalising data is a must (i.e normalize input data such that mean = 0 and variance =1)

Pytorch: How to format data before execution of machine learning

I'm learning how to use pytorch and I was able to get a grasp on the overall process of construction and execution of ML models. However, what I am not able to grasp is how to "format" or "reshape" the data before executing the model. I keep getting errors like:
RuntimeError: size mismatch, m1: [1 x 700], m2: [1 x 1] at c:\programdata\miniconda3\conda-bld\pytorch_1524543037166\work\aten\src\th\generic/THTensorMath.c:2033
Or,
Expected object of type Variable[torch.DoubleTensor] but found type Variable[torch.FloatTensor] for argument #1 ‘mat2’
So, I have a csv file named "train.csv" with attributes called 'x' and 'y' and there are 700 samples in it, I want to perform a simple linear regression on the data, and I parse data from it using pandas, how do I format or reshape the data such that it will execute smoothly? How does pytorch iterate through input data?
The recent code i executed is:
import torch
import torch.nn as nn
from torch.autograd import Variable
import pandas as pd
class Linear_Reg(nn.Module):
def __init__(self, inp_sz, out_sz):
super(Linear_Reg, self).__init__()
self.linear = nn.Linear(inp_sz, out_sz)
def forward(self, x):
out = self.linear(x)
return out
train = pd.read_csv('C:\\Users\\hgstr\\Jupyter_Files\\Data_Sets\\linear_regression\\train.csv')
test = pd.read_csv('C:\\Users\\hgstr\\Jupyter_Files\\Data_Sets\\linear_regression\\test.csv')
x_train = torch.Tensor(train['x'])
y_train = torch.Tensor(train['y'])
x_test = torch.Tensor(test['x'])
y_test = torch.Tensor(test['y'])
x_train = torch.Tensor(x_train)
x_train = x_train.view(1,-1)
#================================
input_sz = 1;
output_sz = 1
epochs = 60
learning_rate = 0.001
#================================
model = Linear_Reg(input_sz, output_sz)
crit = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), learning_rate)
for e in range(epochs):
opt.zero_grad()
out = model(x_train)
loss = crit(out, y_train)
loss.backward()
opt.step()
print('epoch {}, loss {}'.format(e,loss.data[0]))
And it gave out the following:
RuntimeError: size mismatch, m1: [1 x 700], m2: [1 x 1] at c:\programdata\miniconda3\conda-bld\pytorch_1524543037166\work\aten\src\th\generic/THTensorMath.c:2033
Solutions?
According to the error, I believe that your data is not correctly formatted. The tensor should be in the form [700, 2] (batch x data) and yours is [1, 700] (data x batch). This makes the model 'think' that you are adding only one entry as training with 700 features instead of 700 entries with only 1 feature.
Reshaping the x_train variable should make the code work. Just remove the line x_train = x_train.view(1,-1).
Regarding the second error, it can be that after reading the .csv into a variable its type is Double (due to pd.read_csv) while in pytorch by default Tensors are created as floats. I think that casting your input data before feeding it to the model should be enough: model(x_train.float()) or specifying it in the Tensor creation part x_train = torch.FloatTensor(train['x']). Note that you should cast all the Tensors that are not Floats.
edit: This piece of code works for me
import torch
import torch.nn as nn
import pandas as pd
class Linear_Reg(nn.Module):
def __init__(self, inp_sz, out_sz):
super(Linear_Reg, self).__init__()
self.linear = nn.Linear(inp_sz, out_sz)
def forward(self, x):
out = self.linear(x)
return out
train = pd.read_csv('yourpath')
test = pd.read_csv('yourpath')
x_train = torch.Tensor(train['x']).to(torch.float).view(700, 1)
y_train = torch.Tensor(train['y']).to(torch.float).view(700, 1)
x_test = torch.Tensor(test['x']).to(torch.float).view(300, 1)
y_test = torch.Tensor(test['y']).to(torch.float).view(300, 1)
# ================================
input_sz = 1;
output_sz = 1
epochs = 60
learning_rate = 0.001
# ================================
model = Linear_Reg(input_sz, output_sz)
crit = nn.MSELoss()
opt = torch.optim.SGD(model.parameters(), learning_rate)
for e in range(epochs):
opt.zero_grad()
out = model(x_train)
loss = crit(out, y_train)
loss.backward()
opt.step()
print('epoch {}, loss {}'.format(e, loss.data[0]))

How can I train multiple models in one python module in TFLearn?

I am trying to train two models in one python module using TFLearn. I am using restore=False for all the layers. I am getting error when the second model's fit method is called:
Traceback (most recent call last):
File "multiple_models.py", line 76, in <module>
a_model.fit(X_inputs=X, Y_targets=Y, validation_set=0.1, show_metric=True, batch_size=None, shuffle=True, n_epoch=20) # 100% of data being used for validation
File "/Users/swarbhanu/miniconda2/lib/python2.7/site-packages/tflearn/models/dnn.py", line 182, in fit
self.targets)
File "/Users/swarbhanu/miniconda2/lib/python2.7/site-packages/tflearn/utils.py", line 289, in feed_dict_builder
feed_dict[net_inputs[i]] = x
IndexError: list index out of range
This error does not happen if one of the models is commented out, and therefore only one model is trained. Any help would be great! I have gone through (as far as I can tell) all previous stack overflow questions about problems to do with training or loading multiple models in tflearn or tensorflow but the suggested solutions (ex: restore=False, or using variable_scope) did not work for me. It is very important in my usage scenario to use one module to train (and later load and fit) multiple models. The code is below:
import os.path
import numpy as np
import tflearn
from tflearn.layers.core import input_data, fully_connected
from tflearn.layers.normalization import batch_normalization
from tflearn.layers.recurrent import bidirectional_rnn, BasicLSTMCell
from tflearn.layers.estimator import regression
import tensorflow as tf
i_model_file = 'example1.tfl'
a_model_file = 'example2.tfl'
batch_size = 50
sequence_len = 10
sequence_unit_array_size = 300
output_array_size = 1
# Set parameters
i_num_lstm_units = 128
i_num_labels = 5
i_learning_rate = 0.001
a_num_lstm_units = 128
a_num_labels = 4
a_learning_rate = 0.001
def create_data(batch_size, sequence_len, sequence_unit_array_size, num_labels):
shape_x = (batch_size,sequence_len,sequence_unit_array_size)
shape_y = (batch_size, num_labels)
X = np.random.random(shape_x)
Y = np.zeros(shape_y)
ind = np.random.randint(low=0,high=num_labels,size=batch_size)
for b in xrange(batch_size):
Y[b][ind[b]] = 1
return X, Y
def create_classification_model(target_name, num_lstm_units, num_labels, learning_rate, saved_model_file):
with tf.variable_scope(target_name):
input_layer = input_data(shape=[None, sequence_len, sequence_unit_array_size])
conv = tflearn.conv_1d(input_layer, nb_filter=2, filter_size=3, regularizer='L2', weight_decay=0.0001,restore=False)
bnorm1 = batch_normalization(conv,restore=False)
birnn = bidirectional_rnn(bnorm1, BasicLSTMCell(num_lstm_units), BasicLSTMCell(num_lstm_units))
bnorm2 = batch_normalization(birnn, restore=False)
conn = fully_connected(bnorm2, n_units=num_labels, activation='softmax',restore=False)
regress = regression(conn, optimizer='adam', learning_rate= learning_rate, loss='categorical_crossentropy', shuffle_batches=True,restore=False)
model = tflearn.DNN(regress, clip_gradients=0., tensorboard_verbose=3)
return model
i_model = create_classification_model('intent', num_lstm_units=i_num_lstm_units, num_labels=i_num_labels, learning_rate=i_learning_rate, saved_model_file=i_model_file)
# Get data
X, Y = create_data(batch_size = batch_size, sequence_len = sequence_len, sequence_unit_array_size = sequence_unit_array_size, num_labels=i_num_labels)
for overalliter in xrange(1):
i_model.fit(X_inputs=X, Y_targets=Y, validation_set=0.1, show_metric=True, batch_size=None, shuffle=True,
n_epoch=20) # 100% of data being used for validation
i_model.save(i_model_file)
# Predicting on sample sentences
X_new, _ = create_data(batch_size = 1, sequence_len = sequence_len, sequence_unit_array_size = sequence_unit_array_size, num_labels=i_num_labels)
Y_new = i_model.predict(X_new)
print "X_new: ", X_new
print "Y_predicted: ", Y_new
a_model = create_classification_model('action', num_lstm_units=a_num_lstm_units, num_labels=a_num_labels, learning_rate=a_learning_rate, saved_model_file=a_model_file)
print a_model
# Training data
X, Y = create_data(batch_size = batch_size, sequence_len = sequence_len, sequence_unit_array_size = sequence_unit_array_size, num_labels=a_num_labels)
for overalliter in xrange(1):
a_model.fit(X_inputs=X, Y_targets=Y, validation_set=0.1, show_metric=True, batch_size=None, shuffle=True, n_epoch=20) # 100% of data being used for validation
a_model.save(a_model_file)
# Predicting on sample sentences
X_new, _ = create_data(batch_size = 1, sequence_len = sequence_len, sequence_unit_array_size = sequence_unit_array_size, num_labels=a_num_labels)
Y_new = a_model.predict(X_new)
print "X_new: ", X_new
print "Y_predicted: ", Y_new
I had the same problem.
Put with tf.Graph().as_default(): before i_model = create_classification_model and a_model = create_classification_model and indent properly.
Or check how it's done here
https://github.com/tflearn/tflearn/blob/master/examples/basics/logical.py

Categories