Computing statistical results for the test dataset in PyTorch

I'm creating a vanilla neural network with artificial datasets to learn PyTorch. I'm currently looking at how to get predictions for the test dataset and compute statistical metrics including MSE, MAE, and R2. I was wondering if my calculations are correct. Also, is there any built-in function in PyTorch that could give me all these results, the way scikit-learn does?
Let's first import the libraries and then generate artificial training and test data.
import random
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets, transforms
import math
n_input, n_hidden, n_out = 5, 64, 1
# Create training and test datasets
X_train = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(1000)])
y_train = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(1000)])
X_test = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(50)])
y_test = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(50)])
test_dataset = TensorDataset(torch.tensor(X_test.to_numpy(), dtype=torch.float32), torch.tensor(y_test.to_numpy(), dtype=torch.float32))
testloader = DataLoader(test_dataset, batch_size=32)
# For training, use 32 as the batch size
training_dataset = TensorDataset(torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.to_numpy(), dtype=torch.float32))
dataloader = DataLoader(training_dataset, batch_size=32, shuffle=True)
Now, let's define the model to train.
model = nn.Sequential(nn.Linear(n_input, n_hidden),
                      nn.ReLU(),
                      nn.Linear(n_hidden, n_out),
                      nn.ReLU())
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
Next, I start the training process.
losses = []
epochs = 1000
for epoch in range(epochs):
    for x_batch, y_batch in dataloader:
        y_pred = model(x_batch)
        loss = loss_function(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    losses.append(loss.item())  # record the last batch loss of each epoch
Now, I'd like to get predictions for the test dataset and compute the statistical results. This is the part where I need help and some guidance. Do my calculations seem correct? Is there something I might be doing wrong?
from torchmetrics import R2Score
r2score = R2Score()
running_mae = running_mse = running_r2 = 0
model.eval()
with torch.no_grad():
    for times, (x_test, y_test) in enumerate(testloader):
        y_pred = model(x_test)
        error = torch.abs(y_pred - y_test).sum().data
        squared_error = ((y_pred - y_test) * (y_pred - y_test)).sum().data
        running_mae += error
        running_mse += squared_error
        running_r2 += r2score(y_pred, y_test)
mse = math.sqrt(squared_error / len(testloader))
mae = error / len(testloader)
r2 = running_r2 / len(testloader)
print("MSE:", mse, "MAE:", mae, "R2:", r2)

Related

Classification ANN stuck at 60%

I am trying to create a binary classifier on a dataset of 10,000 records. I have tried multiple activation functions and optimizers, yet the results are always between 56.8% and 58.9%. Given the fairly steady results over many dozens of iterations, I assume the problem is either:
My dataset is not classifiable
My model is broken
This is the data set: training-set.csv
I may be able to get 2000 more records but that would be it.
My question is: is there something in the way my model is constructed that is preventing it from learning to a higher degree?
Note that I am happy to have as many layers and nodes as needed, and time is not a factor in generating the model.
import pandas
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier

dataframe = pandas.read_csv(r"training-set.csv", index_col=None)
dataset = dataframe.values
X = dataset[:,0:48].astype(float)
Y = dataset[:,48]
#count the input variables
col_count = X.shape[1]
#normalize X
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_scale = sc_X.fit_transform(X)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size = 0.2)
# define baseline model
activator = 'linear' #'relu' 'sigmoid' 'softmax' 'exponential' 'linear' 'tanh'
#opt = 'Adadelta' #adam SGD nadam RMSprop Adadelta
nodes = 1000
max_layers = 2
max_epochs = 100
max_batch = 32
loss_funct = 'binary_crossentropy' #for binary
last_act = 'sigmoid' # 'softmax' 'sigmoid' 'relu'
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(nodes, input_dim=col_count, activation=activator))
    for x in range(0, max_layers):
        model.add(Dropout(0.2))
        model.add(Dense(nodes, input_dim=nodes, activation=activator))
        #model.add(BatchNormalization())
    model.add(Dense(1, activation=last_act))
    # Compile model
    adam = Adam(lr=0.001)
    model.compile(loss=loss_funct, optimizer=adam, metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=baseline_model, epochs=max_epochs, batch_size=max_batch)
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_test)
#confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
score = np.sum(cm.diagonal())/float(np.sum(cm))
Two points:
There is absolutely no point in stacking dense layers with linear activations; they collapse into a single linear unit. Change to activator = 'relu' (and just don't bother with the other candidate activation functions in your commented-out list).
Do not use dropout by default, especially when your model has difficulty learning (as here); remove the dropout layer(s), and be ready to put (some of) them back only if you see overfitting (you are currently still very far from that point, so this is not something to worry about now). A revised baseline_model along these lines is sketched below.
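To make both points concrete, here is a minimal revision of baseline_model (same layer sizes as the original question; only the activation and dropout change, everything else is kept as-is):
def baseline_model():
    model = Sequential()
    model.add(Dense(nodes, input_dim=col_count, activation='relu'))
    for _ in range(max_layers):
        # no Dropout while the model is still underfitting
        model.add(Dense(nodes, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model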

Linear Regression with PyTorch - Prediction error

I am using PyTorch to predict the value of a dependent variable.
The source file I am reading for the dataset is "Supplier Past Performance.xlsx".
As you can see, the Defect Per cent (the dependent variable) ranges from roughly 0 to 3.
import torch.nn as nn
import numpy as np
import torch
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
SourceData = pd.read_excel("Supplier Past Performance.xlsx")  # Load the data into a Pandas DataFrame
SourceData_train_independent = SourceData.drop(["Defect Per cent"], axis=1)  # Drop the dependent variable from the training dataset
SourceData_train_dependent = SourceData["Defect Per cent"].copy()  # Dependent variable values for the training dataset
X_train = torch.tensor(SourceData_train_independent.values)
y_train = torch.tensor(SourceData_train_dependent.values)
X_train = X_train.type(torch.FloatTensor)  # convert the tensor dtype
y_train = y_train.type(torch.FloatTensor)  # convert the tensor dtype
# Define dataset
train_ds = TensorDataset(X_train, y_train)
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
#Define model
model = nn.Linear(3,1)
#Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=0.02)
#Define loss function
loss_fn = F.mse_loss
#Define a utility function to train the model
def fit(num_epochs, model, loss_fn, opt):
    for epoch in range(num_epochs):
        for xb, yb in train_dl:
            # Generate predictions
            pred = model(xb)
            loss = loss_fn(pred, yb)
            # Perform gradient descent
            loss.backward()
            opt.step()
            opt.zero_grad()
    print('Training loss: ', loss_fn(model(xb), yb))
#Train the model for 100 epochs
fit(100, model, loss_fn, opt)
new_var=torch.Tensor([[5000.0, 33.0, 23.0]])
preds = model(new_var)
print(preds.item())
I am getting a prediction of 963.40 for new_var. This value is way higher than the expected range of 0.1 to 3.
Please help.
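Two things stand out, stated as assumptions since the training loss isn't shown. First, pred from nn.Linear(3,1) has shape (batch, 1) while yb has shape (batch,), so F.mse_loss broadcasts them to (batch, batch) and trains against the wrong quantity. Second, the raw features sit on very different scales (e.g. 5000 vs 33), which makes plain SGD at lr=0.02 prone to blowing up. A minimal sketch of both fixes, applied before building train_ds and retraining:
# give the target the same (batch, 1) shape as the model output
y_train = y_train.unsqueeze(1)

# standardize each feature column using the training statistics
mean = X_train.mean(dim=0, keepdim=True)
std = X_train.std(dim=0, keepdim=True)
X_train = (X_train - mean) / std
train_ds = TensorDataset(X_train, y_train)

# any new input must be scaled with the same training statistics
new_var = (torch.tensor([[5000.0, 33.0, 23.0]]) - mean) / std
preds = model(new_var)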

Spiral problem, why does my loss increase in this neural network using Keras?

I'm trying to solve the spiral problem using Keras with 3 spirals instead of 2, using a strategy similar to the one I used for 2. The problem is that my loss now grows exponentially instead of decreasing, with the same parameters I used for 2 spirals (the network has 3 outputs instead of being binary). I'm not quite sure what could be causing this; can anyone help? I have tried various epochs, learning rates, and batch sizes.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import RMSprop
from Question1.utils import create_neural_network, create_test_data
EPOCHS = 250
BATCH_SIZE = 20
def main():
    df = three_spirals(1000)
    # Set up data
    x_train = df[['x-coord', 'y-coord']].values
    y_train = df['class'].values
    # Don't need y_test, can inspect visually whether it worked or not
    x_test = create_test_data()
    # Scale data
    scaler = MinMaxScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)
    relu_model = create_neural_network(layers=3,
                                       neurons=[40, 40, 40],
                                       activation='relu',
                                       optimizer=RMSprop(learning_rate=0.001),
                                       loss='categorical_crossentropy',
                                       outputs=3)
    # Train networks
    relu_model.fit(x=x_train, y=y_train, epochs=EPOCHS, verbose=1, batch_size=BATCH_SIZE)
    # Predictions on test data
    relu_predictions = relu_model.predict_classes(x_test)
    models = [relu_model]
    test_predictions = [relu_predictions]
    # Plot
    plot_data(models, test_predictions)
And here is the create_neural_network function:
def create_neural_network(layers, neurons, activation, optimizer, loss, outputs=1):
    if layers != len(neurons):
        raise ValueError("Number of layers doesn't match the number of neuron layers.")
    model = Sequential()
    for i in range(layers):
        model.add(Dense(neurons[i], activation=activation))
    # Output
    if outputs == 1:
        model.add(Dense(outputs))
    else:
        model.add(Dense(outputs, activation='softmax'))
    model.compile(optimizer=optimizer, loss=loss)
    return model
I have worked it out: the output data isn't like binary classification, where you only need one column. For multi-class classification you need a column for each class you want to predict, so having a single y that could be 0, 1, or 2 was incorrect. The correct way is to have y0, y1, y2, each of which is 1 if the sample belongs to that class and 0 otherwise (one-hot encoding, as sketched below).
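For anyone hitting the same thing, a minimal sketch of that conversion using Keras' built-in helper (assuming y_train holds the class ids 0, 1, 2):
from tensorflow.keras.utils import to_categorical

# turn class ids into one-hot rows: 0 -> [1,0,0], 1 -> [0,1,0], 2 -> [0,0,1]
y_train = to_categorical(y_train, num_classes=3)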

Getting gradient of a Keras model output w.r.t input, but with the last layer being an SVM

I have a CNN model built in Keras. I then took the output of its last layer as features and retrained an SVM on them.
Is it possible to now find the gradient of the SVM's output with respect to the CNN model's input?
I know of this method (Getting gradient of model output w.r.t weights using Keras) and am able to use it to get the gradient with respect to the input for the layer I am pulling the features out of. I can also get the numerical gradient of the SVM with respect to its input, although at the moment it's a bit of a mess; I would appreciate some input here as well, actually.
But now I need to somehow combine these two to get the gradient of the SVM's output with respect to the input of the entire CNN model.
"""
Main CNN script
"""
# Imports ##
# general
import matplotlib.pyplot as plt
import numpy as np
# ML libraries
from tensorflow.keras.datasets import mnist
# ML utilities
from tensorflow.keras.utils import to_categorical
# Python scripts used
import train_CNN
import load_CNN
import train_subSVMs
import load_subSVMs
import train_finalSVM
import load_finalSVM
import joblib
def save_array(array, name):
    joblib.dump(array, name + '.pkl', compress=3)
    return

def load_array(name):
    # joblib.load takes a file path; mirror save_array's naming convention
    array = joblib.load(name + '.pkl')
    return array

def show_data_example(i, dataset):
    # show some of the images in the dataset
    # call multiple times for multiple images
    # squeeze is necessary here to get rid of the extra dimension introduced in reshaping
    print('\nExample Image: %s from selected dataset' % i)
    plt.imshow(np.squeeze(dataset[i]), cmap=plt.get_cmap('gray'))
    plt.show()
    return
def load_and_encode(target_shape):
    # load dataset
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train, y_train = X_train[:,:,:], y_train[:]
    X_test, y_test = X_test[:,:,:], y_test[:]
    print('Loaded MNIST dataset')
    print('Train: X=%s, y=%s' % (X_train.shape, y_train.shape))
    print('Test: X=%s, y=%s' % (X_test.shape, y_test.shape))
    # encode y data
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    # normalise X data (X/255 -> [0,1])
    X_train = X_train/255.0
    X_test = X_test/255.0
    # dimensions are currently (m x 28 x 28);
    # reshape to (m x 28 x 28 x 1) for convolutional networks
    X_train = X_train.reshape(X_train.shape[0], target_shape[0], target_shape[1], target_shape[2])
    X_test = X_test.reshape(X_test.shape[0], target_shape[0], target_shape[1], target_shape[2])
    # show an arbitrary example image from the training set
    show_data_example(12, X_train)
    return X_train, y_train, X_test, y_test
image_shape = (28,28,1)
# load and encode mnist data
X_train, y_train, X_test, y_test = load_and_encode(image_shape)
# hyper-parameters
learning_rate = 0.1
momentum = 0.9
dropout = 0.5
batch_size = 128
epochs = 50
decay = 1e-6
number_of_classes = 10
# store required data into a packet to send to various imports
packet = [learning_rate, momentum, dropout, batch_size, epochs, decay,
          number_of_classes, image_shape,
          X_train, y_train, X_test, y_test]
data = [X_train, y_train, X_test, y_test]
#CNN_model = train_CNN.train_model(packet, save_model = 'True')
CNN_model = load_CNN.load_model(packet) # keras sequential model
#subSVM1, subSVM2, subSVM3, features = train_subSVMs.train(CNN_model, data, c=0.1, save_model = 'True', get_accuracies= 'True')
subSVM1, subSVM2, subSVM3, features = load_subSVMs.load(CNN_model, data, c=0.1, get_accuracies='False')
subSVMs = [subSVM1, subSVM2, subSVM3]
feature1_train, feature1_test,\
feature2_train, feature2_test,\
feature3_train, feature3_test = features
final_SVM = joblib.load('saved_finalSVM.pkl') # sklearn svm trained from features
NUMBER = 48
plt.imshow(np.squeeze(X_train[NUMBER,:,:,:]), cmap=plt.get_cmap('binary'))
# gradients of features wrt to input
import tensorflow.keras.backend as K
gradients = K.gradients(CNN_model.get_layer(name='feature1').output, CNN_model.input) # K.gradients(y,x) for dy/dx
f = K.function([CNN_model.input], gradients)
x = np.expand_dims(X_train[NUMBER,:,:,:],axis=0)
a=f([x])
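Assuming the SVM in question is linear (for a linear SVM the decision function is w·φ(x) + b, so its gradient with respect to the features is just coef_), one way to combine the two pieces is to push the SVM weights into the graph and let K.gradients apply the chain rule in one go. A sketch only, with the layer name 'feature1' assumed to be what the SVM actually consumes:
# hypothetical: d(SVM decision)/d(image) for a *linear* final SVM
w = final_SVM.coef_[0].astype('float32')             # (n_features,) weight vector
feat = CNN_model.get_layer(name='feature1').output   # symbolic feature tensor
svm_score = K.sum(K.constant(w) * feat)              # w . phi(x); the bias drops out of the gradient
svm_grads = K.gradients(svm_score, CNN_model.input)
svm_grad_fn = K.function([CNN_model.input], svm_grads)
dsvm_dx = svm_grad_fn([x])[0]                        # same shape as the input batch
A non-linear kernel would instead need the analytic gradient of the kernel expansion (or the numerical gradient already mentioned), composed with the feature Jacobian by hand.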

Why can my model not even predict a sine?

I am trying to generate a learned time series with an LSTM RNN using Keras: I want to predict a data point, feed it back in as input to predict the next one, and so on, so that I can actually generate the time series (for example, given 2000 data points, predict the next 2000).
I am trying it like this, but the test score RMSE is 1.28 and the prediction is basically a straight line.
# LSTM for international airline passengers problem with regression framing
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
numpy.random.seed(7)
# load the dataset
dataset = numpy.sin(numpy.linspace(0,35,10000)).reshape(-1,1)
print(type(dataset))
print(dataset.shape)
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.5)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(16, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=10, batch_size=1, verbose=2)
# make predictions
trainPredict = model.predict(trainX)
testPredict = list()
prediction = model.predict(testX[0].reshape(1,1,1))
for i in range(trainX.shape[0]):
    prediction = model.predict(prediction.reshape(1,1,1))
    testPredict.append(prediction)
testPredict = numpy.array(testPredict).reshape(-1,1)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
What am I doing wrong?
I see multiple issues with your code. Your value for look_back is 1, which means the LSTM sees only one sample at a time, which is obviously not sufficient to learn anything about the sequence.
You probably did this so that you can make the final prediction at the end by feeding the prediction from the previous step back in as the new input. The correct way to make this work is to train with more timesteps and then change the network to a stateful LSTM with a single timestep.
Also, when you make the final prediction you have to show the network more than one ground-truth sample; otherwise the position on the sine is ambiguous (is it going up or down in the next step?).
I slapped together a quick example. Here is how I generated the data:
import numpy as np
numSamples = 1000
numTimesteps = 50
width = np.pi/2.0
def getRandomSine(numSamples = 100, width = np.pi):
    return np.sin(np.linspace(0,width,numSamples) + (np.random.rand()*np.pi*2))
trainX = np.stack([getRandomSine(numSamples = numTimesteps+1) for _ in range(numSamples)])
valX = np.stack([getRandomSine(numSamples = numTimesteps+1) for _ in range(numSamples)])
trainX = trainX.reshape((numSamples,numTimesteps+1,1))
valX = valX.reshape((numSamples,numTimesteps+1,1))
trainY = trainX[:,1:,:]
trainX = trainX[:,:-1,:]
valY = valX[:,1:,:]
valX = valX[:,:-1,:]
Here I trained the model:
import keras
from keras.models import Sequential
from keras import layers
model = Sequential()
model.add(layers.recurrent.LSTM(32, return_sequences=True, input_shape=(numTimesteps, 1)))
model.add(layers.recurrent.LSTM(32, return_sequences=True))
model.add(layers.wrappers.TimeDistributed(layers.Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
model.fit(trainX, trainY, epochs=50, validation_data=(valX, valY), batch_size=32)
And here I changed the trained model to allow continuous prediction:
# serialize the model and get its weights, for quick re-building
config = model.get_config()
weights = model.get_weights()
config[0]['config']['batch_input_shape'] = (1, 1, 1)
config[0]['config']['stateful'] = True
config[1]['config']['stateful'] = True
from keras.models import model_from_config
new_model = Sequential().from_config(config)
new_model.set_weights(weights)
#create test sine
testX = getRandomSine(numSamples = numTimesteps*10, width = width*10)
new_model.reset_states()
testPredictions = []
# burn in
for i in range(numTimesteps):
    prediction = new_model.predict(np.array([[[testX[i]]]]))
    testPredictions.append(prediction[0,0,0])
# prediction
for i in range(numTimesteps, len(testX)):
    prediction = new_model.predict(prediction)
    testPredictions.append(prediction[0,0,0])
# plot result
import matplotlib.pyplot as plt
plt.plot(np.stack([testPredictions,testX]).T)
plt.show()
Here is what the result looks like: the prediction errors add up, and it quickly diverges from the input sine, but it clearly learned the general shape of sines. You can now try to improve on this by trying different layers, activation functions, and so on.
I was working a bit on a different architecture and uploaded it on GitHub.
So for everyone looking into predicting a time series point by point, I hope this helps.
