I am using PyTorch to predict the value of a dependent variable.
The source file I am reading for dataset
As you can see that the Defect Per cent (which is a dependent variable is ~ 0 to 3)
import torch.nn as nn
import numpy as np
import torch
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
SourceData=pd.read_excel("Supplier Past Performance.xlsx") # Load the data into Pandas DataFrame
SourceData_train_independent= SourceData.drop(["Defect Per cent"], axis=1) # Drop depedent variable from training dataset
SourceData_train_dependent=SourceData["Defect Per cent"].copy() # Dependent variable value for training dataset
X_train = torch.tensor(SourceData_train_independent.values)
y_train=torch.tensor(SourceData_train_dependent.values)
X_train=X_train.type(torch.FloatTensor) #convert the type of tensor
y_train=y_train.type(torch.FloatTensor) #convert the type of tensor
# Define dataset
train_ds = TensorDataset(X_train, y_train)
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
#Define model
model = nn.Linear(3,1)
#Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=0.02)
#Define loss function
loss_fn = F.mse_loss
#Define a utility function to train the model
def fit(num_epochs, model, loss_fn, opt):
for epoch in range(num_epochs):
for xb,yb in train_dl:
#Generate predictions
pred = model(xb)
loss = loss_fn(pred,yb)
#Perform gradient descent
loss.backward()
opt.step()
opt.zero_grad()
print('Training loss: ', loss_fn(model(xb), yb))
#Train the model for 100 epochs
fit(100, model, loss_fn, opt)
new_var=torch.Tensor([[5000.0, 33.0, 23.0]])
preds = model(new_var)
print(preds.item())
I am getting the prediction for the new_var as 963.40. This value is way higher that he expected value of 0.1 to 3.
Please help
Related
I'm creating a vanilla neural network with artificial datasets to learn pytorch. I'm currently looking how I can get the predictions for the test data set and obtain the statistical metrics including mse, mae, and r2. I was wondering if my calculations are correct. Also, is there any built-in function that could potentially give me all these results in pytorch as it happens in sckit-learn?
Let's first upload libraries and then generate artificial training and test data.
import random
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import Dataset,DataLoader,TensorDataset
from torchvision import datasets, transforms
import math
n_input, n_hidden, n_out= 5, 64, 1
#Create training and test datasets
X_train = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(1000)])
y_train = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(1000)])
X_test = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(50)])
y_test = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(50)])
test_dataset = TensorDataset(torch.Tensor(X_test.to_numpy().astype(np.float32)), torch.Tensor((y_test).to_numpy().astype(np.float32)))
testloader = DataLoader(test_dataset, batch_size= 32)
#For training, use 32 as a batch size
training_dataset = TensorDataset(torch.Tensor(X_train.to_numpy().astype(np.float32)), torch.Tensor((y_train).to_numpy().astype(np.float32)))
dataloader = DataLoader(training_dataset, batch_size=32, shuffle=True)
Now, let us generate the model to train.
model = nn.Sequential(nn.Linear(n_input, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_out),
nn.ReLU())
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
Next, I start the training process.
losses = []
epochs = 1000
for epoch in range(epochs+1):
for times,(x_train,y_train) in enumerate(dataloader):
y_pred = model(x_train)
loss = loss_function(y_pred, y_train)
model.zero_grad()
loss.backward()
optimizer.step()
Now, I'd like to get the predictions for the test dataset and get statistical results. This is the part that I need help and some guidance. Do they seem correct? Is there something I might be doing wrong?
from torchmetrics import R2Score
r2score = R2Score()
running_mae = running_mse = running_r2 = 0
with torch.no_grad():
model.eval()
for times,(x_test,y_test) in enumerate(testloader):
y_pred = model(x_test)
error = torch.abs(y_pred - y_test).sum().data
squared_error=((y_pred - y_test)*(y_pred - y_test)).sum().data
running_mae+=error
running_mse+=squared_error
running_r2+=r2score(y_pred, y_test)
mse = math.sqrt(squared_error/ len(testloader))
mae = error / len(testloader)
r2 = running_r2 / len(testloader)
print("MSE:",mse, "MAE:", mae, "R2:", r2)
I have a regression neural network with ten input features and three outputs. But all ten features do not have the same importance in loss function calculation (mean square error). So I want to define specific coefficients for each input feature to increase their role in the loss function.
Consider we define coefficients in an array: coeff=[5,20,2,1,4,5,6,2,9,15]. When mean squared error is measuring the distances of input features, for example, if the distance of the second feature is '60', this distance is multiplied by coefficient '20' from coeff array.
I guess I need to define a custom loss function, but how to pass the defined "coeff" array and multiply its elements with input features?
Updated
I guess my idea is similar to this code and this code, but I am not sure. however, I was unable to run the first one and got errors.
from numpy import mean
from numpy import std
from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
# get the dataset
def get_dataset():
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3, random_state=2)
return X, y
# get the model
def get_model(n_inputs, n_outputs):
model = Sequential()
model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
model.add(Dense(n_outputs))
model.compile(loss='mse', optimizer='adam')
return model
# evaluate a model using repeated k-fold cross-validation
def evaluate_model(X, y):
results = list()
n_inputs, n_outputs = X.shape[1], y.shape[1]
# define evaluation procedure
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# enumerate folds
for train_ix, test_ix in cv.split(X):
# prepare data
X_train, X_test = X[train_ix], X[test_ix]
y_train, y_test = y[train_ix], y[test_ix]
# define model
model = get_model(n_inputs, n_outputs)
# fit model
model.fit(X_train, y_train, verbose=0, epochs=100)
# evaluate model on test set
mse = model.evaluate(X_test, y_test, verbose=0)
# store result
print('>%.3f' % mse)
results.append(mse)
return results
# load dataset
X, y = get_dataset()
# evaluate model
results = evaluate_model(X, y)
# summarize performance
print('MSE: %.3f (%.3f)' % (mean(results), std(results)))
If you use the functional api, then you could add a custom loss function with the model.add_loss function, within the model. Your loss function can then use the model inputs and outputs and anything in your model.
The problem with this approach is, that in the model you don't have the 'true' y values. So you would need to add an additional input to your model, and pass the y values to the model – but just for the loss calculation.
Something like this:
inputs = Input(shape=(n_inputs))
x = Dense(20, ...)(model_inputs)
outputs = Dense(n_outputs)(x)
y_true = Input(shape=(n_outputs))
modelx = Model(inputs=[inputs, y_true], outputs=outputs)
modelx.add_loss(your_loss_function(y_true=y_true, y_pred=outputs, inputs=inputs)
Since you already added the loss to the model, you compile it without any loss:
modelx.compile(loss=None, optimizer='adam')
When you fit the model, you need to pass the y values to the model inputs.
modelx.fit(x=[X_train, y_train], y=y_train, verbose=0, epochs=100)
When you want a model with just the X values as input, for example for prediction, you can create it like so:
model = Model(modelx.input[0], modelx.output)
Can someone explain why adding random numbers to the loss does not affect the predictions of this Keras model? Every time I run it I get a very similar AUC for both models but I would expect the AUC from the second model to be close to 0.5. I use Colab.
Any suggestions why this might be happening?
import numpy as np
import pandas as pd
import tensorflow as tf
import keras as keras
from keras import layers
import random
from keras import backend as K
from sklearn import metrics
from sklearn.metrics import roc_auc_score
opt = tf.keras.optimizers.Adam(learning_rate=1e-04)
#resetting seeds to ensure reproducibility
def reset_random_seeds():
tf.random.set_seed(1)
np.random.seed(1)
random.seed(1)
def get_auc(y_test,y_pred):
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
auc = metrics.auc(fpr, tpr)
return auc
#standard loss function with binary cross-entropy
def binary_crossentropy1(y_true, y_pred):
bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
bce1 = K.mean(bin_cross(y_true, y_pred))
return bce1
#same loss function but with added random numbers
def binary_crossentropy2(y_true, y_pred):
bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
bce2 = K.mean(bin_cross(y_true, y_pred))
penalty = tf.random.normal([], mean=50.0, stddev=100.0)
bce2 = tf.math.add(bce2, penalty)
return bce2
#model without randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model1 = keras.Model(inputs=input1, outputs=output)
model1.compile(optimizer=opt, loss=binary_crossentropy1, metrics=['accuracy'])
model1.fit(x=X_train, y=y_train, epochs=10, batch_size = 32)
model1_pred = model1.predict(X_test)
#model with randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model2 = keras.Model(inputs=input1, outputs=output)
model2.compile(optimizer=opt, loss=binary_crossentropy2, metrics=['accuracy'])
model2.fit(x=X_train, y=y_train, epochs=10, batch_size = 32)
model2_pred = model2.predict(X_test)
print(get_auc(y_test, model1_pred))
print(get_auc(y_test, model2_pred))
Result
0.7228943446346893
0.7231896873302319
What the penalty looks like
penalty = 112.050842
penalty = 139.664017
penalty = 152.505341
penalty = -37.1483
penalty = -74.08284
penalty = 155.872528
penalty = 42.7903175
The training is guided by the gradient of the loss with respect to the input.
The random value that you add to the loss in the second model is independent form the input, so it will not contribute to the gradient of the loss during training. When you are running the prediction you are taking the the model output (before the loss function), so that's not affected as well.
I was practicing the keras classification for imbalanced data. I followed the official example:
https://keras.io/examples/structured_data/imbalanced_classification/
and used the scikit-learn api to do cross-validation.
I have tried the model with different parameter.
However, all the times one of the 3 folds has value 0.
eg.
results [0.99242424 0.99236641 0. ]
What am I doing wrong?
How to get ALL THREE validation recall values of order "0.8"?
MWE
%%time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
import os
import random
SEED = 100
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)
# load the data
ifile = "https://github.com/bhishanpdl/Datasets/blob/master/Projects/Fraud_detection/raw/creditcard.csv.zip?raw=true"
df = pd.read_csv(ifile,compression='zip')
# train test split
target = 'Class'
Xtrain,Xtest,ytrain,ytest = train_test_split(df.drop([target],axis=1),
df[target],test_size=0.2,stratify=df[target],random_state=SEED)
print(f"Xtrain shape: {Xtrain.shape}")
print(f"ytrain shape: {ytrain.shape}")
# build the model
def build_fn(n_feats):
model = keras.models.Sequential()
model.add(keras.layers.Dense(256, activation="relu", input_shape=(n_feats,)))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dropout(0.3))
# last layer is dense 1 for binary sigmoid
model.add(keras.layers.Dense(1, activation="sigmoid"))
# compile
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(1e-2),
metrics=['Recall'])
return model
# fitting the model
n_feats = Xtrain.shape[-1]
counts = np.bincount(ytrain)
weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
class_weight = {0: weight_for_0, 1: weight_for_1}
FIT_PARAMS = {'class_weight' : class_weight}
clf_keras = KerasClassifier(build_fn=build_fn,
n_feats=n_feats, # custom argument
epochs=30,
batch_size=2048,
verbose=2)
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)
results = cross_val_score(clf_keras, Xtrain, ytrain,
cv=skf,
scoring='recall',
fit_params = FIT_PARAMS,
n_jobs = -1,
error_score='raise'
)
print('results', results)
Result
Xtrain shape: (227845, 30)
ytrain shape: (227845,)
results [0.99242424 0.99236641 0. ]
CPU times: user 3.62 s, sys: 117 ms, total: 3.74 s
Wall time: 5min 15s
Problem
I am getting the third recall as 0. I am expecting it of the order 0.8, how to make sure all three values are around 0.8 or more?
MilkyWay001,
You have chosen to use sklearn wrappers for your model - they have benefits, but the model training process is hidden. Instead, I trained the model separately with validation dataset added. The code for this would be:
clf_1 = KerasClassifier(build_fn=build_fn,
n_feats=n_feats)
clf_1.fit(Xtrain, ytrain, class_weight=class_weight,
validation_data=(Xtest, ytest),
epochs=30,batch_size=2048,
verbose=1)
In the Model.fit() output it is clearly seen that while loss metric goes down, recall is not stable. This lead to poor performance in CV reflected in zeros in CV results, as you observed.
I fixed this by reducing learning rate to just 0.0001. While it is 100 times less than yours - it reaches 98% recall on train and 100% (or close) on test in just 10 epochs.
Your code needs just one fix to achieve stable results: change LR to much lower one, like 0.0001:
optimizer=keras.optimizers.Adam(1e-4),
You can experiment with LR in the range < 0.001.
For reference, with LR 0.0001 I got:
results [0.99242424 0.97709924 1. ]
Good luck!
PS: thanks for inluding compact and complete MWE
I'm trying to solve the spiral problem using Keras with 3 spirals instead of 2 using a similar strategy that I used for 2. Problem is my loss is now growing exponentially instead of decreasing with the same parameters I used for 2 spirals (The neural network structure has 3 outputs instead of being binary). I'm not quite sure what could be happening with this issue if anyone could help? I have tried this with various epochs, learning rates, batch sizes.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import RMSprop
from Question1.utils import create_neural_network, create_test_data
EPOCHS = 250
BATCH_SIZE = 20
def main():
df = three_spirals(1000)
# Set-up data
x_train = df[['x-coord', 'y-coord']].values
y_train = df['class'].values
# Don't need y_test, can inspect visually if it worked or not
x_test = create_test_data()
# Scale data
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
relu_model = create_neural_network(layers=3,
neurons=[40, 40, 40],
activation='relu',
optimizer=RMSprop(learning_rate=0.001),
loss='categorical_crossentropy',
outputs=3)
# Train networks
relu_model.fit(x=x_train, y=y_train, epochs=EPOCHS, verbose=1, batch_size=BATCH_SIZE)
# Predictions on test data
relu_predictions = relu_model.predict_classes(x_test)
models = [relu_model]
test_predictions = [relu_predictions]
# Plot
plot_data(models, test_predictions)
And here is the create_neural_network function:
def create_neural_network(layers, neurons, activation, optimizer, loss, outputs=1):
if layers != len(neurons):
raise ValueError("Number of layers doesn't much the amount of neuron layers.")
model = Sequential()
for i in range(layers):
model.add(Dense(neurons[i], activation=activation))
# Output
if outputs == 1:
model.add(Dense(outputs))
else:
model.add(Dense(outputs, activation='softmax'))
model.compile(optimizer=optimizer,
loss=loss)
return model
I have worked it out, for the output data it isn't like a binary classification where you only need one column. For multi classification you need a column for each class you want to classify...so where I had y could be 0, 1, 2 was incorrect. The correct way to do this was to have y0, y1, y2 which would be 1 if it fit that specific class and 0 if it didn't.