Can someone explain why adding random numbers to the loss does not affect the predictions of this Keras model? Every time I run it I get a very similar AUC for both models, but I would expect the AUC from the second model to be close to 0.5. I am running this in Colab.
Any suggestions as to why this might be happening?
import numpy as np
import pandas as pd
import tensorflow as tf
import keras as keras
from keras import layers
import random
from keras import backend as K
from sklearn import metrics
from sklearn.metrics import roc_auc_score
opt = tf.keras.optimizers.Adam(learning_rate=1e-04)
#resetting seeds to ensure reproducibility
def reset_random_seeds():
    tf.random.set_seed(1)
    np.random.seed(1)
    random.seed(1)

def get_auc(y_test, y_pred):
    fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
    auc = metrics.auc(fpr, tpr)
    return auc

#standard loss function with binary cross-entropy
def binary_crossentropy1(y_true, y_pred):
    bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    bce1 = K.mean(bin_cross(y_true, y_pred))
    return bce1

#same loss function but with added random numbers
def binary_crossentropy2(y_true, y_pred):
    bin_cross = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    bce2 = K.mean(bin_cross(y_true, y_pred))
    penalty = tf.random.normal([], mean=50.0, stddev=100.0)
    bce2 = tf.math.add(bce2, penalty)
    return bce2
#model without randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model1 = keras.Model(inputs=input1, outputs=output)
model1.compile(optimizer=opt, loss=binary_crossentropy1, metrics=['accuracy'])
model1.fit(x=X_train, y=y_train, epochs=10, batch_size = 32)
model1_pred = model1.predict(X_test)
#model with randomness
reset_random_seeds()
input1 = keras.Input(shape=(9,))
x = layers.Dense(12, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(input1)
x = layers.Dense(8, activation="relu", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
output = layers.Dense(1, activation="sigmoid", kernel_initializer=keras.initializers.glorot_uniform(seed=123))(x)
model2 = keras.Model(inputs=input1, outputs=output)
model2.compile(optimizer=opt, loss=binary_crossentropy2, metrics=['accuracy'])
model2.fit(x=X_train, y=y_train, epochs=10, batch_size = 32)
model2_pred = model2.predict(X_test)
print(get_auc(y_test, model1_pred))
print(get_auc(y_test, model2_pred))
Result
0.7228943446346893
0.7231896873302319
What the penalty looks like
penalty = 112.050842
penalty = 139.664017
penalty = 152.505341
penalty = -37.1483
penalty = -74.08284
penalty = 155.872528
penalty = 42.7903175
Training is guided by the gradient of the loss with respect to the model's trainable weights.
The random value that you add to the loss in the second model does not depend on those weights, so its gradient with respect to them is zero and it contributes nothing to the weight updates during training. It merely shifts the reported loss value up or down; it does not change the direction of the optimization. And when you run the prediction you take the model output (computed before the loss function), so that is not affected either.
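As a quick sanity check (a minimal sketch of my own, not part of the original answer), tf.GradientTape shows that a term which does not depend on the trainable variable leaves the gradient unchanged:
import tensorflow as tf

# A loss plus a weight-independent random penalty has the same gradient
# with respect to the weight as the loss alone.
w = tf.Variable(2.0)
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as tape:
    base_loss = tf.square(w * x - 1.0)
    penalty = tf.random.normal([], mean=50.0, stddev=100.0)  # independent of w
    noisy_loss = base_loss + penalty
print(tape.gradient(base_loss, w).numpy())   # 30.0
print(tape.gradient(noisy_loss, w).numpy())  # 30.0 as well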
Related
I'm creating a vanilla neural network with artificial datasets to learn PyTorch. I'm currently looking at how to get the predictions for the test dataset and obtain statistical metrics including MSE, MAE, and R2. I was wondering if my calculations are correct. Also, is there any built-in function in PyTorch that could give me all these results, as there is in scikit-learn?
Let's first upload libraries and then generate artificial training and test data.
import random
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import Dataset,DataLoader,TensorDataset
from torchvision import datasets, transforms
import math
n_input, n_hidden, n_out= 5, 64, 1
#Create training and test datasets
X_train = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(1000)])
y_train = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(1000)])
X_test = pd.DataFrame([[random.random() for i in range(n_input)] for j in range(50)])
y_test = pd.DataFrame([[random.random() for i in range(n_out)] for j in range(50)])
test_dataset = TensorDataset(torch.Tensor(X_test.to_numpy().astype(np.float32)), torch.Tensor((y_test).to_numpy().astype(np.float32)))
testloader = DataLoader(test_dataset, batch_size= 32)
#For training, use 32 as a batch size
training_dataset = TensorDataset(torch.Tensor(X_train.to_numpy().astype(np.float32)), torch.Tensor((y_train).to_numpy().astype(np.float32)))
dataloader = DataLoader(training_dataset, batch_size=32, shuffle=True)
Now, let us generate the model to train.
model = nn.Sequential(nn.Linear(n_input, n_hidden),
                      nn.ReLU(),
                      nn.Linear(n_hidden, n_out),
                      nn.ReLU())
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
Next, I start the training process.
losses = []
epochs = 1000
for epoch in range(epochs+1):
    for times, (x_train, y_train) in enumerate(dataloader):
        y_pred = model(x_train)
        loss = loss_function(y_pred, y_train)
        model.zero_grad()
        loss.backward()
        optimizer.step()
Now I'd like to get the predictions for the test dataset and compute the statistical results. This is the part where I need help and some guidance. Do my calculations seem correct? Is there something I might be doing wrong?
from torchmetrics import R2Score
r2score = R2Score()
running_mae = running_mse = running_r2 = 0
with torch.no_grad():
    model.eval()
    for times, (x_test, y_test) in enumerate(testloader):
        y_pred = model(x_test)
        error = torch.abs(y_pred - y_test).sum().data
        squared_error = ((y_pred - y_test) * (y_pred - y_test)).sum().data
        running_mae += error
        running_mse += squared_error
        running_r2 += r2score(y_pred, y_test)
mse = math.sqrt(squared_error / len(testloader))
mae = error / len(testloader)
r2 = running_r2 / len(testloader)
print("MSE:", mse, "MAE:", mae, "R2:", r2)
What strings are valid metrics with keras.model.compile?
The following works,
model.compile(optimizer='sgd', loss='mse', metrics=['acc'])
but this does not work,
model.compile(optimizer='sgd', loss='mse', metrics=['recall', 'precision'])
Check the keras.metrics module to see which metrics are available; the docstrings give the details.
Looking at the Keras source code, the list of available metric strings is:
accuracy
binary_accuracy
categorical_accuracy
sparse_categorical_accuracy
top_k_categorical_accuracy
sparse_top_k_categorical_accuracy
cosine_similarity
binary_crossentropy
categorical_crossentropy
categorical_hinge
hinge
squared_hinge
kullback_leibler_divergence
logcosh
mean_absolute_error
mean_absolute_percentage_error
mean_squared_error
mean_squared_logarithmic_error
poisson
sparse_categorical_crossentropy
Possible abbreviations:
acc = ACC = accuracy
bce = BCE = binary_crossentropy
mse = MSE = mean_squared_error
mae = MAE = mean_absolute_error
mape = MAPE = mean_absolute_percentage_error
msle = MSLE = mean_squared_logarithmic_error
log_cosh = logcosh
cosine_proximity = cosine_similarity
Any others will need to be added as custom objects; see this link.
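For example, here is a minimal sketch (my addition, using the tf.keras API) showing that full names and their abbreviations can be mixed freely in the metrics list:
from tensorflow import keras

# toy model just to demonstrate the metric strings
inputs = keras.Input(shape=(4,))
outputs = keras.layers.Dense(1)(inputs)
model = keras.Model(inputs, outputs)
model.compile(optimizer='sgd',
              loss='mse',
              metrics=['mae', 'mean_squared_error', 'mean_absolute_percentage_error'])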
I am trying to create a binary classifier on a data set of 10,000 records. I have tried multiple activation functions and optimizers, but the results are always between 56.8% and 58.9%. Given the fairly steady results over many dozens of iterations, I assume the problem is one of the following:
My dataset is not classifiable
My model is broken
This is the data set: training-set.csv
I may be able to get 2000 more records but that would be it.
My question is: is there something in the way my model is constructed that is preventing it from learning to a higher degree?
Note that I am happy to have as many layers and nodes as needed, and time is not a factor in generating the model.
# imports needed by the code below (Keras 2.x API)
import numpy as np
import pandas
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier

dataframe = pandas.read_csv(r"training-set.csv", index_col=None)
dataset = dataframe.values
X = dataset[:,0:48].astype(float)
Y = dataset[:,48]
#count the input variables
col_count = X.shape[1]
#normalize X
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_scale = sc_X.fit_transform(X)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size = 0.2)
# define baseline model
activator = 'linear' #'relu' 'sigmoid' 'softmax' 'exponential' 'linear' 'tanh'
#opt = 'Adadelta' #adam SGD nadam RMSprop Adadelta
nodes = 1000
max_layers = 2
max_epochs = 100
max_batch = 32
loss_funct = 'binary_crossentropy' #for binary
last_act = 'sigmoid' # 'softmax' 'sigmoid' 'relu'
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(nodes, input_dim=col_count, activation=activator))
    for x in range(0, max_layers):
        model.add(Dropout(0.2))
        model.add(Dense(nodes, input_dim=nodes, activation=activator))
        #model.add(BatchNormalization())
    model.add(Dense(1, activation=last_act))
    # Compile model
    adam = Adam(lr=0.001)
    model.compile(loss=loss_funct, optimizer=adam, metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=baseline_model, epochs=max_epochs, batch_size=max_batch)
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_test)
#confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
score = np.sum(cm.diagonal())/float(np.sum(cm))
Two points:
There is absolutely no point in stacking dense layers with linear activations - they collapse into a single linear unit; change to activator = 'relu' (and don't bother with the other candidate activation functions in your commented-out list).
Do not use dropout by default, especially when your model has difficulty learning (as it does here); remove the dropout layer(s), and be ready to put (some of) them back in only if you see overfitting (you are currently still very far from that point, so it is not something to worry about now). A sketch of baseline_model with both changes applied follows below.
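Here is a minimal sketch (mine, not the answerer's exact code) of baseline_model with both suggestions applied - ReLU activations and no dropout - keeping the other settings from the question:
def baseline_model():
    # same architecture as in the question, but with ReLU activations
    # and the Dropout layers removed
    model = Sequential()
    model.add(Dense(nodes, input_dim=col_count, activation='relu'))
    for x in range(0, max_layers):
        model.add(Dense(nodes, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
    return model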
I am trying to understand and implement the GridSearch method for Keras regression. Here is my simple reproducible regression application.
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)
# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        # Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)
x,y = to_xy(df,'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=0.25, random_state=42)
#Create Model
model = Sequential()
model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, mode='auto')
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5",save_best_only=True) # save best model
model.fit(x_train,y_train,callbacks=[monitor,checkpointer],verbose=0,epochs=1000)
#model.fit(x_train,y_train,validation_data=(x_test,y_test),callbacks=[monitor,checkpointer],verbose=0,epochs=1000)
pred = model.predict(x_test)
score = np.sqrt(metrics.mean_squared_error(pred,y_test))
print("(RMSE): {}".format(score))
If you run the code, you can see the loss values are not too big.
And here is my reproducible GridSearch implementation. First, I searched the web and found a GridSearch example for KerasClassifier, then tried to adapt it for KerasRegressor. I am not sure whether my revision is correct. Assuming the general concept is right, there must be a problem in this code, because the reported loss does not make sense: the loss function is MSE, but the output is negative, and I could not figure out what I am doing wrong.
from keras.wrappers.scikit_learn import KerasRegressor
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import GridSearchCV
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/slump/slump_test.data")
df.drop(['No','FLOW(cm)','Compressive Strength (28-day)(Mpa)'],1,inplace=True)
#Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        #Classification
        dummies = pd.get_dummies(df[target])
        return df.as_matrix(result).astype(np.float32), dummies.as_matrix().astype(np.float32)
    else:
        #Regression
        return df.as_matrix(result).astype(np.float32), df.as_matrix([target]).astype(np.float32)
x,y = to_xy(df,'SLUMP(cm)')
x_train, x_test, y_train, y_test = train_test_split(
x, y, test_size=0.25, random_state=42)
def create_model(optimizer='adam'):
    # create model
    model = Sequential()
    model.add(Dense(128, input_dim=x.shape[1], activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mse'])
    return model
model = KerasRegressor(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(x_train, y_train)
#summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
I have tested your code and noticed that you are not passing a scoring function to GridSearchCV, so according to the scikit-learn documentation:
If None, the estimator’s default scorer (if available) is used.
It seems it uses 'neg_mean_absolute_error' (or one of these scoring functions for regression) by default for scoring the models.
That is probably why it reports the best model as:
-75.820078 using {'optimizer':'Adagrad'}
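The negative sign is just scikit-learn's convention of maximizing scores, so error metrics are negated. Here is a minimal sketch (my addition, reusing the names from your snippet) of making the scorer explicit and converting the best score back to a positive RMSE:
# scikit-learn negates error metrics so that "bigger is better",
# hence the minus sign when converting back
grid = GridSearchCV(estimator=model,
                    param_grid=param_grid,
                    scoring='neg_mean_squared_error',
                    n_jobs=1)
grid_result = grid.fit(x_train, y_train)
best_rmse = np.sqrt(-grid_result.best_score_)
print("Best RMSE: %f using %s" % (best_rmse, grid_result.best_params_))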
I get this error:
sum() got an unexpected keyword argument 'out'
when I run this code:
import pandas as pd, numpy as np
import keras
from keras.layers.core import Dense, Activation
from keras.models import Sequential
def AUC(y_true, y_pred):
    not_y_pred = np.logical_not(y_pred)
    y_int1 = y_true * y_pred
    y_int0 = np.logical_not(y_true) * not_y_pred
    TP = np.sum(y_pred * y_int1)
    FP = np.sum(y_pred) - TP
    TN = np.sum(not_y_pred * y_int0)
    FN = np.sum(not_y_pred) - TN
    TPR = np.float(TP) / (TP + FN)
    FPR = np.float(FP) / (FP + TN)
    return (1 + TPR - FPR) / 2
# Input datasets
train_df = pd.DataFrame(np.random.rand(91,1000))
train_df.iloc[:,-2]=(train_df.iloc[:,-2]>0.8)*1
model = Sequential()
model.add(Dense(output_dim=60, input_dim=91, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.add(Dense(output_dim=1, input_dim=60, init="glorot_uniform"))
model.add(Activation("sigmoid"))
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=[AUC])
train_df.iloc[:,-1]=np.ones(train_df.shape[0]) #bias
X=train_df.iloc[:,:-1].values
Y=train_df.iloc[:,-1].values
print X.shape,Y.shape
model.fit(X, Y, batch_size=50,show_accuracy = False, verbose = 1)
Is it possible to implement a custom metric aside from doing a loop on batches and editing the source code?
Here I'm answering the OP's title question rather than their exact problem, since this question shows up at the top when I google the topic.
You can implement a custom metric in two ways.
As mentioned in the Keras documentation:
import keras.backend as K
def mean_pred(y_true, y_pred):
    return K.mean(y_pred)

model.compile(optimizer='sgd',
              loss='binary_crossentropy',
              metrics=['accuracy', mean_pred])
But here you have to remember, as mentioned in Marcin Możejko's answer, that y_true and y_pred are tensors. So in order to calculate the metric correctly you need to use keras.backend functionality. For details, please look at this SO question: How to calculate F1 Macro in Keras?
Or you can implement it in a hacky way, as mentioned in a Keras GitHub issue. For that you need to use the callbacks argument of model.fit.
import keras as keras
import numpy as np
from keras.optimizers import SGD
from sklearn.metrics import roc_auc_score
model = keras.models.Sequential()
# ...
sgd = SGD(lr=0.001, momentum=0.9)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
class Metrics(keras.callbacks.Callback):

    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, batch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))
        y_val = np.argmax(y_val, axis=1)
        y_predict = np.argmax(y_predict, axis=1)
        self._data.append({
            'val_rocauc': roc_auc_score(y_val, y_predict),
        })
        return

    def get_data(self):
        return self._data
metrics = Metrics()
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[metrics])
metrics.get_data()
The problem is that y_pred and y_true are not NumPy arrays but either Theano or TensorFlow tensors. That's why you got this error.
You can define your custom metrics, but you have to remember that their arguments are tensors, not NumPy arrays.
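For illustration, here is a minimal sketch (my own, not from the answer) of the question's (1 + TPR - FPR) / 2 formula rewritten with keras.backend tensor ops, thresholding the predictions at 0.5; note this is not a true ROC AUC, just the question's formula made tensor-compatible:
import keras.backend as K

def auc_like_metric(y_true, y_pred):
    # threshold probabilities at 0.5 to get hard predictions (tensor ops only)
    y_pred_bin = K.cast(K.greater(y_pred, 0.5), K.floatx())
    tp = K.sum(y_true * y_pred_bin)
    fp = K.sum((1 - y_true) * y_pred_bin)
    tn = K.sum((1 - y_true) * (1 - y_pred_bin))
    fn = K.sum(y_true * (1 - y_pred_bin))
    tpr = tp / (tp + fn + K.epsilon())
    fpr = fp / (fp + tn + K.epsilon())
    return (1 + tpr - fpr) / 2

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[auc_like_metric])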
You can pass the output of model.predict() to your AUC metric function (this will iterate over batches, so you might be better off using model.predict_on_batch()). Assuming you have something like a softmax layer as output (something that outputs probabilities), you can use that together with sklearn.metrics to get the AUC.
from sklearn.metrics import roc_curve, auc
# from here
def sklearnAUC(test_labels, test_prediction):
    n_classes = 2
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        # ( actual labels, predicted probabilities )
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_prediction[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    return round(roc_auc[0], 3), round(roc_auc[1], 3)
Now compute your metric:
# gives a numpy array like so [ [0.3,0.7] , [0.2,0.8] ....]
Y_pred = model.predict_on_batch ( X_test )
# Y_test looks something like [ [0,1] , [1,0] .... ]
# auc1 and auc2 should be equal
auc1 , auc2 = sklearnAUC( Y_test , Y_pred )