I am working on a multi-label image classification problem where evaluation is conducted in terms of the F1-score between the system's predicted labels and the ground-truth labels.
Given that, should I use loss="binary_crossentropy" or loss=keras_metrics.f1_score(), where keras_metrics.f1_score() is taken from here: https://pypi.org/project/keras-metrics/? I am a bit confused because all of the tutorials I have found on the Internet about multi-label classification are based on the binary_crossentropy loss function, whereas here I have to optimize against the F1-score.
Furthermore, should I set metrics=["accuracy"], or maybe metrics=[keras_metrics.f1_score()], or should I leave metrics empty entirely?
Based on user706838's answer ... use the f1_loss from https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric:
import tensorflow as tf
import keras.backend as K

def f1_loss(y_true, y_pred):
    # Soft (differentiable) confusion-matrix counts, computed per class
    tp = K.sum(K.cast(y_true * y_pred, 'float32'), axis=0)
    tn = K.sum(K.cast((1 - y_true) * (1 - y_pred), 'float32'), axis=0)  # unused, kept for reference
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float32'), axis=0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float32'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2 * p * r / (p + r + K.epsilon())
    # tf.is_nan moved to tf.math.is_nan in TF 2.x
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return 1 - K.mean(f1)
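If you go this route, here is a minimal sketch of how it could be wired up; model stands for your multi-label network with sigmoid outputs, and the metric choice is only a suggestion:

model.compile(
    optimizer='adam',
    loss=f1_loss,                 # optimize the soft-F1 surrogate directly
    metrics=['binary_accuracy'],  # monitor a thresholded quantity as well
)

Keep in mind the surrogate operates on the raw sigmoid outputs, so the training loss is 1 minus a soft F1, not the thresholded F1-score your system will ultimately be evaluated on.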
I want to implement a loss function similar to the one in this paper: https://arxiv.org/pdf/1511.08861.pdf
They combine the L1 (mean absolute error) loss and the MS-SSIM loss as in the following equation:
L_Mix = α · L_MS-SSIM + (1 − α) · GaussFilter · L_1
There is Caffe code available on GitHub: https://github.com/NVlabs/PL4NN/blob/master/src/loss.py
But I don't know how to use this in TF. Is there already similar existing code for TF?
I started trying this:
def ms_ssim_loss(y_true, y_pred):
    ms_ssim = tf.image.ssim_multiscale(y_true, y_pred, 1.0)
    return 1 - ms_ssim

def mix_loss(y_true, y_pred):
    alpha = 0.84
    ms_ssim = ms_ssim_loss(y_true, y_pred)
    # MeanAbsoluteError is a class and must be instantiated before being called
    l1 = tf.keras.losses.MeanAbsoluteError()(y_true, y_pred)
    gauss = gaussian(...)  # this is the part I don't know how to implement
    loss = alpha * ms_ssim + (1 - alpha) * gauss * l1
    return loss
But I don't know how to implement and apply the Gaussian filter here.
Thanks in advance and best regards!
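One way to realize the Gaussian-filtered L1 term in TF is to blur the per-pixel absolute error with a Gaussian kernel via a depthwise convolution. Below is a minimal sketch, assuming single-channel images of shape (batch, H, W, 1); the kernel size (11) and sigma (1.5) are my own assumptions, not the paper's exact multi-scale schedule:

import tensorflow as tf

def gaussian_kernel(size=11, sigma=1.5):
    # Normalized 2-D Gaussian, shaped (size, size, 1, 1) for depthwise_conv2d
    ax = tf.range(-(size // 2), size // 2 + 1, dtype=tf.float32)
    xx, yy = tf.meshgrid(ax, ax)
    kernel = tf.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    kernel = kernel / tf.reduce_sum(kernel)
    return kernel[:, :, tf.newaxis, tf.newaxis]

def mix_loss(y_true, y_pred, alpha=0.84):
    ms_ssim = 1.0 - tf.image.ssim_multiscale(y_true, y_pred, 1.0)
    # Gaussian-weighted L1: blur the absolute error map, then average per image
    abs_err = tf.abs(y_true - y_pred)
    blurred = tf.nn.depthwise_conv2d(abs_err, gaussian_kernel(),
                                     strides=[1, 1, 1, 1], padding='SAME')
    l1 = tf.reduce_mean(blurred, axis=[1, 2, 3])
    return alpha * ms_ssim + (1 - alpha) * l1

For multi-channel images the kernel would need to be tiled across the channel axis (e.g. tf.tile(kernel, [1, 1, 3, 1])).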
My question is in reference to the paper Learning Confidence for Out-of-Distribution Detection in Neural Networks.
I need help creating a custom loss function in TensorFlow 2.0+, as described in the paper, so that the CNN yields a confident prediction for an in-distribution image (one belonging to the training categories) and a low-confidence prediction for an out-of-distribution image (any random image). The paper suggests adding a confidence-estimation branch to any conventional feedforward architecture, in parallel with the original class-prediction branch (refer to the image below).
In order to define the loss function, the softmax prediction probabilities are adjusted by interpolating between the original predictions p_i and the target probability distribution y_i, where the degree of interpolation is indicated by the network's confidence c:
p_i' = c · p_i + (1 − c) · y_i
and the final loss is the task loss on the adjusted probabilities plus a λ-weighted confidence penalty:
L = −Σ_i y_i · log(p_i') − λ · log(c)
I need help implementing this, along with the loss function, in TensorFlow 2.0+; below is what I could come up with from my own knowledge:
import tensorflow.keras.backend as k
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50

# Defining the custom loss function
def custom_loss(c):
    def loss(y_true, y_pred):
        # Interpolate between prediction and target, weighted by the confidence c
        interpolated_p = c * y_pred + (1 - c) * y_true
        # The Keras backend provides k.sum, not k.reduce_sum
        return -k.sum(k.log(interpolated_p) * y_true, axis=-1) - k.log(c)
    return loss

# Defining the model structure using ResNet50
basemodel = ResNet50(weights="imagenet", include_top=False)
headmodel = basemodel.output
headmodel = layers.AveragePooling2D(pool_size=(7, 7))(headmodel)
headmodel = layers.Flatten()(headmodel)  # flatten so the Dense heads emit 2-D outputs
# Add a sigmoid layer on the pooled output for the confidence estimate
conf_branch = layers.Dense(1, activation="sigmoid", name="confidence_branch")(headmodel)
# Add a softmax layer on the pooled output for the class prediction
softmax_branch = layers.Dense(10, activation="softmax", name="softmax_branch")(headmodel)
# Instantiate an end-to-end model predicting both confidence and class
model = keras.Model(
    inputs=basemodel.input,
    outputs=[softmax_branch, conf_branch],
)
# conf_branch is already an output tensor, so it has no .output attribute
model.compile(loss=custom_loss(c=conf_branch), optimizer='rmsprop')
Appreciate any help on this! Thanks!
The following is the code I wrote for the Keras implementation:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.applications import ResNet50

num_classes = 10

basemodel = ResNet50(weights="imagenet", include_top=False)
headmodel = basemodel.output
headmodel = layers.AveragePooling2D(pool_size=(7, 7))(headmodel)
headmodel = layers.Flatten()(headmodel)  # flatten so the Dense heads emit 2-D outputs
conf_branch = layers.Dense(1, activation="sigmoid", name="confidence_branch")(headmodel)
softmax_branch = layers.Dense(num_classes, activation="softmax", name="softmax_branch")(headmodel)
# Concatenate the two heads so a single loss sees both predictions and confidence
output = Concatenate(axis=-1)([softmax_branch, conf_branch])

def custom_loss(y_true, y_pred, budget=0.3):
    # LAMBDA weights the confidence penalty and is adapted on the fly,
    # depending on whether the confidence loss is above or below the budget
    with tf.compat.v1.variable_scope("LAMBDA", reuse=tf.compat.v1.AUTO_REUSE):
        LAMBDA = tf.compat.v1.get_variable("LAMBDA", dtype=tf.float32,
                                           initializer=tf.constant(0.1))
    pred_original = y_pred[:, 0:num_classes]
    confidence = y_pred[:, num_classes]
    eps = 1e-12
    pred_original = tf.clip_by_value(pred_original, 0. + eps, 1. - eps)
    confidence = tf.clip_by_value(confidence, 0. + eps, 1. - eps)
    # Randomly interpolate between predicted and full confidence so the network
    # does not receive hints for every sample. tf.random.uniform replaces
    # np.random.uniform because the batch size is dynamic inside the graph.
    b = tf.random.uniform(shape=[tf.shape(y_true)[0]], minval=0.0, maxval=1.0)
    conf = confidence * b + (1 - b)
    conf = tf.expand_dims(conf, axis=-1)
    # Interpolate predictions toward the targets, weighted by confidence
    pred_new = pred_original * conf + y_true * (1 - conf)
    xentropy_loss = tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(pred_new), axis=-1))
    confidence_loss = tf.reduce_mean(-tf.math.log(confidence))
    total_loss = xentropy_loss + LAMBDA * confidence_loss

    # Shrink LAMBDA while the confidence loss is under budget, grow it otherwise
    def true_func():
        return LAMBDA / 1.01
    def false_func():
        return LAMBDA / 0.99
    LAMBDA_NEW = tf.cond(budget > confidence_loss, true_func, false_func)
    LAMBDA.assign(LAMBDA_NEW)
    # tf.print(LAMBDA)
    return total_loss

def accuracy(y_true, y_pred):
    # Only the class-prediction slice is used for the accuracy metric
    y_pred = y_pred[:, :num_classes]
    correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))

model = Model(inputs=basemodel.input, outputs=output)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=custom_loss, optimizer=optimizer, metrics=[accuracy])
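For completeness, a hypothetical training call; x_train and y_train_int are placeholder names, and the labels must be one-hot encoded to shape (batch, num_classes), since custom_loss multiplies y_true against the num_classes-wide prediction slice:

from tensorflow.keras.utils import to_categorical

y_train = to_categorical(y_train_int, num_classes=num_classes)  # one-hot targets
model.fit(x_train, y_train, batch_size=32, epochs=10)

I have not verified how strictly every Keras version checks the target width against the (num_classes + 1)-wide concatenated output, so this may need adjusting.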
I'm trying to implement the multiclass hybrid loss function in Python from the following article, https://arxiv.org/pdf/1808.05238.pdf, for my semantic segmentation problem with an imbalanced dataset. I managed to get my implementation correct enough for training to start, but the results are very poor. The model architecture is a U-Net, the learning rate in the Adam optimizer is 1e-5, and the mask shape is (None, 512, 512, 3), with 3 classes (in my case: forest, deforestation, other). The formula I used to implement my loss: L_Hybrid = L_Tversky + λ · L_Focal
The code I created:
def build_hybrid_loss(_lambda_=1, _alpha_=0.5, _beta_=0.5, smooth=1e-6):
    def hybrid_loss(y_true, y_pred):
        C = 3
        tversky = 0
        # Calculate the Tversky term, one class at a time
        for index in range(C):
            # tf.reshape flattens the tensors (tf.nest.flatten is meant for
            # nested structures, not tensors)
            inputs_fl = tf.reshape(y_pred[..., index], [-1])
            targets_fl = tf.reshape(y_true[..., index], [-1])
            # True positives, false positives and false negatives
            TP = tf.reduce_sum(inputs_fl * targets_fl)
            FP = tf.reduce_sum(inputs_fl * (1 - targets_fl))
            FN = tf.reduce_sum((1 - inputs_fl) * targets_fl)
            tversky_i = (TP + smooth) / (TP + _alpha_ * FP + _beta_ * FN + smooth)
            tversky += tversky_i
        tversky += C
        # Calculate the focal loss
        loss_focal = 0
        for index in range(C):
            f_loss = -(y_true[..., index] * (1 - y_pred[..., index]) ** 2
                       * tf.math.log(y_pred[..., index]))
            # Average over each data point/image in the batch
            axis_to_reduce = range(1, 3)
            f_loss = tf.math.reduce_mean(f_loss, axis=axis_to_reduce)
            loss_focal += f_loss
        result = tversky + _lambda_ * loss_focal
        return result
    return hybrid_loss
The prediction of the model after the end of an epoch (I have a problem with swapped colors, so the red in the prediction is actually green, which means forest; i.e. the prediction is mostly forest rather than deforestation):
The question is: what is wrong with my hybrid loss implementation, and what needs to be changed to make it work?
To simplify things a little, I have divided the hybrid loss into four separate functions: Tversky loss, Dice coefficient, multi-label Dice loss, and hybrid loss. You can see the code below.
def TverskyLoss(targets, inputs, alpha=0.5, beta=0.5, smooth=1e-16, numLabels=3):
    tversky = 0
    for index in range(numLabels):
        inputs_fl = tf.reshape(inputs[..., index], [-1])
        targets_fl = tf.reshape(targets[..., index], [-1])
        # True positives, false positives and false negatives
        TP = tf.reduce_sum(inputs_fl * targets_fl)
        FP = tf.reduce_sum(inputs_fl * (1 - targets_fl))
        FN = tf.reduce_sum((1 - inputs_fl) * targets_fl)
        tversky_i = (TP + smooth) / (TP + alpha * FP + beta * FN + smooth)
        tversky += tversky_i
    # The per-class Tversky indices sum to at most numLabels, so the loss is >= 0
    return numLabels - tversky

def dice_coef(y_true, y_pred, smooth=1e-16):
    y_true_f = tf.reshape(y_true, [-1])
    y_pred_f = tf.reshape(y_pred, [-1])
    intersection = tf.math.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (
        tf.math.reduce_sum(y_true_f) + tf.math.reduce_sum(y_pred_f) + smooth)

def dice_coef_multilabel(y_true, y_pred, numLabels=3):
    dice = 0
    for index in range(numLabels):
        dice -= dice_coef(y_true[..., index], y_pred[..., index])
    return numLabels + dice

def build_hybrid_loss(_lambda_=0.5, _alpha_=0.5, _beta_=0.5, smooth=1e-16, C=3):
    def hybrid_loss(y_true, y_pred):
        tversky = TverskyLoss(y_true, y_pred, alpha=_alpha_, beta=_beta_)
        dice = dice_coef_multilabel(y_true, y_pred)
        return tversky + _lambda_ * dice
    return hybrid_loss
Passing loss=build_hybrid_loss() at model compilation sets the hybrid loss as the loss function of the model.
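For example (model stands for the U-Net mentioned above, and the learning rate matches the one from the question):

import tensorflow as tf

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss=build_hybrid_loss(_lambda_=0.5, _alpha_=0.5, _beta_=0.5),
    metrics=['accuracy'],
)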
After a short investigation, I came to the conclusion that in my particular case a hybrid loss with _lambda_ = 0.2, _alpha_ = 0.5, _beta_ = 0.5 is not much better than a single Dice loss or a single Tversky loss; neither IoU (intersection over union) nor the standard accuracy metric improves noticeably with the hybrid loss. But I do not believe it is a rule that such a hybrid loss will always be worse than, or merely on par with, a single loss.
link to Accuracy graph
link to IoU graph
I am working on a multi-class classification problem using CNNs in Keras. My precision and recall scores are always over 1, which does not make any sense at all. Attached below is my code; what am I doing wrong?
import keras.backend as K

def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())

def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

model.compile(loss='categorical_crossentropy', optimizer='Adam',
              metrics=['accuracy', recall, precision])
I was able to figure this out. The above code works perfectly once you one-hot encode all the categorical labels. Also, make sure you do NOT use sparse_categorical_crossentropy as your loss function; use plain categorical_crossentropy instead.
If you wish to convert your categorical values to one-hot encoded values in Keras, you can just use this code:
from keras.utils import to_categorical
y_train = to_categorical(y_train)
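For example, a vector of integer labels becomes one row per sample:

import numpy as np
from keras.utils import to_categorical

print(to_categorical(np.array([0, 2, 1])))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]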
The reason you have to do the above is noted in the Keras documentation:
"when using the categorical_crossentropy loss, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros except for a 1 at the index corresponding to the class of the sample). In order to convert integer targets into categorical targets, you can use the Keras utility to_categorical"
Is there any built-in function to get the maximum accuracy for a binary probabilistic classifier in scikit-learn?
E.g. to get the maximum F1-score I do:
import sklearn.metrics

# AUPRC
precision, recall, thresholds = sklearn.metrics.precision_recall_curve(y_true, y_score)
auprc = sklearn.metrics.auc(recall, precision)

# Maximum F1-score over all thresholds
max_f1 = 0
for r, p, t in zip(recall, precision, thresholds):
    if p + r == 0:
        continue
    if (2 * p * r) / (p + r) > max_f1:
        max_f1 = (2 * p * r) / (p + r)
        max_f1_threshold = t
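As an aside, the same maximum can be computed without the explicit loop. precision_recall_curve returns one more (precision, recall) pair than thresholds, so the final pair, which has no associated threshold, is dropped:

import numpy as np

# The epsilon plays the role of the `p + r == 0` guard in the loop above
f1_scores = 2 * precision * recall / (precision + recall + 1e-12)
max_f1 = f1_scores[:-1].max()
max_f1_threshold = thresholds[f1_scores[:-1].argmax()]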
I could compute the maximum accuracy in a similar fashion:
import numpy as np

accuracies = []
thresholds = np.arange(0, 1, 0.1)
for threshold in thresholds:
    y_pred = np.greater(y_score, threshold).astype(int)
    accuracy = sklearn.metrics.accuracy_score(y_true, y_pred)
    accuracies.append(accuracy)

accuracies = np.array(accuracies)
max_accuracy = accuracies.max()
max_accuracy_threshold = thresholds[accuracies.argmax()]
but I wonder whether there is any built-in function.
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_true, probs)

accuracy_scores = []
for thresh in thresholds:
    accuracy_scores.append(accuracy_score(y_true, [m > thresh for m in probs]))

accuracies = np.array(accuracy_scores)
max_accuracy = accuracies.max()
max_accuracy_threshold = thresholds[accuracies.argmax()]
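Equivalently, the accuracy_score calls can be avoided altogether: at each ROC threshold the confusion-matrix counts follow directly from tpr and fpr, so accuracy has a closed form (up to the > versus >= convention at the threshold itself):

import numpy as np

P = np.sum(y_true)      # number of actual positives
N = len(y_true) - P     # number of actual negatives
# At every threshold: TP = tpr * P and TN = (1 - fpr) * N
accuracies = (tpr * P + (1 - fpr) * N) / (P + N)
best = accuracies.argmax()
max_accuracy = accuracies[best]
max_accuracy_threshold = thresholds[best]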
I started to improve the solution by replacing thresholds = np.arange(0,1,0.1) with a smarter, dichotomic (bisection-style) search for the maximum.
Then I realized, after 2 hours of work, that computing all the accuracies was far cheaper than searching for just the maximum! (Yes, it is totally counter-intuitive.)
I wrote a lot of comments below to explain the code. Feel free to delete them to make it more readable.
import numpy as np

# Definition: we predict True if y_score > threshold
def ROC_curve_data(y_true, y_score):
    y_true = np.asarray(y_true, dtype=np.bool_)
    y_score = np.asarray(y_score, dtype=np.float64)
    assert y_score.size == y_true.size

    order = np.argsort(y_score)  # sort samples by ascending score
    y_true = y_true[order]
    # The thresholds to consider are just the score values, plus 0 (accept everything)
    thresholds = np.insert(y_score[order], 0, 0)

    # At threshold = 0 we accept everything:
    TP = [sum(y_true)]   # true positives = every actual positive
    FP = [sum(~y_true)]  # false positives = every actual negative
    TN = [0]             # nothing is predicted negative yet...
    FN = [0]             # ...so there are no true or false negatives
    for i in range(1, thresholds.size):
        # At this step we stop predicting sample i-1 as True and start
        # predicting it as False. What does y_true say about that?
        # If y_true was True, this step introduced a mistake:
        TP.append(TP[-1] - int(y_true[i - 1]))
        FN.append(FN[-1] + int(y_true[i - 1]))
        # If y_true was False, this step was an improvement:
        FP.append(FP[-1] - int(~y_true[i - 1]))
        TN.append(TN[-1] + int(~y_true[i - 1]))
    TP = np.asarray(TP, dtype=np.int_)
    FP = np.asarray(FP, dtype=np.int_)
    TN = np.asarray(TN, dtype=np.int_)
    FN = np.asarray(FN, dtype=np.int_)
    # Accuracy, sensitivity, specificity, etc. can all be derived from these
    # counts by the helper functions below
    return (thresholds, TP, FP, TN, FN)
The whole process is a single loop, and the algorithm is trivial.
In fact, this stupidly simple function is 10 times faster than the solution proposed before me (computing the accuracies for thresholds = np.arange(0,1,0.1)) and 30 times faster than my previous smart-ass dichotomous algorithm...
You can then easily compute ANY KPI you want, for example:

def max_accuracy(thresholds, TP, FP, TN, FN):
    accuracy = (TP + TN) / (TP + FP + TN + FN)
    return max(accuracy)

def max_min_sensitivity_specificity(thresholds, TP, FP, TN, FN):
    sensitivity = TP / (TP + FN)
    specificity = TN / (FP + TN)
    return max(np.minimum(sensitivity, specificity))
If you want to test it:

y_score = np.random.uniform(size=100)
y_true = [np.random.binomial(1, p) for p in y_score]
data = ROC_curve_data(y_true, y_score)

# I personally use Jupyter; remove this magic line otherwise
%matplotlib inline
import matplotlib.pyplot as plt
plt.step(data[0], data[1])  # TP
plt.step(data[0], data[2])  # FP
plt.step(data[0], data[3])  # TN
plt.step(data[0], data[4])  # FN
plt.show()

print("Max accuracy is", max_accuracy(*data))
print("Max of Min(Sensitivity, Specificity) is", max_min_sensitivity_specificity(*data))
Enjoy ;)