I have a binary classification problem with categories background (bg) = 0, signal (sig) = 1, for which I am training NNs. For monitoring purposes, I am trying to implement a custom metric in Keras with TensorFlow backend that does the following:
1) Calculate the threshold on my NN output which would result in a false positive rate (classifying bg as signal) of X (in this case X = 0.02, but it could be anything).
2) Calculate the true positive rate at this threshold.
Given numpy arrays y_true, y_pred, I would write a function like:
def eff_at_2percent_metric(y_true, y_pred):
    #Find list of bg events
    bg_list = np.argwhere(y_true < 0.5)
    #Order their NN outputs from highest to lowest
    ordered_bg_predictions = np.flip(np.sort(y_pred[bg_list]), axis=0)
    #Find the threshold with 2% false positive rate
    threshold = ordered_bg_predictions[int(round(0.02 * len(bg_list)))]
    #Find list of signal events
    sig_list = np.argwhere(y_true > 0.5)
    #Order these by NN output
    ordered_sig_predictions = np.sort(y_pred[sig_list])
    #Find true positive rate with this threshold
    sig_eff = 1 - np.searchsorted(ordered_sig_predictions, threshold) / len(ordered_sig_predictions)
    return sig_eff
Of course, this does not work because to implement a custom metric, y_true and y_pred are supposed to be TensorFlow tensors rather than numpy arrays. Is there any way I can make this work correctly?
There's a metric for sensitivity at specificity, which I believe is equivalent (specificity is one minus FPR).
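For example, in recent TF versions this is exposed as tf.keras.metrics.SensitivityAtSpecificity (and as tf.metrics.sensitivity_at_specificity in the TF 1.x metrics module), so you can pass it directly at compile time. A minimal sketch, where the model architecture and input shape are placeholders, and a 2% FPR corresponds to a specificity of 0.98 (note the metric is evaluated over a fixed grid of thresholds, so it is an approximation):
import tensorflow as tf

# A 2% false positive rate corresponds to a specificity of 0.98.
tpr_at_2pct_fpr = tf.keras.metrics.SensitivityAtSpecificity(
    specificity=0.98, num_thresholds=1000, name='tpr_at_2pct_fpr')

# Placeholder binary classifier just to show the wiring.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=[tpr_at_2pct_fpr])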
You can implement your own metric, and here is an example for the false positive rate:
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops.metrics_impl import _aggregate_across_towers
from tensorflow.python.ops.metrics_impl import true_negatives
from tensorflow.python.ops.metrics_impl import false_positives
from tensorflow.python.ops.metrics_impl import _remove_squeezable_dimensions
def false_positive_rate(labels,
                        predictions,
                        weights=None,
                        metrics_collections=None,
                        updates_collections=None,
                        name=None):
    if context.executing_eagerly():
        raise RuntimeError('false_positive_rate is not supported '
                           'when eager execution is enabled.')
    with variable_scope.variable_scope(name, 'false_alarm',
                                       (predictions, labels, weights)):
        predictions, labels, weights = _remove_squeezable_dimensions(
            predictions=math_ops.cast(predictions, dtype=dtypes.bool),
            labels=math_ops.cast(labels, dtype=dtypes.bool),
            weights=weights)

        false_p, false_positives_update_op = false_positives(
            labels,
            predictions,
            weights,
            metrics_collections=None,
            updates_collections=None,
            name=None)
        true_n, true_negatives_update_op = true_negatives(
            labels,
            predictions,
            weights,
            metrics_collections=None,
            updates_collections=None,
            name=None)

        def compute_false_positive_rate(true_n, false_p, name):
            return array_ops.where(
                math_ops.greater(true_n + false_p, 0),
                math_ops.div(false_p, true_n + false_p), 0, name)

        def once_across_towers(_, true_n, false_p):
            return compute_false_positive_rate(true_n, false_p, 'value')

        false_positive_rate = _aggregate_across_towers(
            metrics_collections, once_across_towers, true_n, false_p)

        update_op = compute_false_positive_rate(true_negatives_update_op,
                                                false_positives_update_op,
                                                'update_op')
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)

        return false_positive_rate, update_op
You can adapt the code to the true positive rate.
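For the true positive rate, note that TPR = TP / (TP + FN), which is exactly recall, so tf.metrics.recall already computes it as a streaming metric. If you prefer the same structure as the snippet above, a sketch of the adaptation (reusing the same private imports, swapping in true_positives and false_negatives, and omitting the cross-tower aggregation for brevity) could look like:
from tensorflow.python.ops.metrics_impl import true_positives
from tensorflow.python.ops.metrics_impl import false_negatives

def true_positive_rate(labels, predictions, weights=None,
                       metrics_collections=None, updates_collections=None,
                       name=None):
    with variable_scope.variable_scope(name, 'true_positive_rate',
                                       (predictions, labels, weights)):
        predictions, labels, weights = _remove_squeezable_dimensions(
            predictions=math_ops.cast(predictions, dtype=dtypes.bool),
            labels=math_ops.cast(labels, dtype=dtypes.bool),
            weights=weights)
        true_p, true_positives_update_op = true_positives(
            labels, predictions, weights)
        false_n, false_negatives_update_op = false_negatives(
            labels, predictions, weights)

        def compute_tpr(true_p, false_n, name):
            # TPR = TP / (TP + FN), guarded against division by zero
            return array_ops.where(
                math_ops.greater(true_p + false_n, 0),
                math_ops.div(true_p, true_p + false_n), 0, name)

        tpr = compute_tpr(true_p, false_n, 'value')
        update_op = compute_tpr(true_positives_update_op,
                                false_negatives_update_op, 'update_op')
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return tpr, update_op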
I'm trying to add some metrics to a BERT-style model, but struggling with tf.metrics. For most metrics it's pretty straightforward to use tf.metrics.mean, but for a metric like the false positive rate it's not. I know there are tf.metrics.false_positives and tf.metrics.true_negatives, but since tf.metrics functions also return an associated update op, you can't simply do fpr = fp / (fp + tn). How does one go about this?
Here is the code: it is the same false_positive_rate implementation shown in the answer above, so the full listing is not repeated here.
I have created a custom loss function in Keras as follows:
import tensorflow as tf
import numpy as np
def custom_loss(y_true, y_pred):
    cce = tf.keras.losses.CategoricalCrossentropy()
    loss = cce(y_true, y_pred).numpy()
    epsilon = np.finfo(np.float32).eps
    confidence = np.clip(y_true.numpy(), epsilon, 1.-epsilon)
    sample_entropy = -1. * np.sum(np.multiply(confidence, np.log(confidence) / np.log(np.e)), axis=-1)
    entropy = np.mean(sample_entropy)
    penalty = 0.1 * -entropy
    return loss + penalty
When I use this custom loss function I'm getting the error message
ValueError: No gradients provided for any variable
Somehow no gradients can be calculated. How does the loss function need to be changed so that gradients can be calculated?
TensorFlow needs tensors to carry the dependency information that lets gradients flow backwards. If you convert a tensor to a numpy array inside the loss function, you break that dependency, which is why no gradients are provided for any variable. You therefore need to replace every numpy operation in the loss function with the corresponding TensorFlow (or Keras backend) operation, e.g.:
import tensorflow as tf
import numpy as np

cce = tf.keras.losses.CategoricalCrossentropy()
epsilon = np.finfo(np.float32).eps

def custom_loss(y_true, y_pred):
    loss = cce(y_true, y_pred)
    confidence = tf.clip_by_value(y_true, epsilon, 1.-epsilon)
    sample_entropy = -1. * tf.reduce_sum(tf.math.multiply(confidence, tf.math.log(confidence) / tf.math.log(np.e)), axis=-1)
    entropy = tf.reduce_mean(sample_entropy)
    penalty = 0.1 * -entropy
    return loss + penalty
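The rewritten loss can then be wired up in compile as usual; for example (the model below is just a placeholder with a softmax output to match the categorical crossentropy):
# Placeholder model for illustration; any Keras model with a softmax output works the same way.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.compile(optimizer='adam', loss=custom_loss)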
I have difficulties writing a custom loss function that makes use of some random weights generated according to the class/state predicted by the Softmax output. The desired properties are:
The model is a simple feedforward neural network with input dimension 1 and output dimension 6.
The activation function of the output layer is Softmax, and the actual class/state is estimated from it using Argmax.
Note that the training data only consists of X (there is no Y).
The loss function is defined according to random weights (i.e., Weibull distribution) sampled based on the predicted state number for each input sample X.
Below I provide a minimal example for illustration. For simplification purposes, I only define the loss function based on the random weights for state/class-1. I get: "ValueError: No gradients provided for any variable: ['dense_41/kernel:0', 'dense_41/bias:0', 'dense_42/kernel:0', 'dense_42/bias:0']."
As indicated in the post below, I found out that argmax is not differentiable, and a softargmax function would help (as I implemented in the following code). However, I still get the same error.
Getting around tf.argmax which is not differentiable
import sys
import time
from tqdm import tqdm
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from scipy.stats import weibull_min
###############################################################################################
# Generate Dataset
lb = np.array([2.0]) # Left boundary
ub = np.array([100.0]) # Right boundary
# Data Points - uniformly distributed
N_r = 50
X_r = np.linspace(lb, ub, N_r)
###############################################################################################
#Define Model
class DGM:
    # Initialize the class
    def __init__(self, X_r):
        #Normalize training input data
        self.Xmean, self.Xstd = np.mean(X_r), np.std(X_r)
        X_r = (X_r - self.Xmean) / self.Xstd
        self.X_r = X_r
        #Input and output variable dimensions
        self.X_dim = 1; self.Y_dim = 6
        # Define tensors
        self.X_r_tf = tf.convert_to_tensor(X_r, dtype=tf.float32)
        #Learning rate
        self.LEARNING_RATE = 1e-4
        #Feedforward neural network model
        self.modelTest = self.test_model()

    ###############################################
    # Initialize network weights and biases
    def test_model(self):
        input_shape = self.X_dim
        dimensionality = self.Y_dim
        model = tf.keras.Sequential()
        model.add(layers.Input(shape=input_shape))
        model.add(layers.Dense(64, kernel_initializer='glorot_uniform', bias_initializer='zeros'))
        model.add(layers.Activation('tanh'))
        model.add(layers.Dense(dimensionality))
        model.add(layers.Activation('softmax'))
        return model

    ##############################################
    def compute_loss(self):
        #Define optimizer
        gen_opt = tf.keras.optimizers.Adam(lr=self.LEARNING_RATE, beta_1=0.0, beta_2=0.9)
        with tf.GradientTape() as test_tape:
            ###### calculate loss
            generated_u = self.modelTest(self.X_r_tf, training=True)
            #number of data
            n_data = generated_u.shape[0]
            #initialize random weights assuming state-1 at all input samples
            wt1 = np.zeros((n_data, 1), dtype=np.float32)  #initialize weights
            for b in range(n_data):
                wt1[b] = weibull_min.rvs(c=2, loc=0, scale=4, size=1)
            wt1 = tf.reshape(tf.convert_to_tensor(wt1, dtype=tf.float32), shape=(n_data, 1))
            #print('-----------sampling done-----------')
            #determine the actual state using softargmax
            idst = self.softargmax(generated_u)
            idst = tf.reshape(tf.cast(idst, tf.float32), shape=(n_data, 1))
            #index state-1
            id1 = tf.constant(0., dtype=tf.float32)
            #assign weights if predicted state is state-1
            wt1_final = tf.cast(tf.equal(idst, id1), dtype=tf.float32) * wt1
            #final loss
            test_loss = tf.reduce_mean(tf.square(wt1_final))
            #print('-----------test loss calculated-----------')
        gradients_of_modelTest = test_tape.gradient(test_loss,
                                                    [self.modelTest.trainable_variables])
        gen_opt.apply_gradients(zip(gradients_of_modelTest[0], self.modelTest.trainable_variables))
        return test_loss

    #reference: Getting around tf.argmax which is not differentiable
    #https://stackoverflow.com/questions/46926809/getting-around-tf-argmax-which-is-not-differentiable
    def softargmax(self, x, beta=1e10):
        x = tf.convert_to_tensor(x)
        x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
        return tf.reduce_sum(tf.nn.softmax(x*beta, axis=1) * x_range, axis=-1)

    ##############################################
    def train(self, training_steps=100):
        train_start_time = time.time()
        for step in tqdm(range(training_steps), desc='Training'):
            start = time.time()
            test_loss = self.compute_loss()
            if (step + 1) % 10 == 0:
                elapsed_time = time.time() - train_start_time
                sec_per_step = elapsed_time / step
                mins_left = ((training_steps - step) * sec_per_step)
                tf.print("\nStep # ", step, "/", training_steps,
                         output_stream=sys.stdout)
                tf.print("Current time:", elapsed_time, " time left:",
                         mins_left, output_stream=sys.stdout)
                tf.print("Test Loss: ", test_loss, output_stream=sys.stdout)
###############################################################################################
#Define and train the model
model = DGM(X_r)
model.train(training_steps=100)
I have the following simple neural network (with 1 neuron only) to test the computation precision of sigmoid activation & binary_crossentropy of Keras:
model = Sequential()
model.add(Dense(1, input_dim=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
To simplify the test, I manually set the only weight to 1 and bias to 0, and then evaluate the model with 2-point training set {(-a, 0), (a, 1)}, i.e.
y = numpy.array([0, 1])
keras_ce = numpy.zeros(40)  # cross-entropy computed by keras/tensorflow
my_ce = numpy.zeros(40)     # my own computation
for a in range(40):
    x = numpy.array([-a, a])
    keras_ce[a] = model.evaluate(x, y)[0]
    my_ce[a] = numpy.log(1 + numpy.exp(-a))
My Question: I found that the binary crossentropy (keras_ce) computed by Keras/TensorFlow reaches a floor of 1.09e-7 when a is approximately 16, as illustrated below (blue line). It doesn't decrease further as a keeps growing. Why is that?
This neural network has 1 neuron only whose weight is set to 1 and bias is 0. With the 2-point training set {(-a, 0), (a, 1)}, the binary_crossentropy is just
-1/2 [ log(1 - 1/(1+exp(a)) ) + log( 1/(1+exp(-a)) ) ] = log(1+exp(-a))
So the cross-entropy should decrease as a increases, as illustrated in orange ('my') above. Is there some Keras/Tensorflow/Python setup I can change to increase its precision? Or am I mistaken somewhere? I'd appreciate any suggestions/comments/answers.
TL;DR version: the probability values (i.e. the outputs of sigmoid function) are clipped due to numerical stability when computing the loss function.
If you inspect the source code, you would find that using binary_crossentropy as the loss would result in a call to binary_crossentropy function in losses.py file:
def binary_crossentropy(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
which in turn, as you can see, calls the equivalent backend function. In case of using Tensorflow as the backend, that would result in a call to binary_crossentropy function in tensorflow_backend.py file:
def binary_crossentropy(target, output, from_logits=False):
    """ Docstring ..."""
    # Note: tf.nn.sigmoid_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # transform back to logits
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target,
                                                   logits=output)
As you can see, the from_logits argument is set to False by default. Therefore, the if condition evaluates to true, and as a result the values in the output are clipped to the range [epsilon, 1-epsilon]. That's why, no matter how small or large a probability is, it can never be smaller than epsilon or greater than 1-epsilon. And that explains why the output of the binary_crossentropy loss is also bounded.
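This also pins down the level of the floor: in your two-point test, the positive sample's probability is clipped to 1 - 1e-7 and the negative sample's to 1e-7, so each per-sample loss bottoms out around -log(1 - 1e-7) ≈ 1e-7 no matter how large a gets. Averaging the two samples, and accounting for float32 rounding in the clip-then-logit round trip, gives roughly 1.09e-7, which matches the plateau you see in the blue curve.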
Now, what is this epsilon here? It is a very small constant which is used for numerical stability (e.g. prevent division by zero or undefined behaviors, etc.). To find out its value you can further inspect the source code and you would find it in the common.py file:
_EPSILON = 1e-7

def epsilon():
    """Returns the value of the fuzz factor used in numeric expressions.

    # Returns
        A float.

    # Example
    ```python
        >>> keras.backend.epsilon()
        1e-07
    ```
    """
    return _EPSILON
If, for any reason, you would like more precision, you can alternatively set the epsilon value to a smaller constant using the set_epsilon function from the backend:
def set_epsilon(e):
    """Sets the value of the fuzz factor used in numeric expressions.

    # Arguments
        e: float. New value of epsilon.

    # Example
    ```python
        >>> from keras import backend as K
        >>> K.epsilon()
        1e-07
        >>> K.set_epsilon(1e-05)
        >>> K.epsilon()
        1e-05
    ```
    """
    global _EPSILON
    _EPSILON = e
However, be aware that setting epsilon to an extremely low positive value or zero may disrupt the stability of computations all over Keras.
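As a minimal sketch of trying this in your two-point test, you would lower the fuzz factor before building and evaluating the model (note that float32 sigmoid outputs saturate at exactly 1.0 for large inputs, so this may not buy much extra precision in practice):
from keras import backend as K

K.set_epsilon(1e-12)  # smaller fuzz factor; must be set before the loss is evaluated
print(K.epsilon())    # 1e-12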
I think Keras takes numerical stability into account here. Let's track how Keras calculates it.
First,
def binary_crossentropy(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
Then,
def binary_crossentropy(target, output, from_logits=False):
    """Binary crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor with the same shape as `output`.
        output: A tensor.
        from_logits: Whether `output` is expected to be a logits tensor.
            By default, we consider that `output`
            encodes a probability distribution.

    # Returns
        A tensor.
    """
    # Note: tf.nn.sigmoid_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # transform back to logits
        _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
        output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target,
                                                   logits=output)
Notice that tf.clip_by_value is used for numerical stability.
Let's compare Keras's binary_crossentropy, TensorFlow's tf.nn.sigmoid_cross_entropy_with_logits, and a custom loss function (with the value clipping removed):
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
import keras
# keras
model = Sequential()
model.add(Dense(units=1, activation='sigmoid', input_shape=(1,),
                weights=[np.ones((1, 1)), np.zeros(1)]))
# print(model.get_weights())
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])
# tensorflow
G = tf.Graph()
with G.as_default():
    x_holder = tf.placeholder(dtype=tf.float32, shape=(2,))
    y_holder = tf.placeholder(dtype=tf.float32, shape=(2,))
    entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=x_holder, labels=y_holder))
sess = tf.Session(graph=G)
# keras with custom loss function
def customLoss(target, output):
    # if not from_logits:
    #     # transform back to logits
    #     _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype)
    #     output = tf.clip_by_value(output, _epsilon, 1 - _epsilon)
    #     output = tf.log(output / (1 - output))
    output = tf.log(output / (1 - output))
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target,
                                                   logits=output)

model_m = Sequential()
model_m.add(Dense(units=1, activation='sigmoid', input_shape=(1,),
                  weights=[np.ones((1, 1)), np.zeros(1)]))
# print(model.get_weights())
model_m.compile(loss=customLoss,
                optimizer='adam', metrics=['accuracy'])
N = 100
xaxis = np.linspace(10, 20, N)
keras_ce = np.zeros(N)
tf_ce = np.zeros(N)
my_ce = np.zeros(N)
keras_custom = np.zeros(N)
y = np.array([0, 1])
for i, a in enumerate(xaxis):
    x = np.array([-a, a])
    # cross-entropy computed by keras/tensorflow
    keras_ce[i] = model.evaluate(x, y)[0]
    my_ce[i] = np.log(1 + np.exp(-a))  # my own computation
    tf_ce[i] = sess.run(entropy, feed_dict={x_holder: x, y_holder: y})
    keras_custom[i] = model_m.evaluate(x, y)[0]
# print(model.get_weights())
plt.plot(xaxis, keras_ce, label='keras')
plt.plot(xaxis, my_ce, 'b', label='my_ce')
plt.plot(xaxis, tf_ce, 'r:', linewidth=5, label='tensorflow')
plt.plot(xaxis, keras_custom, '--', label='custom loss')
plt.xlabel('a')
plt.ylabel('xentropy')
plt.yscale('log')
plt.legend()
plt.savefig('compare.jpg')
plt.show()
We can see that TensorFlow matches the manual computation, while Keras with the custom loss (no clipping) runs into numerical overflow, as expected.
I want to train, evaluate the accuracy and eventually predict with my model. This is my first time using high level APIs such as tf.estimator.
I'm getting a value error from estimator.train(train_input_fn):
'ValueError: features should be a dictionary of `Tensor's. Given type: '
I'm not sure what is going on here. My model is taking 3 inputs and producing a binary output from one neuron.
Before this error I was getting an error about the requested shape not equal to the actual shape, or something along those lines. I fixed it by reducing the batchSize down to 1, instead of 100. I'm sure this isn't going to do so well when it comes to training though.
Any ideas? Here's my code:
import tensorflow as tf
import numpy as np
import sys
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Testing/')
sys.path.insert(0, '/Users/blairburns/Documents/DeepLearning/BackgroundColourPredictor/Dataset/Training/')
#other files
from TestDataNormaliser import *
from TrainDataNormaliser import *
learning_rate = 0.01
trainingIteration = 15
batchSize = 1
displayStep = 2
#Layers using tf.layers
def get_logits(features):
    l1 = tf.layers.dense(features, 3, activation=tf.nn.relu)
    l2 = tf.layers.dense(l1, 4, activation=tf.nn.relu)
    l3 = tf.layers.dense(l2, 1, activation=None)
    a = l3
    return a

#cost function
def get_loss(a, labels):
    #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(a)))
    return tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=labels)
    #cross_entropy = tf.reduce_mean((l3 - y)**2)
    #cross_entropy = -tf.reduce_sum(y*tf.log(a))-tf.reduce_sum((1-y)*tf.log(1-a))

#optimizer
def get_train_op(loss):
    learning_rate = 1e-3
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    return optimizer.minimize(loss, global_step=tf.train.get_global_step())

#training
####
def get_inputs(feature_data, label_data, batch_size, n_epochs=None, shuffle=True):
    dataset = tf.data.Dataset.from_tensor_slices(
        (feature_data, label_data))
    dataset = dataset.repeat(n_epochs)
    if shuffle:
        dataset = dataset.shuffle(len(feature_data))
    dataset = dataset.batch(batch_size)
    features, labels = dataset.make_one_shot_iterator().get_next()
    return features, labels

def model_fn(features, labels, mode):
    a = get_logits(features)
    loss = get_loss(a, labels)
    train_op = get_train_op(loss)
    predictions = tf.greater(a, 0)
    accuracy = tf.metrics.accuracy(labels, predictions)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops={'Accuracy': accuracy},
        predictions=predictions
    )

def train_input_fn():
    return get_inputs(
        trainArrayValues,
        trainArrayLabels,
        batchSize
    )

def eval_input_fn():
    return get_inputs(
        testArrayValues,
        testArrayLabels,
        batchSize,
        n_epochs=1,
        shuffle=False
    )
model_dir = './savedModel'
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
#estimator.train(train_input_fn, max_steps=1)
estimator.train(train_input_fn)
estimator.evaluate(eval_input_fn)
Your problem is this line:
estimator = tf.estimator.LinearRegressor(feature_columns=[model_fn, model_dir])
You need to set the feature_columns argument to an array of feature columns. A feature column tells the estimator about the data you're feeding it.
It looks like all your input data is numeric, so I'd call tf.feature_column.numeric_column to create your feature column(s) (see the tf.feature_column.numeric_column documentation). For example, the following code creates a numeric feature column containing x-coordinates:
xcol = tf.feature_column.numeric_column('x')
If all your estimator needs are x-coordinates, then you could create the estimator with the following code:
estimator = tf.estimator.LinearRegressor(feature_columns=[xcol])
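Note that the ValueError about features needing to be "a dictionary of `Tensor`s" also points at the input function: when the estimator is driven by feature columns, the features yielded by input_fn must be a dict keyed by the column names. A minimal sketch reusing the question's arrays (the 'x' key, the shape of 3, and the dataset handling are assumptions for illustration):
import tensorflow as tf

xcol = tf.feature_column.numeric_column('x', shape=(3,))  # 3 numeric inputs per example
estimator = tf.estimator.LinearRegressor(feature_columns=[xcol], model_dir='./savedModel')

def train_input_fn():
    # features go in as a dict keyed by the feature column name
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'x': trainArrayValues}, trainArrayLabels))
    return dataset.shuffle(1000).repeat().batch(100)

estimator.train(train_input_fn, steps=1000)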