Error when trying to use MirroredStrategy in tf.estimator

Error when trying to use MirroredStrategy in tf.estimator - python

I'm trying to add multi-gpu support to my tensorflow training code using tf.contrib.distribute.MirroredStrategy as a parameter to tf.estimator.RunConfig.
Tensorflow version: 1.7 (compiled from source)
Python version: 3.5
OS Platform and version: Linux Ubuntu 16.04.2
I get the following error message:
Traceback (most recent call last):
File "python3.5/site-packages/tensorflow/python/training/coordinator.py", line 297, in stop_on_exception
yield
File "python3.5/site-packages/tensorflow/contrib/distribute/python/mirrored_strategy.py", line 248, in _call_for_each_tower
self, *merge_args, **merge_kwargs)
File "python3.5/site-packages/tensorflow/python/training/optimizer.py", line 667, in _distributed_apply
reduced_grads = distribution.batch_reduce("sum", grads_and_vars)
File "python3.5/site-packages/tensorflow/python/training/distribute.py", line 801, in batch_reduce
return self._batch_reduce(method_string, value_destination_pairs)
File "python3.5/site-packages/tensorflow/contrib/distribute/python/mirrored_strategy.py", line 295, in _batch_reduce
value_destination_pairs)
File "python3.5/site-packages/tensorflow/contrib/distribute/python/cross_tower_ops.py", line 169, in batch_reduce
raise ValueError("`value_destination_pairs` must be a list or a tuple of "
ValueError: `value_destination_pairs` must be a list or a tuple of tuples of PerDevice objects and destinations
The following code produces the error (I omitted the code for parsing the TFRecord into an image tensor, as I don't believe that code affects the error, but I can add it if necessary):
import glob, os
import tensorflow as tf
slim = tf.contrib.slim
# ...
# definition of args (arguments parser)
def input_fn():
    """Build the training input pipeline passed to ``estimator.train``.

    Reads every file matching ``train*`` under ``args.train_data_dir`` as a
    TFRecord dataset, maps each record through ``parse_and_preprocess_image``
    (defined elsewhere; omitted from the post), then repeats, batches
    (batch size 4) and prefetches (buffer of 1).

    Returns:
        The configured dataset object.
    """
    dataset = tf.data.TFRecordDataset(glob.glob(os.path.join(args.train_data_dir, 'train*')))
    # Decode/preprocess records with two parallel calls.
    dataset = dataset.map(
        lambda x: parse_and_preprocess_image(x, args.image_size),
        num_parallel_calls=2,
    )
    # repeat() is called without a count.
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size=4)
    dataset = dataset.prefetch(1)
    return dataset
def model_fn(features, labels=None, mode=tf.estimator.ModeKeys.TRAIN, params=None):
    """Minimal model function used to reproduce the error.

    Runs a single slim 9x9 convolution (stride 1, 3 outputs) over the input
    batch and uses the mean squared difference between the input and the
    convolution output as the loss.

    Args:
        features: Input batch; presumably image tensors produced by
            ``input_fn`` -- confirm against the omitted parsing code.
        labels: Not used by this function.
        mode: Not used; the returned spec is always built with
            ``tf.estimator.ModeKeys.TRAIN``.
        params: Not used by this function.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the loss and the train op.
    """
    train_images_batch = features
    res = slim.conv2d(inputs=train_images_batch, kernel_size=9, stride=1, num_outputs=3, scope='conv1')
    loss = tf.reduce_mean((train_images_batch - res) ** 2)
    optimizer = tf.train.AdamOptimizer(0.001)
    # The train op is created through slim rather than optimizer.minimize().
    train_op = slim.learning.create_train_op(loss, optimizer)
    return tf.estimator.EstimatorSpec(
        mode=tf.estimator.ModeKeys.TRAIN,
        loss=loss, train_op=train_op)
def train():
    """Train the estimator with a MirroredStrategy distribution.

    Calls ``init()`` (defined elsewhere), builds a ``MirroredStrategy`` over
    ``args.num_gpus`` GPUs, passes it to ``RunConfig`` as ``train_distribute``
    and trains until ``args.train_steps`` steps.
    """
    init()
    # num_gpus comes straight from the parsed command-line arguments.
    distribution = tf.contrib.distribute.MirroredStrategy(num_gpus=args.num_gpus)
    config = tf.estimator.RunConfig(
        model_dir=args.log_dir,
        train_distribute=distribution,
    )
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=config)
    estimator.train(
        input_fn=input_fn,
        max_steps=args.train_steps,
    )
def main():
    """Script entry point: register the arguments, then run training."""
    # add_arguments() is defined elsewhere (omitted from the post).
    add_arguments()
    train()
if __name__ == '__main__':
main()
Thank you!
Adva

This error happens if you specified num_gpus=1. For a single GPU, you can use OneDeviceStrategy("/device:GPU:0") instead of MirroredStrategy.

Related

Error converting Detectron2 torchscript model to CoreML using coremltools

I have a Detectron2 model that is trained to identify specific items on a backend server. I would like to make this model available on iOS devices and convert it to a CoreML model using coremltools v6.1. I used the export_model.py script provided by Facebook to create a TorchScript model, but when I try to convert this to CoreML I get a KeyError:
def save_core_ml_package(scripted_model):
    """Convert a scripted model to Core ML and save it as ``newmodel.mlmodel``.

    Args:
        scripted_model: The model object handed to ``ct.convert``; per the
            question this is a TorchScript export of a Detectron2 model.
    """
    # Using image_input in the inputs parameter:
    # Convert to Core ML neural network using the Unified Conversion API.
    # The input is declared as a single 1x3x224x224 RGB image.
    h = 224
    w = 224
    ctmodel = ct.convert(scripted_model,
                         inputs=[ct.ImageType(shape=(1, 3, h, w),
                                              color_layout=ct.colorlayout.RGB)]
                         )
    # Save the converted model.
    ctmodel.save("newmodel.mlmodel")
I get the following error
Support for converting Torch Script Models is experimental. If possible you should use a traced model for conversion.
Traceback (most recent call last):
File "/usr/repo/URCV/src/Python/pytorch_to_torchscript.py", line 101, in <module>
save_trace_to_core_ml_package(test_model, outdir=outdir)
File "/usr/repo/URCV/src/Python/pytorch_to_torchscript.py", line 46, in save_trace_to_core_ml_package
ctmodel = ct.convert(
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/_converters_entry.py", line 444, in convert
mlmodel = mil_convert(
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 190, in mil_convert
return _mil_convert(model, convert_from, convert_to, ConverterRegistry, MLModel, compute_units, **kwargs)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 217, in _mil_convert
proto, mil_program = mil_convert_to_proto(
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 282, in mil_convert_to_proto
prog = frontend_converter(model, **kwargs)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/converter.py", line 112, in __call__
return load(*args, **kwargs)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/load.py", line 56, in load
converter = TorchConverter(torchscript, inputs, outputs, cut_at_symbols, specification_version)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 160, in __init__
raw_graph, params_dict = self._expand_and_optimize_ir(self.torchscript)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 486, in _expand_and_optimize_ir
graph, params_dict = TorchConverter._jit_pass_lower_graph(graph, torchscript)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 431, in _jit_pass_lower_graph
_lower_graph_block(graph)
File "/opt/python-venv/lib/python3.8/site-packages/coremltools/converters/mil/frontend/torch/converter.py", line 410, in _lower_graph_block
module = getattr(node_to_module_map[_input], attr_name)
KeyError: images.2 defined in (%images.2 : __torch__.detectron2.structures.image_list.ImageList = prim::CreateObject()
)

From the error message it looks like you are using a torch script model:
Support for converting Torch Script Models is experimental. If
possible you should use a traced model for conversion.
If possible, try to use a traced model, e.g.:
dummy_input = torch.randn(batch, channels, width, height)
traceable_model = torch.jit.trace(model, dummy_input)
followed by your original code:
ct.convert(traceable_model,...

torch training with Multi GPU enviroment

I'm trying to run training in a multi-GPU environment.
Here's the model code:
net_1 = nn.Sequential(nn.Conv2d(2, 12, 5),
nn.MaxPool2d(2),
snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
nn.Conv2d(12, 32, 5),
nn.MaxPool2d(2),
snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
nn.Flatten(),
nn.Linear(32*5*5, 10),
snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True, output=True)
)
net_1.cuda()
net = nn.DataParallel(net_1)
snn.Leaky is a module used to implement an SNN structure in combination with torch.nn, which makes the network work as a kind of RNN.
Link: https://snntorch.readthedocs.io/en/latest/readme.html
The input shape looks like this: (timestep, batchsize, 2, 32, 32)
Training code
def forward_pass(net, data):
    """Run the network once per step and stack the spike outputs.

    Args:
        net: The network to evaluate (wrapped in ``nn.DataParallel`` in the
            question).
        data: Input tensor; the loop iterates over its dimension 1 and feeds
            ``data[:, step]`` to the network at each step.

    Returns:
        ``torch.stack`` of the per-step spike outputs. The second network
        output (``mem_out``) is discarded.
    """
    spk_rec = []
    utils.reset(net) # resets hidden states for all LIF neurons in net
    # The original inline comment read "data.size(0) = number of time steps",
    # but the code loops over data.size(1).
    # NOTE(review): confirm which axis actually holds the time steps.
    for step in range(data.size(1)):
        datas = data[:,step,:,:,:].cuda()
        # `device` is a module-level name; this move is repeated on every step.
        net = net.to(device)
        spk_out, mem_out = net(datas)
        spk_rec.append(spk_out)
    return torch.stack(spk_rec)
# Training script. Indentation was lost in the post; the nesting below is
# reconstructed (NOTE(review): confirm that the logging lines belong inside
# the inner batch loop, as their use of `i` suggests).
optimizer = torch.optim.Adam(net.parameters(), lr=2e-2, betas=(0.9, 0.999))
loss_fn = SF.mse_count_loss(correct_rate=0.8, incorrect_rate=0.2)
num_epochs = 5
num_iters = 50  # not referenced anywhere in the visible code
loss_hist = []
acc_hist = []
t_spk_rec_sum = []  # not referenced anywhere in the visible code
start = time.time()
net.train()
# training loop
for epoch in range(num_epochs):
    for i, (data, targets) in enumerate(iter(trainloader)):
        data = data.to(device)
        targets = targets.to(device)
        spk_rec = forward_pass(net, data)
        loss_val = loss_fn(spk_rec, targets)
        # Gradient calculation + weight update
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()
        # Store loss history for future plotting
        loss_hist.append(loss_val.item())
        # Elapsed wall-clock time since `start`.
        print("time :", time.time() - start,"sec")
        print(f"Epoch {epoch}, Iteration {i} \nTrain Loss: {loss_val.item():.2f}")
        acc = SF.accuracy_rate(spk_rec, targets)
        acc_hist.append(acc)
        print(f"Train Accuracy: {acc * 100:.2f}%\n")
And I got this error
Traceback (most recent call last):
File "/home/hubo1024/PycharmProjects/snntorch/multi_gpu_train.py", line 87, in <module>
spk_rec = forward_pass(net, data)
File "/home/hubo1024/PycharmProjects/snntorch/multi_gpu_train.py", line 63, in forward_pass
spk_out, mem_out = net(datas)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 168, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 178, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/_utils.py", line 461, in reraise
raise exception
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/snntorch/_neurons/leaky.py", line 162, in forward
self.mem = self.state_fn(input_)
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/snntorch/_neurons/leaky.py", line 201, in _build_state_function_hidden
self._base_state_function_hidden(input_) - self.reset * self.threshold
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/snntorch/_neurons/leaky.py", line 195, in _base_state_function_hidden
base_fn = self.beta.clamp(0, 1) * self.mem + input_
File "/home/hubo1024/anaconda3/envs/spyketorchproject/lib/python3.10/site-packages/torch/_tensor.py", line 1121, in __torch_function__
ret = func(*args, **kwargs)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
Process finished with exit code 1
Line 87 is
spk_rec = forward_pass(net, data)
from the training loop,
and line 63 is
spk_out, mem_out = net(datas)
from the forward_pass function.
I checked and made sure that there is no place where a tensor is created on the CPU,
and the code works well when I run it on a single GPU.
I'm currently using
torch.utils.data import DataLoader
to build the batched train loader. I'm thinking that this might be the main source of the problem.
Should I use a different data loader for multi-GPU training?
And if so, where can I find a reference for this? I searched a bit, but the information I found was somewhat old.

This was a bug in the Leaky neuron that kept resetting its device when using DataParallel. It has been fixed in the current version of snnTorch in GitHub, and addressed in this issue: https://github.com/jeshraghian/snntorch/issues/154
We're working on fixing up the other neurons now.

"google.protobuf.message.DecodeError: Error parsing message" is reported when I try to save a checkpoint

I am training a model with TensorFlow and the training itself is going well, but when it reaches the step of saving checkpoints, the following error occurs:
Traceback (most recent call last):
File "trainer.py", line 110, in <module>
trainer.train()
File "trainer.py", line 100, in train
self.model.saver.save(sess, model_save_path, global_step=current_step)
File "/search/odin/zhangshusen/Python-3.6.8-tf/lib/python3.6/site-packages/tensorflow_core/python/training/saver.py", line 1203, in save
save_debug_info=save_debug_info)
File "/search/odin/zhangshusen/Python-3.6.8-tf/lib/python3.6/site-packages/tensorflow_core/python/training/saver.py", line 1246, in export_meta_graph
graph_def=ops.get_default_graph().as_graph_def(add_shapes=True),
File "/search/odin/zhangshusen/Python-3.6.8-tf/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3238, in as_graph_def
result, _ = self._as_graph_def(from_version, add_shapes)
File "/search/odin/zhangshusen/Python-3.6.8-tf/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3166, in _as_graph_def
graph.ParseFromString(compat.as_bytes(data))
google.protobuf.message.DecodeError: Error parsing message
One relevant detail is that the embedding parameters of the model are loaded from a txt file containing embeddings pre-trained with another model. The code is below:
def create_pretrained_emb_from_txt(self, vocab, embed_file, name = None, dtype=tf.float32):
    """Create a trainable embedding variable initialised from a text file.

    Rows start as uniform random values from ``np.random.rand``; every
    vocabulary entry that is also present in the loaded embeddings has its
    row overwritten with the pre-trained vector.

    Args:
        vocab: Mapping from token to row index (indexed as ``vocab[v]``).
        embed_file: Path handed to ``utils.load_emb``.
        name: Name passed to ``tf.get_variable``.
        dtype: dtype used when converting the matrix to a tensor.

    Returns:
        The variable created by ``tf.get_variable``.
    """
    vocab_size = len(vocab)
    print("start load {} vocab: {}".format(embed_file, vocab_size))
    embs_dict, emb_size = utils.load_emb(embed_file)
    print("load {} end {}".format(embed_file, len(embs_dict)))
    embs = np.random.rand(vocab_size, emb_size).astype(np.float32)
    for v in vocab:
        if v in embs_dict:
            embs[vocab[v]] = np.array(embs_dict[v], dtype=np.float32)
    # NOTE(review): the whole NumPy matrix is converted to a tensor and used
    # as the initializer, which presumably stores it as a constant in the
    # graph. For a large vocabulary this may exceed the protobuf size limit
    # when the graph is serialised by saver.save() -- a possible cause of the
    # DecodeError above; confirm against the matrix size.
    emb_mat = tf.get_variable(name=name,
                              initializer=tf.convert_to_tensor(embs, dtype=dtype), trainable=True)
    print("init tensor done")
    return emb_mat
When I call this function to initialise the embeddings, the error occurs. By contrast, when I initialise the embeddings randomly with the code below, the error never happens and the training goes well:
domain_init_scope = math.sqrt(6.0 / (domain_vocab_size + emb_dim))
domain_emb_shape = [domain_vocab_size, emb_dim]
domain_embedding_table = tf.Variable(tf.random_uniform(domain_emb_shape, -domain_init_scope, domain_init_scope))

pmdarima autoarima prediction method returns ''SARIMAX' object has no attribute '_k_trend' '

I have created a model using the pmdarima module's pipeline method
fit2 = Pipeline([
('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
('arima', pmd.AutoARIMA(trace=True,
suppress_warnings=True,
m=12,
stepwise=True))])
and fitted the model using the train data set
fitted = fit2.fit(train)
and was able to perform predictions. Afterwards, I tried to persist the model as a pickle file
pickle_tgt = "arima.pkl"
joblib.dump(fitted, pickle_tgt, compress=3)
then I read the pickle file back into another python instance
def get_model(product_id):
    """Load a persisted model for ``product_id`` with joblib.

    Args:
        product_id: File name appended to ``"collector/resources/"``.

    Returns:
        The loaded model. If loading raises any ``Exception``, the traceback
        is printed and the function falls through, returning ``None``.
    """
    file_path = "collector/resources/" + product_id
    try:
        model = joblib.load(file_path)
        return model
    except Exception:
        # Failure is only printed, not re-raised; callers receive None.
        print(traceback.format_exc())
However, when I tried to perform a prediction using the model I've imported
fc, confint = model.predict(n_periods=24, return_conf_int=True)
it fails and returns the below stacktrace
fc, confint = model.predict(n_periods=n_periods, return_conf_int=True)
File "C:\Users\collector\venv\lib\site-packages\pmdarima\pipeline.py", line 436, in predict
alpha=alpha, **predict_kwargs)
File "C:\Users\collector\venv\lib\site-packages\pmdarima\utils\metaestimators.py", line 53, in <lambda>
out = (lambda *args, **kwargs: self.fn(obj, *args, **kwargs))
File "C:\Users\collector\venv\lib\site-packages\pmdarima\arima\auto.py", line 184, in predict
return_conf_int=return_conf_int, alpha=alpha)
File "C:\Users\collector\venv\lib\site-packages\pmdarima\arima\arima.py", line 651, in predict
alpha=alpha)
File "C:\Users\collector\venv\lib\site-packages\pmdarima\arima\arima.py", line 86, in _seasonal_prediction_with_confidence
**kwargs)
File "C:\Users\collector\venv\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py", line 3234, in get_prediction
transformed=True, includes_fixed=True, **kwargs)
File "C:\Users\collector\venv\lib\site-packages\statsmodels\tsa\statespace\sarimax.py", line 1732, in _get_extension_time_varying_matrices
if not self.simple_differencing and self._k_trend > 0:
AttributeError: 'SARIMAX' object has no attribute '_k_trend'
The pmdarima version is 1.6.0. I've tried setting the _k_trend = 0 variable in the sarimax.py file, but it does not seem to have any effect. Does anyone have a workaround for this?

Apparently there was a version compatibility issue between the pmdarima installations in Colab and in the local environment; find more information here

How to ensure at least 2 of n classes are included in training data

I am currently training a CNN. One of the metrics I am using is AUC. One issue I have noticed is that sometimes my generator will only select examples from one class (I have 3 classes in this project). So if my batch size is 20 it will sometimes randomly select 20 examples from class one for 1 epoch. If this happens then I get an error stating that AUC cannot be calculated with only one class and then the training ends.
Is there a way to make a condition in the generator that more or less states you need at least 2 of the n classes? Without having to use tf.metrics.auc
Thank you
# load training data
def load_train_data_batch_generator(batch_size=32, rows_in=48, cols_in=48, zs_in=32,
                                    channels_in=2, num_classes=3,
                                    dir_dict=dir_dict):
    """Endlessly yield random training batches loaded from .mat files.

    Each batch is built from ``batch_size`` file names drawn without
    replacement from ``dir_dict['dir_in_train_combo']``. Nothing in the
    sampling constrains which classes end up in a batch.

    Args:
        batch_size: Samples per batch (cast to int).
        rows_in, cols_in, zs_in, channels_in: Dimensions of the input buffer.
        num_classes: Width of the label buffer.
        dir_dict: Mapping with the ``'dir_in_train_combo'`` and
            ``'dir_out_train'`` directory paths. The default is the
            module-level ``dir_dict`` as it exists when the function is
            defined.

    Yields:
        ``(x_train, y_train)`` tuples. The same two arrays are reused and
        overwritten in place for every batch.
    """
    # dir_in_train = main_dir + '/test_CT_PET_combo'
    # required when using hyperopt
    batch_size = int(batch_size)
    # if not: TypeError: 'float' object cannot be interpreted as an integer
    fnames = os.listdir(dir_dict['dir_in_train_combo'])
    # Buffers are allocated once, outside the loop.
    y_train = np.zeros((batch_size, num_classes))
    x_train = np.zeros((batch_size, rows_in, cols_in, zs_in, channels_in))
    while True:
        count = 0
        for fname in np.random.choice(fnames, batch_size, replace=False):
            # Label file is looked up under the same name in dir_out_train.
            data_label = scipy.io.loadmat(os.path.join(dir_dict['dir_out_train'], fname))['output']
            # changing one hot encoding to integer
            # NOTE(review): integer_label is computed but not used below.
            integer_label = np.argmax(data_label[0], axis=0)
            y_train[count,:] = data_label
            # Loading train ct w/ c and pet/ct combo
            # The array is stored in the .mat file under a key equal to fname.
            train_combo = scipy.io.loadmat(os.path.join(dir_dict['dir_in_train_combo'], fname))[fname]
            x_train[count,:,:,:,:] = train_combo
            count += 1
        yield(x_train, y_train)
Per request: code for metric and error
Metric code
def sk_auroc(y_true, y_pred):
    """Metric that wraps scikit-learn's ``roc_auc_score`` via ``tf.py_func``.

    Args:
        y_true: Tensor of true labels.
        y_pred: Tensor of predictions.

    Returns:
        A ``tf.double`` tensor produced by calling ``roc_auc_score`` on the
        evaluated inputs. Any exception raised by ``roc_auc_score`` (such as
        the "Only one class present" ValueError shown below) surfaces when
        the op runs.
    """
    import tensorflow as tf
    from sklearn.metrics import roc_auc_score
    return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)
Epoch 1/200
57/205 [=======>......................] - ETA: 11s - loss: 1.2858 - acc: 0.3632 - sk_auroc: 0.4581 - auc: 0.5380ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
Traceback (most recent call last):
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/script_ops.py", line 158, in __call__
ret = func(*args)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 277, in roc_auc_score
sample_weight=sample_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/base.py", line 118, in _average_binary_score
sample_weight=score_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 268, in _binary_roc_auc_score
raise ValueError("Only one class present in y_true. ROC AUC score "
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
[[Node: metrics_1/sk_auroc/PyFunc = PyFunc[Tin=[DT_FLOAT, DT_FLOAT], Tout=[DT_DOUBLE], token="pyfunc_24", _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_predictions_target_1_0_1, predictions_1/Softmax/_857)]]
Traceback (most recent call last):
File "<ipython-input-48-34101247f335>", line 8, in optimize_cnn
model, results = train_model(space)
File "<ipython-input-47-254bd056a344>", line 40, in train_model
validation_steps=round(len(os.listdir(dir_out_val))/space['batch_size'])
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1418, in fit_generator
initial_epoch=initial_epoch)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training_generator.py", line 217, in fit_generator
class_weight=class_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1217, in train_on_batch
outputs = self.train_function(ins)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1454, in __call__
self._session._session, self._handle, args, status, None)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 519, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
Traceback (most recent call last):
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/script_ops.py", line 158, in __call__
ret = func(*args)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 277, in roc_auc_score
sample_weight=sample_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/base.py", line 118, in _average_binary_score
sample_weight=score_weight)
File "/home/mikedoho/anaconda3/lib/python3.6/site-packages/sklearn/metrics/ranking.py", line 268, in _binary_roc_auc_score
raise ValueError("Only one class present in y_true. ROC AUC score "
ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
[[Node: metrics_1/sk_auroc/PyFunc = PyFunc[Tin=[DT_FLOAT, DT_FLOAT], Tout=[DT_DOUBLE], token="pyfunc_24", _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_predictions_target_1_0_1, predictions_1/Softmax/_857)]]
tf.metrics.auc code and the picture showing the reason I dont really like it
# converting tf metric in keras metric
def as_keras_metric(method):
    """Adapt a TensorFlow metric function so it can be used as a Keras metric.

    Args:
        method: Callable returning a ``(value, update_op)`` pair, e.g.
            ``tf.metrics.auc``.

    Returns:
        A wrapper that calls ``method``, runs the local-variable initializer
        in the Keras session, and returns the metric value with a control
        dependency on the update op.
    """
    import functools
    from keras import backend as K
    import tensorflow as tf
    # NOTE(review): the line below is a comment, not a decorator, so
    # functools.wraps is never applied and `wrapper` keeps its own name.
    #functools.wraps(method)
    def wrapper(self, args, **kwargs):
        """ Wrapper for turning tensorflow metrics into keras metrics """
        value, update_op = method(self, args, **kwargs)
        # Initialise the local variables created by the metric.
        K.get_session().run(tf.local_variables_initializer())
        # Make reading the value depend on the update op.
        with tf.control_dependencies([update_op]):
            value = tf.identity(value)
        return value
    return wrapper
tf_auc_roc = as_keras_metric(tf.metrics.auc)
It seems like the tf.metrics.auc output is too smooth, and something might be off that I will have to look into later.

You can use tf.metrics.auc from TensorFlow instead of sklearn.metrics.roc_auc_score from scikit-learn. For example:
# Minimal example: evaluate tf.metrics.auc three times on fixed data.
import tensorflow as tf
label = tf.Variable([1,0,0,0,1])
pred = tf.Variable([0.8,1,0.6,0.23,0.78])
# tf.metrics.auc returns the metric value tensor and its update op.
auc,op = tf.metrics.auc(label,pred)
with tf.Session()as sess:
    # Both global and local variables are initialised before running.
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init)
    for i in range(3):
        # Fetch the value and the update op together in one run call.
        auc_value, op_value = sess.run([auc,op])
        print(auc_value)
0.0
0.6666667
0.66666657
With this approach you should not run into that error.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Error when trying to use MirroredStrategy in tf.estimator - python

This error happens if you specified num_gpus=1. For a single GPU, you can use OneDeviceStrategy("/device:GPU:0") instead of MirroredStrategy.

Related

Error converting Detectron2 torchscript model to CoreML using coremltools

torch training with Multi GPU enviroment

"google.protobuf.message.DecodeError: Error parsing message" is reported when I try to save a checkpoint

pmdarima autoarima prediction method returns ''SARIMAX' object has no attribute '_k_trend' '

How to ensure at least 2 of n classes are included in training data

Categories

Resources