tf.keras - Importing model with batchnormalization layers - python

I've gotten stuck on this issue for a little while. I'm trying to run the code below with the tf_cnnvis (https://github.com/InFoCusp/tf_cnnvis) package for visualising learnt features in the network, where I import my protobuf model and then try and provide it a tensor containing some image data (which I believe is provided as a feed_dict, although I could be mistaken).
import numpy as np
import tensorflow as tf
import keras as k
import cv2
import tf_cnnvis as tfv
from tensorflow.python.platform import gfile
from keras import backend as K
model_filename = "saved_model.pb"
image = "test.jpg"
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, allow_growth=False)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(sess)
K._LEARNING_PHASE = tf.constant(0)
K.set_learning_phase(0)
with gfile.FastGFile(model_filename, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def)
X = tf.placeholder(tf.float32, shape = [None, 48, 64, 3],name = "input") # placeholder for input images
y = tf.placeholder(tf.float32, shape = [None, 8])
im = np.array(cv2.imread(image))
im = np.expand_dims(im, 0)
layers = ['r', 'p', 'c']
init_op = init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)
with sess.as_default():
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im}, layers=layers)
sess.close()
When I run my code, I get an "InvalidArgumentError" with this traceback:
Traceback (most recent call last):
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1292, in _do_call
return fn(*args)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1277, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1367, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 169, in _get_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 227, in _visualization_by_layer_type
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 288, in _visualization_by_layer_name
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 315, in _activation
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 887, in run
run_metadata_ptr)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1110, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1286, in _do_run
run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1308, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'import/batch_normalization_1_input', defined at:
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
path_logdir = path_logdir, path_outdir = path_outdir)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 159, in _get_visualization
s = _graph_import_function(PATH,s)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 177, in _graph_import_function
new_saver = tf.train.import_meta_graph(PATH) # Import graph
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1650, in import_meta_graph
meta_graph_or_file, clear_devices, import_scope, **kwargs)[0]
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1672, in _import_meta_graph_with_return_elements
**kwargs))
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/meta_graph.py", line 806, in import_scoped_meta_graph_with_return_elements
return_elements=return_elements)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 442, in import_graph_def
_ProcessNewOps(graph)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 234, in _ProcessNewOps
for new_op in graph._add_new_tf_operations(compute_devices=False): # pylint: disable=protected-access
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3426, in _add_new_tf_operations
for c_op in c_api_util.new_tf_operations(self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3426, in <listcomp>
for c_op in c_api_util.new_tf_operations(self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3285, in _create_op_from_tf_operation
ret = Operation(c_op, self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Now, I've looked around and I've arrived (tentatively) at the conclusion that this is due to a learning phase variable that's set in the BatchNormalization layer that I have in the model. I'm unclear as to how to set the learning phase when you've imported the model. Some people set the learning phase before initializing the model (which as you can see, I have attempted), but in most examples of this they're using one of the large, pre-provided models (such as MNIST). Others provide the learning phase in the feed_dict, which I have also tried, like so:
with sess.as_default():
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im, K.learning_phase(): 0}, layers=layers)
But this gives me a different error message:
Traceback (most recent call last):
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im, K.learning_phase(): 0}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 169, in _get_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 227, in _visualization_by_layer_type
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 270, in _visualization_by_layer_name
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/utils.py", line 79, in parse_tensors_dict
AttributeError: 'int' object has no attribute 'name'
At this stage, seeing as I'm still not completely sure if the problem I'm trying to fix is even the right one, I would very much appreciate some input. If there's anything else you need me to provide, please ask.

Related

cuDNN launch failure (tensorflow-gpu/CUDA)

Traceback (most recent call last):
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1322, in _do_call
return fn(*args)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InternalError: cuDNN launch failure : input shape ([202027,64,1,1])
[[Node: bn_fm_1/FusedBatchNorm = FusedBatchNorm[T=DT_FLOAT, data_format="NCHW", epsilon=0.001, is_training=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](bn_fm_1/FusedBatchNorm-0-TransposeNHWCToNCHW-LayoutOptimizer, bn_fm/gamma/read, bn_fm/beta/read, bn_fm/moving_mean/read, bn_fm/moving_variance/read)]]
[[Node: AddN/_31 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_202_AddN", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "NeuralFM.py", line 350, in <module>
model.train(data.Train_data, data.Validation_data, data.Test_data)
File "NeuralFM.py", line 266, in train
init_train = self.evaluate(Train_data)
File "NeuralFM.py", line 311, in evaluate
predictions = self.sess.run((self.out), feed_dict=feed_dict)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: cuDNN launch failure : input shape ([202027,64,1,1])
[[Node: bn_fm_1/FusedBatchNorm = FusedBatchNorm[T=DT_FLOAT, data_format="NCHW", epsilon=0.001, is_training=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](bn_fm_1/FusedBatchNorm-0-TransposeNHWCToNCHW-LayoutOptimizer, bn_fm/gamma/read, bn_fm/beta/read, bn_fm/moving_mean/read, bn_fm/moving_variance/read)]]
[[Node: AddN/_31 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_202_AddN", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'bn_fm_1/FusedBatchNorm', defined at:
File "NeuralFM.py", line 349, in <module>
model = NeuralFM(data.features_M, args.hidden_factor, eval(args.layers), args.loss_type, args.pretrain, args.epoch, args.batch_size, args.lr, args.lamda, eval(args.keep_prob), args.optimizer, args.batch_norm, activation_function, args.verbose, args.early_stop)
File "NeuralFM.py", line 89, in __init__
self._init_graph()
File "NeuralFM.py", line 123, in _init_graph
self.FM = self.batch_norm_layer(self.FM, train_phase=self.train_phase, scope_bn='bn_fm')
File "NeuralFM.py", line 224, in batch_norm_layer
is_training=False, reuse=True, trainable=True, scope=scope_bn)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 596, in batch_norm
scope=scope)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 382, in _fused_batch_norm
is_training, _fused_batch_norm_training, _fused_batch_norm_inference)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/utils.py", line 214, in smart_cond
return static_cond(pred_value, fn1, fn2)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/utils.py", line 194, in static_cond
return fn2()
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 379, in _fused_batch_norm_inference
data_format=data_format)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 906, in fused_batch_norm
name=name)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 3465, in _fused_batch_norm
is_training=is_training, name=name)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
op_def=op_def)
File "/home/alex/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): cuDNN launch failure : input shape ([202027,64,1,1])
[[Node: bn_fm_1/FusedBatchNorm = FusedBatchNorm[T=DT_FLOAT, data_format="NCHW", epsilon=0.001, is_training=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](bn_fm_1/FusedBatchNorm-0-TransposeNHWCToNCHW-LayoutOptimizer, bn_fm/gamma/read, bn_fm/beta/read, bn_fm/moving_mean/read, bn_fm/moving_variance/read)]]
[[Node: AddN/_31 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_202_AddN", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I keep getting this error. I've tried everything I can think of, including downgrading CUDA, cuDNN, and tensorflow-gpu.
I'm currently on CUDA 9.0, cuDNN v7.4.2 for CUDA 9.0, tensorflow-gpu 1.9 and nothing I do seems to help. I'm running out of ideas, I've got every dependency I could imagine.
I'm trying to run this:
https://github.com/hexiangnan/neural_factorization_machine
EDIT: I have a feeling this is connected to https://github.com/tensorflow/tensorflow/issues/8090 but as I'm a little new to all this, I'm not sure if I'm right or how to address this.
I ran into the same error. In my case, the cause was that my GPU did not have enough memory for the process.
I'm probably a few years late to be of any help, Alex, but I ran into this issue on Windows with a specific GPU. Don't ask me why, but adding
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '/gpu:0'
if you have a single GPU works for me
I solved it by adding this after the imports:
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
in the script

How to understand tensorflow error message?

I find the error messages from TensorFlow hard to understand, especially those raised at run time (i.e. in sess.run()). There is little documentation explaining how to interpret them.
For example, here is an error message:
Traceback (most recent call last):
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1322, in _do_call
return fn(*args)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 155, in <module>
res = runManager.modRun(i)
File "/home/hyh/projects/RFCN-tensorflow/Utils/RunManager.py", line 97, in modRun
return self.runAndMerge(feed_dict, options=options if options is not None else self.options, run_metadata=run_metadata if run_metadata is not None else self.run_metadata)
File "/home/hyh/projects/RFCN-tensorflow/Utils/RunManager.py", line 71, in runAndMerge
res = self.sess.run(self.inputTensors, feed_dict=feed_dict, options=options, run_metadata=run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape', defined at:
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 118, in <module>
trainOp = createUpdateOp()
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 104, in createUpdateOp
grads = optimizer.compute_gradients(totalLoss, var_list=net.getVariables())
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 526, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 494, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 385, in _MaybeCompile
return grad_fn() # Exit early
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_grad.py", line 521, in _ReshapeGrad
return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6113, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op 'RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2', defined at:
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 96, in <module>
tf.losses.add_loss(net.getLoss(boxes, classes))
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/BoxNetwork.py", line 50, in getLoss
return self.rpn.loss(refBoxes) + self.boxRefiner.loss(self.proposals, refBoxes, refClasses)
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 186, in loss
return tf.cond(tf.shape(refBoxes)[0] > 0, lambda: calcLoss(), lambda: tf.constant(0.0))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 432, in new_func
return func(*args, **kwargs)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2063, in cond
orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1913, in BuildCondBranch
original_result = fn()
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 186, in <lambda>
return tf.cond(tf.shape(refBoxes)[0] > 0, lambda: calcLoss(), lambda: tf.constant(0.0))
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 173, in calcLoss
positiveLosses, negativeLosses = calcAllLosses(inAnchros, inBoxes, inRawSizes, inScores, inBoxSizes)
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 145, in calcAllLosses
classificationLoss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=scores, labels=refScores, name="classification_loss")
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1878, in softmax_cross_entropy_with_logits_v2
cost = array_ops.reshape(cost, output_shape)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6113, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Process finished with exit code 1
I have two questions:
Why are there so many call stacks? First there is "Traceback", then "During handling of the above exception, another exception occurred:", then "Caused by...", and finally "...which was originally created as op". What does each of them mean?
Why are there so many error nodes? In the message above, it seems that two nodes have gone wrong. What does that mean? Which node actually caused this error?
Tensorflow error messages are always quite verbose and this is mainly due to how TF works (because of the Computation Graph it builds).
In your case, it seems that you are reshaping a tensor with the wrong shape:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
To see if that is the case try printing the shape of the tensor given to reshape op, i.e.:
input = tf.placeholder(tf.float32, [None, 28, 28, 1])
x = tf.layers.dense(input, units=64, activation=tf.nn.relu)
x = tf.Print(x, [x])
x_rs = tf.reshape(x, [-1, 28*28])

OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)

I don't know how to solve this problem; the error message does not help me locate its cause. Thanks for helping!
here is the data in e.csv, D.csv and F.csv
e.csv: 1,2,3
4,5,6
7,8,9
D.csv: 11,12,13
14,15,16
17,18,19
F.csv: 21,22,23
24,25,26
27,28,29
here is my code
import tensorflow as tf
import os
file_dir = './KDD2'
fileNameQueue = []
for file in os.listdir(file_dir):
fileNameQueue.append(file)
print fileNameQueue
filename_queue = tf.train.string_input_producer(fileNameQueue, shuffle=False)
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
col1,col2,label = tf.decode_csv(value, record_defaults=[[1],[1],[1]])
example = tf.pack([col1,col2])
example_batch, label_batch = tf.train.batch([example, label], batch_size=5)
with tf.Session() as sess:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for i in range(10):
print example_batch.eval()
coord.request_stop()
coord.join(threads)
here is the error message
root@ubuntumagiclab:/home/magiclab/SAE# python try.py
['e.csv', 'D.csv', 'F.csv']
Traceback (most recent call last):
File "try.py", line 30, in <module>
print example_batch.eval()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 575, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3633, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_0_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)
[[Node: batch = QueueDequeueMany[_class=["loc:#batch/fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
Caused by op u'batch', defined at:
File "try.py", line 24, in <module>
example_batch, label_batch = tf.train.batch([example, label], batch_size=5)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 692, in batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 458, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 1099, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): FIFOQueue '_0_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)
[[Node: batch = QueueDequeueMany[_class=["loc:#batch/fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
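The problem is with the file paths. os.listdir() returns bare file names without the directory, so the reader looks for the files in the current working directory and finds nothing, which closes the queue while it is still empty. Please add complete paths to fileNameQueue, as shown below.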
This works for me:
fileNameQueue.append('/home/****/Desktop/stackoverflow/data/' +file)
Hope this helps.

Tensorflow placeholder problems: error: you must feed a value for placeholder tensor 'Placeholder' with dtype float error

I am implementing a convolutional autoencoder that has to reduce the dimensionality of camera images, one image at a time, in real time. Therefore my batch size is one. Y_pred is the output of the network. I want to evaluate this so I can see what my network is outputting.
This is the code I use to import my data and feed the placeholders:
def inputs(image_file_path):
    """Build the input pipeline for a single image file.

    Wraps the given path in a filename queue and returns whatever
    `read_image` produces when reading from that queue.
    """
    queue = tf.train.string_input_producer([image_file_path])
    return read_image(queue)
# Train on one image and print the network output for that same image.
with tf.Session() as sess:
    image = inputs(file_path)
    coord = tf.train.Coordinator()
    init = tf.global_variables_initializer()
    sess.run(init)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        image = tf.cast(image/255, tf.float32)
        # Data is just one image, so the dimension
        # [batch, height, width, channels] is set to [1, 120, 160, 3].
        image = tf.reshape(image, [1, 120, 160, 3])
        X_data = image.eval()
        feed = {X_input: X_data, Y_true: X_data}
        sess.run(train_step, feed)
        output = Y_pred
        # Y_pred is computed from the X_input placeholder, so evaluating it
        # needs a feed just like the training step does; calling eval()
        # without one raises "You must feed a value for placeholder tensor".
        print(output.eval(feed_dict=feed))
    finally:
        # Always stop and join the queue-runner threads, even on failure.
        coord.request_stop()
        coord.join(threads)
Then the placeholders are here:
# Network input: images laid out as [batch, height, width, channels].
X_input = tf.placeholder(tf.float32, [None,120,160,3])
# Training target with the same shape; the training step feeds it the same
# image data as X_input.
Y_true = tf.placeholder(tf.float32, [None,120,160,3])
The error I'm getting:
Traceback (most recent call last):
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1039, in _do_call
return fn(*args)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1021, in _run_fn
status, run_metadata)
File "C:\Python35\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Julian\workspaceNeon\Scriptie\Autoencoder\__init__.py", line 164, in <module>
print(output.eval())
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 569, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3741, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 778, in run
run_metadata_ptr)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'Placeholder', defined at:
File "C:\Users\Julian\workspaceNeon\Scriptie\Autoencoder\__init__.py", line 50, in <module>
X_input = tf.placeholder(tf.float32, [None,120,160,3])
File "C:\Python35\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1507, in placeholder
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1997, in _placeholder
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

TensorFlow OutOfRangeError FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements

I am desperately trying to find the error in my program. I consistently get the following error:
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:#input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Traceback (most recent call last):
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test_tfr.py", line 60, in <module>
res = sess.run(item_cont_b)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
Caused by op 'batch', defined at:
File "test_tfr.py", line 48, in <module>
allow_smaller_final_batch=True)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 917, in batch
name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 710, in _batch
dequeued = queue.dequeue_up_to(batch_size, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/data_flow_ops.py", line 499, in dequeue_up_to
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 1420, in _queue_dequeue_up_to_v2
timeout_ms=timeout_ms, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
I am using TF version 1.1.0. The tfrecords file is 40 GB and contains 1.6M examples. My script (below) is in the same folder as 201704_test.tfrecords, which I am trying to read from. The current batch size, capacity and loop range are just arbitrarily chosen for testing. I have tried a few things mentioned across GitHub and Stack Overflow, but I keep getting stuck here.
My Script
import os
import numpy as np
import tensorflow as tf
def read_and_decode(filename_queue):
    """Read one record from `filename_queue` and parse its features.

    Returns the parsed tensors as the tuple
    (user, item, week, label, item_cont, item_cat, user_cont, user_cat).
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    # Fixed-length layout of every feature stored in a serialized example.
    feature_spec = {
        'user': tf.FixedLenFeature([1], tf.int64),
        'item': tf.FixedLenFeature([1], tf.int64),
        'week': tf.FixedLenFeature([1], tf.int64),
        'label': tf.FixedLenFeature([1], tf.int64),
        'item_cont': tf.FixedLenFeature([6], tf.float32),
        'item_cat': tf.FixedLenFeature([3092], tf.float32),
        'user_cont': tf.FixedLenFeature([12], tf.float32),
        'user_cat': tf.FixedLenFeature([3138], tf.float32)
    }
    parsed = tf.parse_single_example(serialized_example,
                                     features=feature_spec)

    return (parsed['user'], parsed['item'], parsed['week'], parsed['label'],
            parsed['item_cont'], parsed['item_cat'],
            parsed['user_cont'], parsed['user_cat'])
# Queue of input files; num_epochs=1 makes the producer stop after one pass.
filename_queue = tf.train.string_input_producer(['201704_test.tfrecords'],
                                                num_epochs=1)
user, item, week, label, item_cont, item_cat, user_cont, user_cat = \
    read_and_decode(filename_queue=filename_queue)
# Group single parsed examples into batches of up to 3.
item_cont_b = tf.train.batch([item_cont],
                             batch_size=3,
                             num_threads=1,
                             capacity=32,
                             enqueue_many=False,
                             shapes=None,
                             dynamic_pad=False,
                             allow_smaller_final_batch=True)
# num_epochs creates a *local* epoch-counter variable
# (input_producer/limit_epochs/epochs), which
# global_variables_initializer() does not cover. Left uninitialized, the
# producer fails at once, the batch queue is closed while still empty, and
# the first dequeue raises OutOfRangeError.
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(100000):
            if coord.should_stop():
                break
            res = sess.run(item_cont_b)
            print(res)
    except tf.errors.OutOfRangeError:
        # Raised once the single epoch is exhausted: normal end of input.
        pass
    finally:
        # Always stop and join the queue-runner threads.
        coord.request_stop()
        coord.join(threads)
Thanks for any helpful advice!

Categories