Related
I am trying to run code in a Docker container created as follows:
docker run --gpus=all -it -p "8888:8888" -v "/home/miguel/ml-resnet-50/:/notebooks/" --name ml-resnet-50 tensorflow/tensorflow:1.5.0-gpu-py3 jupyter notebook --ip 0.0.0.0 --no-browser --allow-root
On a Linux PC (Ubuntu 20.04) with an Nvidia RTX 3070 card, I run the following code:
model.fit(
x=imgs_train,
y=clss_train,
batch_size=16,
epochs=2,
verbose=1,
validation_data=(imgs_val, clss_val)
)
And I get the following error:
InternalError: Blas SGEMM launch failed : m=48400, n=64, k=64
[[Node: res2a_branch2a/Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC",
dilations=[1, 1, 1, 1], padding="VALID", strides=[1, 1, 1, 1],
use_cudnn_on_gpu=true,
_device="/job:localhost/replica:0/task:0/device:GPU:0"](max_pooling2d/MaxPool,
res2a_branch2a/kernel/read)]] [[Node: loss/mul/_2859 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device_incarnation=1, tensor_name="edge_15435_loss/mul",
tensor_type=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Caused by op 'res2a_branch2a/Conv2D', defined at: File
"/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
"main", mod_spec) File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals) File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line
16, in
app.launch_new_instance() File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py",
line 658, in launch_instance
app.start() File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line
478, in start
self.io_loop.start() File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/ioloop.py", line
177, in start
super(ZMQIOLoop, self).start() File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 888,
in start
handler_func(fd_obj, events) File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py",
line 440, in _handle_events
self._handle_recv() File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py",
line 472, in _handle_recv
self._run_callback(callback, msg) File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py",
line 414, in _run_callback
callback(*args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py",
line 277, in null_wrapper
return fn(*args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line
283, in dispatcher
return self.dispatch_shell(stream, msg) File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line
233, in dispatch_shell
handler(stream, idents, msg) File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line
399, in execute_request
user_expressions, allow_stdin) File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line
208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent) File
"/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line
537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File
"/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py",
line 2728, in run_cell
interactivity=interactivity, compiler=compiler, result=result) File
"/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py",
line 2850, in run_ast_nodes
if self.run_code(code, result): File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py",
line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns) File "", line 2, in
model = get_model() File "", line 4, in get_model
model = ResNet50(include_top=False,input_shape=(pipeline['img_height'],
pipeline['img_width'], 3)) File
"/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/_impl/keras/applications/resnet50.py",
line 235, in ResNet50
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) File
"/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/_impl/keras/applications/resnet50.py",
line 122, in conv_block
name=conv_name_base + '2a')(input_tensor) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/_impl/keras/engine/topology.py",
line 258, in call
output = super(Layer, self).call(inputs, **kwargs) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py",
line 652, in call
outputs = self.call(inputs, *args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/convolutional.py",
line 167, in call
outputs = self._convolution_op(inputs, self.kernel) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py",
line 838, in call
return self.conv_op(inp, filter) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py",
line 502, in call
return self.call(inp, filter) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py",
line 190, in call
name=self.name) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_nn_ops.py",
line 639, in conv2d
data_format=data_format, dilations=dilations, name=name) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py",
line 787, in _apply_op_helper
op_def=op_def) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py",
line 3160, in create_op
op_def=op_def) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py",
line 1625, in init
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): Blas SGEMM launch failed :
m=48400, n=64, k=64 [[Node: res2a_branch2a/Conv2D =
Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1],
padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true,
_device="/job:localhost/replica:0/task:0/device:GPU:0"](max_pooling2d/MaxPool,
res2a_branch2a/kernel/read)]] [[Node: loss/mul/_2859 =
_Recvclient_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0",
send_device="/job:localhost/replica:0/task:0/device:GPU:0",
send_device_incarnation=1, tensor_name="edge_15435_loss/mul",
tensor_type=DT_FLOAT,
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]
Any idea why this happens?
I find the error messages from TensorFlow hard to understand, especially those raised at run time (i.e. in sess.run()). There is little documentation explaining how to read these error messages.
For example, here is an error message:
Traceback (most recent call last):
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1322, in _do_call
return fn(*args)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 155, in <module>
res = runManager.modRun(i)
File "/home/hyh/projects/RFCN-tensorflow/Utils/RunManager.py", line 97, in modRun
return self.runAndMerge(feed_dict, options=options if options is not None else self.options, run_metadata=run_metadata if run_metadata is not None else self.run_metadata)
File "/home/hyh/projects/RFCN-tensorflow/Utils/RunManager.py", line 71, in runAndMerge
res = self.sess.run(self.inputTensors, feed_dict=feed_dict, options=options, run_metadata=run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape', defined at:
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 118, in <module>
trainOp = createUpdateOp()
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 104, in createUpdateOp
grads = optimizer.compute_gradients(totalLoss, var_list=net.getVariables())
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 526, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 494, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 385, in _MaybeCompile
return grad_fn() # Exit early
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 636, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_grad.py", line 521, in _ReshapeGrad
return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6113, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op 'RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2', defined at:
File "/home/hyh/projects/RFCN-tensorflow/main.py", line 96, in <module>
tf.losses.add_loss(net.getLoss(boxes, classes))
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/BoxNetwork.py", line 50, in getLoss
return self.rpn.loss(refBoxes) + self.boxRefiner.loss(self.proposals, refBoxes, refClasses)
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 186, in loss
return tf.cond(tf.shape(refBoxes)[0] > 0, lambda: calcLoss(), lambda: tf.constant(0.0))
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 432, in new_func
return func(*args, **kwargs)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2063, in cond
orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1913, in BuildCondBranch
original_result = fn()
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 186, in <lambda>
return tf.cond(tf.shape(refBoxes)[0] > 0, lambda: calcLoss(), lambda: tf.constant(0.0))
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 173, in calcLoss
positiveLosses, negativeLosses = calcAllLosses(inAnchros, inBoxes, inRawSizes, inScores, inBoxSizes)
File "/home/hyh/projects/RFCN-tensorflow/BoxEngine/RPN.py", line 145, in calcAllLosses
classificationLoss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=scores, labels=refScores, name="classification_loss")
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1878, in softmax_cross_entropy_with_logits_v2
cost = array_ops.reshape(cost, output_shape)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6113, in reshape
"Reshape", tensor=tensor, shape=shape, name=name)
File "/home/hyh/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 10669 values, but the requested shape has 11172
[[Node: optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Reshape/tensor, optimizer/gradients/RPNloss/cond/calcRPNLoss/calcAllRPNLosses/classification_loss/Reshape_2_grad/Shape)]]
[[Node: cond/getRefinementLoss/posLoss/getPosLoss/Reshape/_1897 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_4151_cond/getRefinementLoss/posLoss/getPosLoss/Reshape", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Process finished with exit code 1
I have two questions:
Why are there so many call stacks? First there is Traceback, then During handling of the above exception, another exception occurred:, then Caused by..., and finally ...which was originally created as op. What does each of them mean?
Why are there so many error nodes? In the message above, it seems that two nodes have gone wrong. What does that mean? Which node caused this error?
Tensorflow error messages are always quite verbose and this is mainly due to how TF works (because of the Computation Graph it builds).
In your case, it seems that you are reshaping a tensor with the wrong shape:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 10669 values, but the requested shape has 11172
To see if that is the case try printing the shape of the tensor given to reshape op, i.e.:
input = tf.placeholder(tf.float32, [None, 28, 28, 1])
x = tf.layers.dense(input, units=64, activation=tf.nn.relu)
x = tf.Print(x, [x])
x_rs = tf.reshape(x, [-1, 28*28])
To optimize my code, I changed the following:
view1ID_train_data_sparse = load_sample(batch_size_view1, f_view1ID_train_data_sparse)
row_view1ID = range(len(view1ID_train_data_sparse[:,0].astype(int)))
col_view1ID = view1ID_train_data_sparse[:,1]
value_view1ID = view1ID_train_data_sparse[:,2]
view1ID_train_data = coo_matrix( ( value_view1ID, (row_view1ID, col_view1ID.astype(int)) ), shape=( len(row_view1ID), View1Number ) ).toarray()
to:
View1ID_x_temp = tf.placeholder(tf.int32, shape = [None, None], name = 'View1ID_x_temp')
View1ID_x_label = tf.expand_dims(View1ID_x_temp[:,1],1)
View1ID_x_index = tf.expand_dims(tf.range(0, batch_size_view1),1)
concated_1ID = tf.concat([View1ID_x_index, View1ID_x_label],1)
View1ID_x = tf.sparse_to_dense(concated_1ID, [batch_size_view1,View1Number], 1.0, 0.0)
But there is an error:
2018-02-26 17:25:25.665274: W tensorflow/core/framework/op_kernel.cc:1152] Invalid argument: Incompatible shapes: [4883,1] vs. [68,1]
[[Node: gradients/sub_337_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_337_grad/Shape, gradients/sub_337_grad/Shape_1)]]
2018-02-26 17:25:25.666627: W tensorflow/core/framework/op_kernel.cc:1152] Invalid argument: Incompatible shapes: [4883,1] vs. [68,1]
[[Node: gradients/sub_337_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_337_grad/Shape, gradients/sub_337_grad/Shape_1)]]
Traceback (most recent call last):
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/Users/LA_rovski/anaconda/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [4883,1] vs. [68,1]
[[Node: gradients/sub_337_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_337_grad/Shape, gradients/sub_337_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "Transfer_Model_sparse1.py", line 638, in <module>
Pi: pi})
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [4883,1] vs. [68,1]
[[Node: gradients/sub_337_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_337_grad/Shape, gradients/sub_337_grad/Shape_1)]]
Caused by op 'gradients/sub_337_grad/BroadcastGradientArgs', defined at:
File "Transfer_Model_sparse1.py", line 489, in <module>
optimize = optimizer.minimize(objective)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 315, in minimize
grad_loss=grad_loss)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 386, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 368, in _MaybeCompile
return grad_fn() # Exit early
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py", line 609, in _SubGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 411, in _broadcast_gradient_args
name=name)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'sub_337', defined at:
File "Transfer_Model_sparse1.py", line 424, in <module>
sample_log_likelihood_view1 = tf.reduce_sum(log_gaussian(Rating_view1, Y_view1, sigma_prior_y))
File "Transfer_Model_sparse1.py", line 38, in log_gaussian
return -0.5 * np.log(2 * np.pi) - tf.log(tf.abs(sigma)) - tf.square(x - mu) / (2 * tf.square(sigma))
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 821, in binary_op_wrapper
return func(x, y, name=name)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2627, in _sub
result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/Users/LA_rovski/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [4883,1] vs. [68,1]
[[Node: gradients/sub_337_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/sub_337_grad/Shape, gradients/sub_337_grad/Shape_1)]]
I have checked the dimensions of the matrices many times but couldn't find the cause. This problem has been troubling me for a long time; thank you so much if you can help.
It is weird that if I turn n_batches (the number of iterations) down to 2, the bug disappears.
I got an error while trying to run the chatbot example in TensorFlow (https://github.com/golbin/TensorFlow-Tutorials/tree/master/08%20-%20RNN/ChatBot).
I have no idea what the error means.
Below is the error:
Caused by op 'save/RestoreV2_11', defined at:
File "chat.py", line 79, in <module>
tf.app.run()
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "chat.py", line 75, in main
chatbot = ChatBot(FLAGS.voc_path, FLAGS.train_dir)
File "chat.py", line 17, in __init__
self.model = Seq2Seq(self.dialog.vocab_size)
File "/home/ubuntu/example/TensorFlow-Tutorials/08 - RNN/ChatBot/model.py", line 30, in __init__
self.saver = tf.train.Saver(tf.global_variables())
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1140, in __init__
self.build()
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1172, in build
filename=self._filename)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 688, in build
restore_sequentially, reshape)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 407, in _AddRestoreOps
tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 247, in restore_op
[spec.tensor.dtype])[0])
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/ops/gen_io_ops.py", line 663, in restore_v2
dtypes=dtypes, name=name)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/ubuntu/tensor/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
NotFoundError (see above for traceback): Key
decode/rnn/multi_rnn_cell/cell_1/basic_rnn_cell/bias
not found in checkpoint
[[Node: save/RestoreV2_11 = RestoreV2[dtypes=[DT_FLOAT],
_device="/job:localhost/replica
:0/task:0/cpu:0"](_arg_save/Const_0_0, save/RestoreV2_11/tensor_names,
save/RestoreV2_11/shape_and_slices)]]
You need to apply this solution by using the appropriate vars_to_rename mapping, likely something like this:
"lstm/BasicLSTMCell/Linear/Bias": "lstm/basic_lstm_cell/biases"
I am confused about feed_dict. I can feed my dataset as X, but I cannot feed the Y labels. All dtypes are tf.float64; I printed them out, and yes, they are float64. My X dataset has shape 90,32,32,3 and my Y has shape 90,2, so I defined a placeholder with a matching shape to be fed.
But when I run my code, this error comes out:
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_34' with dtype double
[[Node: Placeholder_34 = Placeholder[dtype=DT_DOUBLE, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I have no idea why this happens, since I am really sure that both X and Y are float64. X is fine, so why does only Y fail? Please help, I would really appreciate it :)
Here's the code:
size_image =32
allX = np.zeros((100, size_image, size_image, 3), dtype='float64')
ally = np.zeros(100, dtype='float64')
count = 0
for i in range(49):
img = io.imread("cat/cat"+str(i+1)+".jpg")
new_img = imresize(img, (size_image, size_image, 3))
allX[count] = np.array(new_img)
ally[count] = 0
count += 1
#dog images and labels
for i in range(51):
img = io.imread("dog/dog"+str(i+1)+".jpg")
new_img = imresize(img, (size_image, size_image, 3))
allX[count] = np.array(new_img)
ally[count] = 1
count += 1
# splitting the dataset so that it is random
X, X_test, Y, Y_test = train_test_split(allX, ally, test_size=0.1, random_state=42)
# encode the Ys
Y = to_categorical(Y, 2)
Y_test = to_categorical(Y_test, 2)
x = tf.placeholder(tf.float64, [None,None, None,3])
y_ = tf.placeholder(tf.float64, [None, 2])
sess = tf.Session()
sess.run(init)
t = x*2
print(sess.run(t, feed_dict={x:X}))
m = 1*y
print(sess.run(m, feed_dict={y_:Y}))
And here is the traceback:
InvalidArgumentError Traceback (most recent call last)
<ipython-input-95-6f97fe1da9c0> in <module>()
6 print(y.get_shape())
7 m = 1*y
----> 8 print(sess.run(m, feed_dict={y_:Y}))
9 print(y_.get_shape().as_list())
10 #x = tf.stack(X)
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_34' with dtype double
[[Node: Placeholder_34 = Placeholder[dtype=DT_DOUBLE, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Placeholder_34', defined at:
File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py", line 3, in <module>
app.launch_new_instance()
File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-94-297273a8e189>", line 1, in <module>
x = tf.placeholder(tf.float64, [None,None, None,3])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder_34' with dtype double
[[Node: Placeholder_34 = Placeholder[dtype=DT_DOUBLE, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]