I'm trying to train an autoencoder with an MSE loss function in TensorFlow r1.2, but I keep getting a FailedPreconditionError stating that one of the variables involved in computing the MSE is uninitialized (see the full stack trace below). I'm running this in a Jupyter notebook with Python 3.
I trimmed my code down to a minimal example as follows:
import sys
import tensorflow as tf
import numpy as np
from functools import partial

# specify network
def reset_graph(seed=0):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

n_inputs = 100
n_hidden = 6
n_outputs = n_inputs

learning_rate = 0.001
l2_reg = 0.001

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

he_init = tf.contrib.layers.variance_scaling_initializer()
l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
my_dense_layer = partial(tf.layers.dense,
                         activation=tf.nn.elu,
                         kernel_initializer=he_init,
                         kernel_regularizer=l2_regularizer)

hidden1 = my_dense_layer(X, n_hidden)
outputs = my_dense_layer(hidden1, n_outputs, activation=None)

reconstruction_loss = tf.reduce_mean(tf.metrics.mean_squared_error(X, outputs))

reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n([reconstruction_loss] + reg_losses)

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# generate 1000 random examples
sample_X = np.random.rand(1000, 100)

# train network
n_epochs = 10
batch_size = 50

with tf.Session() as sess:
    sess.run(init)  # init.run()
    for epoch in range(n_epochs):
        n_batches = sample_X.shape[0] // batch_size
        for iteration in range(n_batches):
            start_idx = iteration * batch_size
            if iteration == n_batches - 1:
                end_idx = sample_X.shape[0]
            else:
                end_idx = start_idx + batch_size
            sys.stdout.flush()
            X_batch = sample_X[start_idx:end_idx]
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print(round(loss_train, 5))
When I replace the line that defines reconstruction_loss with one that doesn't use tf.metrics, as follows,
reconstruction_loss = tf.reduce_mean(tf.square(tf.norm(outputs - X)))
I don't get the exception.
I've checked several similar SO questions, but none of them solved my problem. For example, one possible cause, suggested in an answer to FailedPreconditionError: Attempting to use uninitialized in Tensorflow, is failing to initialize all the variables in the TF graph; but my script initializes all TF variables with init = tf.global_variables_initializer() and then sess.run(init). Another possible cause is that the Adam optimizer creates its own variables, which need to be initialized after the optimizer is specified (see Tensorflow: Using Adam optimizer). However, my script defines the variable initializer after the optimizer, as suggested in the accepted answer to that question, so that can't be my problem either.
Can anyone spot anything wrong with my script or suggest things to try to suss out the cause of this error?
Below is the stack trace from the error.
---------------------------------------------------------------------------
FailedPreconditionError Traceback (most recent call last)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1138 try:
-> 1139 return fn(*args)
1140 except errors.OpError as e:
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1120 feed_dict, fetch_list, target_list,
-> 1121 status, run_metadata)
1122
~\AppData\Local\Continuum\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
88 try:
---> 89 next(self.gen)
90 except StopIteration:
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
FailedPreconditionError: Attempting to use uninitialized value mean_squared_error/total
[[Node: mean_squared_error/total/read = Identity[T=DT_FLOAT, _class=["loc:@mean_squared_error/total"], _device="/job:localhost/replica:0/task:0/cpu:0"](mean_squared_error/total)]]
During handling of the above exception, another exception occurred:
FailedPreconditionError Traceback (most recent call last)
<ipython-input-55-aac61c488ed8> in <module>()
64 sess.run(training_op, feed_dict={X: X_batch})
65
---> 66 loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
67 print(round(loss_train, 5))
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py in eval(self, feed_dict, session)
604
605 """
--> 606 return _eval_using_default_session(self, feed_dict, self.graph, session)
607
608
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py in _eval_using_default_session(tensors, feed_dict, graph, session)
3926 "the tensor's graph is different from the session's "
3927 "graph.")
-> 3928 return session.run(tensors, feed_dict)
3929
3930
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
995 if final_fetches or final_targets:
996 results = self._do_run(handle, final_targets, final_fetches,
--> 997 feed_dict_string, options, run_metadata)
998 else:
999 results = []
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1130 if handle is None:
1131 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132 target_list, options, run_metadata)
1133 else:
1134 return self._do_call(_prun_fn, self._session, handle, feed_dict,
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1150 except KeyError:
1151 pass
-> 1152 raise type(e)(node_def, op, message)
1153
1154 def _extend_graph(self):
FailedPreconditionError: Attempting to use uninitialized value mean_squared_error/total
[[Node: mean_squared_error/total/read = Identity[T=DT_FLOAT, _class=["loc:@mean_squared_error/total"], _device="/job:localhost/replica:0/task:0/cpu:0"](mean_squared_error/total)]]
Caused by op 'mean_squared_error/total/read', defined at:
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
app.launch_new_instance()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
if self.run_code(code, result):
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-55-aac61c488ed8>", line 32, in <module>
reconstruction_loss = tf.reduce_mean(tf.metrics.mean_squared_error(X, outputs))
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\metrics_impl.py", line 1054, in mean_squared_error
updates_collections, name or 'mean_squared_error')
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\metrics_impl.py", line 331, in mean
total = _create_local('total', shape=[])
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\metrics_impl.py", line 196, in _create_local
validate_shape=validate_shape)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1679, in variable
caching_device=caching_device, name=name, dtype=dtype)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 200, in __init__
expected_shape=expected_shape)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\variables.py", line 319, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1303, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\user\AppData\Local\Continuum\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value mean_squared_error/total
[[Node: mean_squared_error/total/read = Identity[T=DT_FLOAT, _class=["loc:@mean_squared_error/total"], _device="/job:localhost/replica:0/task:0/cpu:0"](mean_squared_error/total)]]
Looks like you're doing everything right with initialization, so I suspect the error is in how you're using tf.metrics.mean_squared_error.
The tf.metrics package lets you compute a value, but also accumulate that value over multiple calls to sess.run. Note the return value of tf.metrics.mean_squared_error in the docs:
https://www.tensorflow.org/api_docs/python/tf/metrics/mean_squared_error
You get back both mean_squared_error, as you appear to expect, and an update_op. The purpose of the update_op is that you ask TensorFlow to compute it, and it accumulates the mean squared error; each time you then evaluate mean_squared_error you get the accumulated value. When you want to reset the value, run sess.run(tf.local_variables_initializer()) (note local, not global, to clear the "local" variables as the metrics package defines them).
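For illustration, here is a minimal, self-contained sketch (not your autoencoder) of how the metrics pair is meant to be used; the labels/predictions placeholders exist only for this example:

import tensorflow as tf

labels = tf.placeholder(tf.float32, shape=[None])
predictions = tf.placeholder(tf.float32, shape=[None])
# first return value reads the accumulator; update_op folds a new batch into it
mse, update_op = tf.metrics.mean_squared_error(labels, predictions)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metrics state lives in local variables
    sess.run(update_op, feed_dict={labels: [1., 2.], predictions: [1., 3.]})
    sess.run(update_op, feed_dict={labels: [0.], predictions: [2.]})
    print(sess.run(mse))  # running mean over all 3 examples seen so far: 5/3
    sess.run(tf.local_variables_initializer())  # reset the accumulator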
I don't think the metrics package was intended to be used the way you're using it. Your intention, I think, was to compute the MSE of only the current batch as your loss, not to accumulate the value over multiple calls; I'm not even sure how differentiation would work with respect to an accumulated value like this.
So I think the answer to your question is: don't use the metrics package this way. Use metrics for reporting, and for accumulating results over multiple iterations of, say, a test dataset, not for building a loss function.
I think what you meant to use is tf.losses.mean_squared_error.
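Against your snippet (a sketch, keeping your X and outputs), the swap would be:

# per-batch, differentiable MSE; no accumulator variables are created
reconstruction_loss = tf.losses.mean_squared_error(labels=X, predictions=outputs)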
I am using TensorFlow on the Handwritten A_Z dataset in a Kaggle Kernel.
I've used 2 conv layers, each followed by a maxpool layer, then reshaped the result into full_1 (shape (-1, 7*7*64)), fed that into a fully connected layer (full_2, to which I applied dropout), and connected it to a layer named last of shape (None, 26) to finally get the predicted output, which represents the 26 letters of English.
CONV -> MAXPOOL -> CONV -> MAXPOOL -> reshape (full_1) -> FULLY_CONNECTED (full_2) -> OUTPUT (last)
Earlier runs of the training process gave numeric accuracy values, but later it started giving NaNs for some unknown reason.
Also, the numeric accuracy values never increased much during training and stayed very low, which makes me wonder whether I have applied the convolutional network correctly, since the network should learn to give better accuracy as batches of data are processed. Is the low accuracy due to having few layers and a less complex model?
I am also doubtful about the tf.nn.softmax_cross_entropy_with_logits(labels=output, logits=last) statement in my code, because the relu function has already been applied to the last variable, which denotes the output layer of my conv net and is used above as the logits.
The error says :
FailedPreconditionError: Attempting to use uninitialized value W_4
The code is :
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import copy
import warnings
warnings.filterwarnings('ignore')
#dataset=pd.read_csv('/Users/ajay/Documents/IpyNote/A_Z Handwritten Data.csv')
dataset=pd.read_csv('../input/handwritten_data_785.csv')
#print(dataset.head(3))
#print(dataset.info())
dataset['0'].unique()
dataset=dataset.astype('float32')
X=copy.deepcopy(dataset)
X.head(1)
Y=X.loc[:,'0']
#print(Y.head(3))
Y=Y.astype('int64')
s=pd.get_dummies(Y)
list(s)
Y=s
Y=Y.astype('float32')
Y.head(2)
X.drop('0',axis=1,inplace=True)
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.25,stratify=Y)
input=tf.placeholder(dtype=tf.float32,shape=(None,28*28))
output=tf.placeholder(dtype=tf.float32,shape=(None,26))
W1=tf.Variable(tf.truncated_normal(shape=(5,5,1,32)),name='W')#28,28,32
b1=tf.Variable(tf.truncated_normal(shape=(1,32)),name='b')#14,14,32
W2=tf.Variable(tf.truncated_normal(shape=(5,5,32,64)),name='W')#14,14,64
b2=tf.Variable(tf.truncated_normal(shape=(1,64)),name='b')#7,7,64
W3=tf.Variable(tf.truncated_normal(shape=(7*7*64,1024)),name='W')
b3=tf.Variable(tf.truncated_normal(shape=(1,1024)),name='b')
W4=tf.Variable(tf.truncated_normal(shape=(1024,26)),name='W')
b4=tf.Variable(tf.truncated_normal(shape=(1,26)),name='b')
def conv(input,W,b):
    return tf.nn.relu(tf.nn.conv2d(input=input,filter=W,strides=(1,1,1,1),padding='SAME')+b)
def maxpool(x):
    return tf.nn.max_pool(value=x,ksize=(1,2,2,1),strides=(1,2,2,1),padding='SAME')
def full_connected(x,W,b):
    return tf.nn.relu(tf.matmul(x,W)+b)
p=tf.reshape(input,[-1,28,28,1])
conv_1=conv(p,W1,b1)
print('conv_1.shape',conv_1.shape)
maxpool_1=maxpool(conv_1)
print('maxpool_1.shape',maxpool_1.shape)
conv_2=conv(maxpool_1,W2,b2)
print('conv_2.shape',conv_2.shape)
maxpool_2=maxpool(conv_2)
print('maxpool_2.shape',maxpool_2.shape)
full_1=tf.reshape(maxpool_2,[-1,7*7*64])
full_2=full_connected(full_1,W3,b3)#full_1->full_2
print('full_2.shape',full_2.shape)
keep_prob=tf.placeholder(tf.float32)
full_2_dropout=tf.nn.dropout(full_2,keep_prob)
last=full_connected(full_2_dropout,W4,b4)
last = tf.clip_by_value(last, 1e-10, 0.9999999)
print('last.shape',last.shape)
loss=tf.nn.softmax_cross_entropy_with_logits(labels=output,logits=last)#loss=tf.nn.softmax(logits=last)
train_step=tf.train.AdamOptimizer(0.005).minimize(loss)
accuracy=tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output,1), tf.argmax(last,1) ) , tf.float32))
init=tf.global_variables_initializer()
with tf.Session() as sess:
    epoch=1
    n_iterations=10
    sess.run(init)
    for i in range(n_iterations):
        j=i*50
        k=i*50+50
        print('j=',j,'k=',k)
        x = X_train.iloc[i*50:j,:]
        y = Y_train.iloc[i*50:j,:]
        #sess.run(accuracy,feed_dict={input:X_train,output:Y_train,keep_prob:1.0})
        print('Train_accuracy : ',sess.run(accuracy, feed_dict={input: x, output: y,keep_prob:1.0}))
        sess.run(train_step,feed_dict={input:x,output:y,keep_prob:1.0})
with tf.Session() as sess:
    n_iterations=20
    for i in range(n_iterations):
        j=i*50
        k=i*50+50
        print('j=',j,'k=',k)
        x = X_test.iloc[i*50:j,:]
        y = Y_test.iloc[i*50:j,:]
        print('Test_accuracy : ',sess.run(accuracy, feed_dict={input: x, output: y,keep_prob:1.0}))
The error is showing something like this:
conv_1.shape (?, 28, 28, 32)
maxpool_1.shape (?, 14, 14, 32)
conv_2.shape (?, 14, 14, 64)
maxpool_2.shape (?, 7, 7, 64)
full_2.shape (?, 1024)
last.shape (?, 26)
j= 0 k= 50
Train_accuracy : nan
j= 50 k= 100
Train_accuracy : nan
j= 100 k= 150
Train_accuracy : nan
j= 150 k= 200
Train_accuracy : nan
j= 200 k= 250
Train_accuracy : nan
j= 250 k= 300
Train_accuracy : nan
j= 300 k= 350
Train_accuracy : nan
j= 350 k= 400
Train_accuracy : nan
j= 400 k= 450
Train_accuracy : nan
j= 450 k= 500
Train_accuracy : nan
j= 0 k= 50
---------------------------------------------------------------------------
FailedPreconditionError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1329 try:
-> 1330 return fn(*args)
1331 except errors.OpError as e:
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1314 return self._call_tf_sessionrun(
-> 1315 options, feed_dict, fetch_list, target_list, run_metadata)
1316
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1422 self._session, options, feed_dict, fetch_list, target_list,
-> 1423 status, run_metadata)
1424
/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
515 compat.as_text(c_api.TF_Message(self.status.status)),
--> 516 c_api.TF_GetCode(self.status.status))
517 # Delete the underlying status object from memory otherwise it stays alive
FailedPreconditionError: Attempting to use uninitialized value W_4
[[Node: W_4/read = Identity[T=DT_FLOAT, _class=["loc:@W_4"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](W_4)]]
During handling of the above exception, another exception occurred:
FailedPreconditionError Traceback (most recent call last)
<ipython-input-2-496ec024fd3b> in <module>()
114 x = X_test.iloc[i*50:j,:]
115 y = Y_test.iloc[i*50:j,:]
--> 116 print('Test_accuracy : ',sess.run(accuracy, feed_dict={input: x, output: y,keep_prob:1.0}))
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
906 try:
907 result = self._run(None, fetches, feed_dict, options_ptr,
--> 908 run_metadata_ptr)
909 if run_metadata:
910 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1141 if final_fetches or final_targets or (handle and feed_dict_tensor):
1142 results = self._do_run(handle, final_targets, final_fetches,
-> 1143 feed_dict_tensor, options, run_metadata)
1144 else:
1145 results = []
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1322 if handle is None:
1323 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1324 run_metadata)
1325 else:
1326 return self._do_call(_prun_fn, handle, feeds, fetches)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1341 except KeyError:
1342 pass
-> 1343 raise type(e)(node_def, op, message)
1344
1345 def _extend_graph(self):
FailedPreconditionError: Attempting to use uninitialized value W_4
[[Node: W_4/read = Identity[T=DT_FLOAT, _class=["loc:@W_4"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](W_4)]]
Caused by op 'W_4/read', defined at:
File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/opt/conda/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/opt/conda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
if self.run_code(code, result):
File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-496ec024fd3b>", line 42, in <module>
W1=tf.Variable(tf.truncated_normal(shape=(5,5,1,32)),name='W')#28,28,32
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 235, in __init__
constraint=constraint)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 397, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 142, in identity
return gen_array_ops.identity(input, name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3052, in identity
"Identity", input=input, name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3306, in create_op
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1669, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value W_4
[[Node: W_4/read = Identity[T=DT_FLOAT, _class=["loc:@W_4"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](W_4)]]
Reason for the accuracy giving NaNs: you have split the data into X_train and X_test, which disturbed the indices; the train dataset became quite random with respect to its indices, so when you feed X_train batch-wise, the indices from [0:50] do not exist during training and you end up feeding nothing to your model.
Before training the model, do this (note that reset_index returns a new dataframe, so assign the result back):
X_test = X_test.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)
This will reset your indices, and drop=True will prevent the original indices from becoming another column in your transformed dataframe.
As far as the weights and biases are concerned, DO NOT use another session for testing the model, because all your trained variables are lost in that new session, hence the error Attempting to use uninitialized value W_4.
You can also try saving your variables for the sake of convenience.
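A minimal sketch of that approach (the checkpoint path './model.ckpt' is just a placeholder):

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... training loop ...
    save_path = saver.save(sess, './model.ckpt')

with tf.Session() as sess:
    saver.restore(sess, save_path)  # restores the trained values; no re-initialization needed
    # ... testing loop ...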
Also, refer to this for your logits part: here
You can call sess.run(tf.global_variables_initializer()) to initialize the variables. See this StackOverflow answer for more information about the initializer.
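In context (a minimal sketch), the call sits between graph construction and training:

init = tf.global_variables_initializer()  # create the init op after all variables are defined
with tf.Session() as sess:
    sess.run(init)  # run it before any op that reads a variable
    # ... sess.run(train_step, feed_dict=...) ...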
When I try to use the tf.matmul function on the GPU I get the following error:
InternalError: Blas xGEMMBatched launch failed
If the N value in the function calc() is set to a value less than 15, it works.
I am running TensorFlow 1.8.0 and CUDA V9.1.85. There is only one Python process working on the GPU and there are no other open sessions. Also, I have plenty of GPU memory to spare (see attached image).
Changing the CUDA_VISIBLE_DEVICES value does not show any effect, and changing the ConfigProto() settings does not help either. Using tf.matmul instead of the @ operator also does not solve the problem.
This is the code I am running:
import tensorflow as tf
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
tf.Session(config=config).close()
def calc():
    N = 15 # works for N <= 14
    a = 16
    b = 8
    X = np.random.rand(N, 11520, b, 1).astype(np.float32)
    print(X.nbytes*1e-6, "MB")
    W = np.random.rand(N, 11520, a, b).astype(np.float32)
    print(W.nbytes*1e-6, "MB")
    X_ = tf.constant(X, name="X-constant", dtype=tf.float32)
    W_ = tf.constant(W, name="W-constant", dtype=tf.float32)
    # return tf.matmul(W_, X_, name="mymatmul")
    return W_ @ X_
tf.reset_default_graph()
a = calc()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
b = sess.run(a)
sess.close()
print(b.shape)
This is the output I get:
5.529599999999999 MB
88.47359999999999 MB
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1321 try:
-> 1322 return fn(*args)
1323 except errors.OpError as e:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1306 return self._call_tf_sessionrun(
-> 1307 options, feed_dict, fetch_list, target_list, run_metadata)
1308
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1408 self._session, options, feed_dict, fetch_list, target_list,
-> 1409 run_metadata)
1410 else:
InternalError: Blas xGEMMBatched launch failed : a.shape=[172800,16,8], b.shape=[172800,8,1], m=16, n=1, k=8, batch_size=172800
[[Node: matmul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](W-constant, X-constant)]]
During handling of the above exception, another exception occurred:
InternalError Traceback (most recent call last)
<ipython-input-5-013153235a1a> in <module>()
3 sess = tf.Session()
4 sess.run(tf.global_variables_initializer())
----> 5 b = sess.run(a)
6 sess.close()
7 print(b.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
--> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor):
1134 results = self._do_run(handle, final_targets, final_fetches,
-> 1135 feed_dict_tensor, options, run_metadata)
1136 else:
1137 results = []
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1314 if handle is None:
1315 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316 run_metadata)
1317 else:
1318 return self._do_call(_prun_fn, handle, feeds, fetches)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1333 except KeyError:
1334 pass
-> 1335 raise type(e)(node_def, op, message)
1336
1337 def _extend_graph(self):
InternalError: Blas xGEMMBatched launch failed : a.shape=[172800,16,8], b.shape=[172800,8,1], m=16, n=1, k=8, batch_size=172800
[[Node: matmul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](W-constant, X-constant)]]
Caused by op 'matmul', defined at:
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 486, in start
self.io_loop.start()
File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 127, in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
self._run_once()
File "/usr/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
handle._run()
File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
self._callback(*self._args)
File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 117, in _handle_events
handler_func(fileobj, events)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
raw_cell, store_history, silent, shell_futures)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-013153235a1a>", line 2, in <module>
a = calc()
File "<ipython-input-4-bf0e6012e9e2>", line 13, in calc
return W_ @ X_
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 847, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 1976, in matmul
a, b, adj_x=adjoint_a, adj_y=adjoint_b, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1236, in batch_mat_mul
"BatchMatMul", x=x, y=y, adj_x=adj_x, adj_y=adj_y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): Blas xGEMMBatched launch failed : a.shape=[172800,16,8], b.shape=[172800,8,1], m=16, n=1, k=8, batch_size=172800
[[Node: matmul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](W-constant, X-constant)]]
Here is a workaround replacing tf.matmul with tf.einsum. However, your code works on my machine with an NVIDIA 840M (2004 MiB RAM), cuDNN 7.0.5.15 and CUDA 9.0.176 (maybe downgrading helps?).
import tensorflow as tf
import numpy as np
sess = tf.Session()
N = 20
M = 11520
a = 16
b = 8
W = np.random.rand(N, M, a, b).astype(np.float32)
X = np.random.rand(N, M, b, 1).astype(np.float32)
# tf.einsum does not support numpy arrays, so wrap W and X in tf.constants
W2 = tf.constant(W)
X2 = tf.constant(X)
# tf.einsum does not support "..." as seen later in np.einsum
WX = tf.einsum("uvik,uvkj->uvij", W2, X2)
# same as:
#WX = tf.matmul(W2, X2)
# calculate W @ X using tf.einsum
result1 = sess.run(WX)
# calculate W @ X using np.einsum
result2 = np.einsum("...ik,...kj->...ij", W, X)
# calculate W @ X by hand (just for illustrative purpose, too slow for practical use)
result3 = np.zeros((N, M, a, 1), dtype=np.float32)
for i in range(a):
    for j in range(1):
        for k in range(b):
            result3[..., i, j] += W[..., i, k] * X[..., k, j]
# ensure that everything is correct
assert(np.allclose(result1, result2))
assert(np.allclose(result1, result3))
print("everything ok")
sess.close()
I've got an issue with TensorFlow; hope you can help :D
My goal: say I've got two graphs, Graph A and Graph B, each composed of three convolutional layers. What I want is to link the second layer of Graph A to the third layer of Graph B.
I've successfully combined the two graphs using the information given here: Tensorflow: How to replace a node in a calculation graph?
This is what I got:
The problem comes when I try to load the variable weights. I've followed this solution, but it didn't work: TensorFlow: Restoring variables from multiple checkpoints
There is no problem when loading the weights of graph A, but this error arises when loading the weights of graph B:
INFO:tensorflow:Restoring parameters from ./models/merge_graph_A.ckpt
INFO:tensorflow:Restoring parameters from ./models/merge_graph_B.ckpt
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-27-1e08224fde39> in <module>()
1 with tf.Session(graph=graphs_merged) as sess:
2 saver_A.restore(sess, './models/merge_graph_A.ckpt')
----> 3 saver_B.restore(sess, './models/merge_graph_B.ckpt')
4
5 outcome = sess.run(output, feed_dict={
/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/training/saver.pyc in restore(self, sess, save_path)
1753 if context.in_graph_mode():
1754 sess.run(self.saver_def.restore_op_name,
-> 1755 {self.saver_def.filename_tensor_name: save_path})
1756 else:
1757 self._build_eager(save_path, build_save=False, build_restore=True)
/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
903 try:
904 result = self._run(None, fetches, feed_dict, options_ptr,
--> 905 run_metadata_ptr)
906 if run_metadata:
907 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1135 if final_fetches or final_targets or (handle and feed_dict_tensor):
1136 results = self._do_run(handle, final_targets, final_fetches,
-> 1137 feed_dict_tensor, options, run_metadata)
1138 else:
1139 results = []
/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1353 if handle is None:
1354 return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1355 options, run_metadata)
1356 else:
1357 return self._do_call(_prun_fn, self._session, handle, feeds, fetches)
/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1372 except KeyError:
1373 pass
-> 1374 raise type(e)(node_def, op, message)
1375
1376 def _extend_graph(self):
NotFoundError: Key Variables_A/W_c1 not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
Caused by op u'save/RestoreV2', defined at:
File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/home/solano/.local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 486, in start
self.io_loop.start()
File "/home/solano/.local/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/home/solano/.local/lib/python2.7/site-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/home/solano/.local/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/home/solano/.local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/home/solano/.local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/home/solano/.local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/home/solano/.local/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/home/solano/.local/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/home/solano/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/home/solano/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
if self.run_code(code, result):
File "/home/solano/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-22-4397b861df66>", line 5, in <module>
return_elements=["conv_A_2/output:0"], name="")
File "/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 432, in new_func
return func(*args, **kwargs)
File "/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/framework/importer.py", line 553, in import_graph_def
op_def=op_def)
File "/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3271, in create_op
op_def=op_def)
File "/home/solano/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1650, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
NotFoundError (see above for traceback): Key Variables_A/W_c1 not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
Something really interesting: if I change the order of merging, taking the first two layers from B and the third layer from A, the error comes when loading the weights of graph A instead, just the opposite.
CODE
Loading graph A.
tf.reset_default_graph()
graph_A = tf.Graph()
with graph_A.as_default():
    saver_A = tf.train.import_meta_graph('./models/merge_graph_A.ckpt.meta')
    graph_A_def = graph_A.as_graph_def()
Loading Graph B.
tf.reset_default_graph()
graph_B = tf.Graph()
with graph_B.as_default():
    saver_B = tf.train.import_meta_graph('./models/merge_graph_B.ckpt.meta')
    graph_B_def = graph_B.as_graph_def()
Merging the two graphs:
tf.reset_default_graph() # not necessary
with tf.Graph().as_default() as graphs_merged:
    input_image = tf.placeholder(tf.float32, (None, None, None, 3), name='input')
    bottleneck, = tf.import_graph_def(graph_A_def, input_map={"input_A:0": input_image},
                                      return_elements=["conv_A_2/output:0"], name="")
    # Notice how the input is the bottleneck.
    output, = tf.import_graph_def(graph_B_def, input_map={"conv_B_2/output:0": bottleneck}, # HERE'S THE PROBLEM
                                  return_elements=["conv_B_3/output:0"], name="")
Loading weights:
with tf.Session(graph=graphs_merged) as sess:
    saver_A.restore(sess, './models/merge_graph_A.ckpt')
    saver_B.restore(sess, './models/merge_graph_B.ckpt') # HERE'S THE PROBLEM

    outcome = sess.run(output, feed_dict={
        input_image: image_test
    })
Thanks everybody!!!
EDIT: It's a problem with the saver. Both checkpoints were saved with the same prefix, 'save'. As a result, when merging, TensorFlow renames the second graph's saver ops with the prefix 'save_1', making it impossible for that graph's saver to find the variables it has to restore; it tries instead to restore the variables using the operations prefixed with 'save'.
So the question changes: is there a way to change the name of these ops after importing the meta graph here: saver_A = tf.train.import_meta_graph('./models/merge_graph_A.ckpt.meta')?
save/Const
save/SaveV2/tensor_names
save/SaveV2/shape_and_slices
save/SaveV2
save/control_dependency
save/RestoreV2/tensor_names
save/RestoreV2/shape_and_slices
save/RestoreV2
save/Assign
save/Assign_1
save/Assign_2
save/Assign_3
save/Assign_4
save/Assign_5
save/restore_all
save_1/Const
save_1/SaveV2/tensor_names
save_1/SaveV2/shape_and_slices
save_1/SaveV2
save_1/control_dependency
save_1/RestoreV2/tensor_names
save_1/RestoreV2/shape_and_slices
save_1/RestoreV2
save_1/Assign
save_1/Assign_1
save_1/Assign_2
save_1/Assign_3
save_1/Assign_4
save_1/Assign_5
save_1/restore_all
Try to define a different saver name when saving the two checkpoints; then it works.
example:
save:
saver1 = tf.train.Saver(name="saver1")
saver1.save(sess, model_path1)
saver2 = tf.train.Saver(name="saver2")
saver2.save(sess, model_path2)
restore:
saver1 = tf.train.import_meta_graph(self.meta_path1)
saver2 = tf.train.import_meta_graph(self.meta_path2)
saver1.restore(sess, tf.train.latest_checkpoint(self.model_path1))
saver2.restore(sess, tf.train.latest_checkpoint(self.model_path2))
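With distinct names, each saver's ops are created under its own prefix in both graphs, so after import_meta_graph each saver finds its own RestoreV2 ops instead of colliding on the default 'save' prefix as described in the EDIT above.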
I was trying to adapt an NN architecture I saw in a blog post here: https://sorenbouma.github.io/blog/oneshot/
The only thing I was trying to change about it was the input: instead of (105,105,1) greyscale I was hoping to use (100,100,3) RGB. I thus used Keras to define the architecture as in the blog post, but with a different input:
def W_init(shape,name=None):
    """Initialize weights as in paper"""
    values = rng.normal(loc=0,scale=1e-2,size=shape)
    return K.variable(values,name=name)
#//TODO: figure out how to initialize layer biases in keras.
def b_init(shape,name=None):
    """Initialize bias as in paper"""
    values=rng.normal(loc=0.5,scale=1e-2,size=shape)
    return K.variable(values,name=name)
input_shape = (100, 100, 3)
left_input = Input(input_shape)
right_input = Input(input_shape)
#build convnet to use in each siamese 'leg'
convnet = Sequential()
convnet.add(Conv2D(64,(10,10),activation='relu',input_shape=input_shape,
kernel_initializer=W_init,kernel_regularizer=l2(2e-4)))
convnet.add(MaxPooling2D())
convnet.add(Conv2D(128,(7,7),activation='relu',
kernel_regularizer=l2(2e-4),kernel_initializer=W_init,bias_initializer=b_init))
convnet.add(MaxPooling2D())
convnet.add(Conv2D(128,(4,4),activation='relu',kernel_initializer=W_init,kernel_regularizer=l2(2e-4),bias_initializer=b_init))
convnet.add(MaxPooling2D())
convnet.add(Conv2D(256,(4,4),activation='relu',kernel_initializer=W_init,kernel_regularizer=l2(2e-4),bias_initializer=b_init))
convnet.add(Flatten())
convnet.add(Dense(4096,activation="sigmoid",kernel_regularizer=l2(1e-3),kernel_initializer=W_init,bias_initializer=b_init))
#encode each of the two inputs into a vector with the convnet
encoded_l = convnet(left_input)
encoded_r = convnet(right_input)
#merge two encoded inputs with the l1 distance between them
L1_distance = lambda x: K.abs(x[0]-x[1])
both = merge([encoded_l,encoded_r], mode = L1_distance, output_shape=lambda x: x[0])
prediction = Dense(1,activation='sigmoid',bias_initializer=b_init)(both)
siamese_net = Model(input=[left_input,right_input],output=prediction)
#optimizer = SGD(0.0004,momentum=0.6,nesterov=True,decay=0.0003)
optimizer = Adam(0.00006)
#//TODO: get layerwise learning rates and momentum annealing scheme described in paperworking
siamese_net.compile(loss="binary_crossentropy",optimizer=optimizer)
siamese_net.count_params()
I then train the network on my data as in the paper:
#Training loop
evaluate_every = 500
loss_every=50
batch_size = 20
N_way = 20
n_val = 250
#siamese_net.load_weights("/home/soren/keras-oneshot/weights")
max_epochs = 100
best = 0.0  # best validation accuracy so far; needs an initial value for the comparison below
for i in range(0,max_epochs):
    (inputs,targets)=loader.get_batch(batch_size)
    loss=siamese_net.train_on_batch(inputs,targets)
    if i % evaluate_every == 0:
        val_acc = loader.test_oneshot(siamese_net,N_way,n_val,verbose=True)
        if val_acc >= best:
            print("saving")
            siamese_net.save('/home/soren/keras-oneshot/weights')
            best=val_acc
    if i % loss_every == 0:
        print("iteration {}, training loss: {:.2f},".format(i,loss))
But I get
FailedPreconditionError: Attempting to use uninitialized value conv2d_1/Variable
[[Node: conv2d_1/Variable/read = Identity[T=DT_FLOAT, _class=["loc:@conv2d_1/Variable"], _device="/job:localhost/replica:0/task:0/cpu:0"](conv2d_1/Variable)]]
Here is full error output:
---------------------------------------------------------------------------
FailedPreconditionError Traceback (most recent call last)
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1138 try:
-> 1139 return fn(*args)
1140 except errors.OpError as e:
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1120 feed_dict, fetch_list, target_list,
-> 1121 status, run_metadata)
1122
/usr/lib/python3.4/contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
465 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 466 pywrap_tensorflow.TF_GetCode(status))
467 finally:
FailedPreconditionError: Attempting to use uninitialized value conv2d_1/Variable
[[Node: conv2d_1/Variable/read = Identity[T=DT_FLOAT, _class=["loc:@conv2d_1/Variable"], _device="/job:localhost/replica:0/task:0/cpu:0"](conv2d_1/Variable)]]
During handling of the above exception, another exception occurred:
FailedPreconditionError Traceback (most recent call last)
<ipython-input-15-06f79f757a6e> in <module>()
9 for i in range(0,max_epochs):
10 (inputs,targets)=loader.get_batch(batch_size)
---> 11 loss=siamese_net.train_on_batch(inputs,targets)
12 if i % evaluate_every == 0:
13 val_acc = loader.test_oneshot(siamese_net,N_way,n_val,verbose=True)
/usr/local/lib/python3.4/dist-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1563 ins = x + y + sample_weights
1564 self._make_train_function()
-> 1565 outputs = self.train_function(ins)
1566 if len(outputs) == 1:
1567 return outputs[0]
/usr/local/lib/python3.4/dist-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2263 value = (indices, sparse_coo.data, sparse_coo.shape)
2264 feed_dict[tensor] = value
-> 2265 session = get_session()
2266 updated = session.run(self.outputs + [self.updates_op],
2267 feed_dict=feed_dict,
/usr/local/lib/python3.4/dist-packages/keras/backend/tensorflow_backend.py in get_session()
166 if not _MANUAL_VAR_INIT:
167 with session.graph.as_default():
--> 168 _initialize_variables()
169 return session
170
/usr/local/lib/python3.4/dist-packages/keras/backend/tensorflow_backend.py in _initialize_variables()
339 if uninitialized_variables:
340 sess = get_session()
--> 341 sess.run(tf.variables_initializer(uninitialized_variables))
342
343
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
995 if final_fetches or final_targets:
996 results = self._do_run(handle, final_targets, final_fetches,
--> 997 feed_dict_string, options, run_metadata)
998 else:
999 results = []
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1130 if handle is None:
1131 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132 target_list, options, run_metadata)
1133 else:
1134 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1150 except KeyError:
1151 pass
-> 1152 raise type(e)(node_def, op, message)
1153
1154 def _extend_graph(self):
FailedPreconditionError: Attempting to use uninitialized value conv2d_1/Variable
[[Node: conv2d_1/Variable/read = Identity[T=DT_FLOAT, _class=["loc:@conv2d_1/Variable"], _device="/job:localhost/replica:0/task:0/cpu:0"](conv2d_1/Variable)]]
Caused by op 'conv2d_1/Variable/read', defined at:
File "/usr/lib/python3.4/runpy.py", line 170, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.4/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.4/dist-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.4/dist-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python3.4/dist-packages/tornado/ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python3.4/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python3.4/dist-packages/ipykernel/zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-51595f796dab>", line 17, in <module>
kernel_initializer=W_init,kernel_regularizer=l2(2e-4)))
File "/usr/local/lib/python3.4/dist-packages/keras/models.py", line 436, in add
layer(x)
File "/usr/local/lib/python3.4/dist-packages/keras/engine/topology.py", line 569, in __call__
self.build(input_shapes[0])
File "/usr/local/lib/python3.4/dist-packages/keras/layers/convolutional.py", line 134, in build
constraint=self.kernel_constraint)
File "/usr/local/lib/python3.4/dist-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.4/dist-packages/keras/engine/topology.py", line 391, in add_weight
weight = K.variable(initializer(shape), dtype=dtype, name=name)
File "<ipython-input-2-51595f796dab>", line 4, in W_init
return K.variable(values,name=name)
File "/usr/local/lib/python3.4/dist-packages/keras/backend/tensorflow_backend.py", line 321, in variable
v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/variables.py", line 200, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/variables.py", line 319, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1303, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value conv2d_1/Variable
[[Node: conv2d_1/Variable/read = Identity[T=DT_FLOAT, _class=["loc:#conv2d_1/Variable"], _device="/job:localhost/replica:0/task:0/cpu:0"](conv2d_1/Variable)]]
Googling about the error did not really make things clearer. I saw some posts about this error coming up when using TensorFlow, but nothing regarding the error together with Keras, so I am a little confused about what is going on.
I experienced the same error a few days ago.
The cause of the error is the weight initialization.
Try changing the following code.
(before)
convnet.add(Conv2D(64, (10, 10), activation='relu', input_shape=input_shape,
                   kernel_initializer=W_init, kernel_regularizer=l2(2e-4)))
(after)
convnet.add(Conv2D(64, (10, 10), activation='relu', input_shape=input_shape,
                   kernel_initializer=keras.initializers.RandomNormal(
                       mean=0.0, stddev=1e-2, seed=None),
                   kernel_regularizer=l2(2e-4)))
Change every use of W_init and b_init to keras.initializers.RandomNormal(...). As the traceback shows, W_init returns K.variable(values, name=name), so the custom initializer itself creates a TensorFlow variable; Keras then uses that variable as the initial value of the layer weight, and the inner variable is never initialized, which is what raises the FailedPreconditionError.
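For instance, here is a minimal self-contained sketch of the fixed layer definition; the input shape, the stddev values, and the bias initializer are illustrative assumptions, not taken from the original code:
import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.regularizers import l2
# Built-in initializers let Keras create and initialize the weight variables
# itself, instead of a custom W_init/b_init wrapping the values in an extra,
# never-initialized variable.
w_init = keras.initializers.RandomNormal(mean=0.0, stddev=1e-2, seed=None)  # replaces W_init
b_init = keras.initializers.RandomNormal(mean=0.0, stddev=1e-2, seed=None)  # replaces b_init (stddev assumed)
convnet = Sequential()
convnet.add(Conv2D(64, (10, 10), activation='relu',
                   input_shape=(105, 105, 1),  # example shape; use your own
                   kernel_initializer=w_init,
                   bias_initializer=b_init,
                   kernel_regularizer=l2(2e-4)))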
I am trying to save a model and restore it in another file using TensorFlow. I use this code to train and save the model.
import input_data
import os
import tensorflow as tf
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
x = tf.placeholder("float", shape=[None, 784])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
y_ = tf.placeholder("float", shape=[None, 10])
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
saver = tf.train.Saver()
# train data and get results for batches
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# train the data
for i in range(10):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    print(batch_xs)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("accuracy", sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
prediction=tf.argmax(y,1)
arr=prediction.eval(feed_dict={x: mnist.test.images}, session=sess)
#print ("predictions", )
#for i in range(len(arr)):
#    print(arr[i])
save_path = saver.save(sess, '/model.ckpt')
print ('Model saved in file: ', save_path)
and I try to restore it using this code.
import input_data
import os
import tensorflow as tf
#mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
x = tf.placeholder("float", shape=[None, 784])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
y_ = tf.placeholder("float", shape=[None, 10])
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()
tf.train.NewCheckpointReader("./model.ckpt")
with tf.Session() as sess:
    sess.run(init_op)
    #print("sess.run")
    saver.restore(sess, "./model.ckpt")
    print("Model restored.")
On the saver.restore(sess, "./model.ckpt") line I get a NotFoundError. The error looks like this:
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1020 try:
-> 1021 return fn(*args)
1022 except errors.OpError as e:
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
1002 feed_dict, fetch_list, target_list,
-> 1003 status, run_metadata)
1004
C:\Anaconda3\envs\tensorflow\lib\contextlib.py in __exit__(self, type, value, traceback)
65 try:
---> 66 next(self.gen)
67 except StopIteration:
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in raise_exception_on_not_ok_status()
468 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469 pywrap_tensorflow.TF_GetCode(status))
470 finally:
NotFoundError: Key y_3 not found in checkpoint
[[Node: save_16/RestoreV2_47 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_16/Const_0, save_16/RestoreV2_47/tensor_names, save_16/RestoreV2_47/shape_and_slices)]]
During handling of the above exception, another exception occurred:
NotFoundError Traceback (most recent call last)
<ipython-input-42-17503962c118> in <module>()
17 sess.run(init_op)
18 #print("sess.run")
---> 19 saver.restore(sess, "./model.ckpt")
20 print ("Model restored.")
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py in restore(self, sess, save_path)
1386 return
1387 sess.run(self.saver_def.restore_op_name,
-> 1388 {self.saver_def.filename_tensor_name: save_path})
1389
1390 #staticmethod
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
764 try:
765 result = self._run(None, fetches, feed_dict, options_ptr,
--> 766 run_metadata_ptr)
767 if run_metadata:
768 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
962 if final_fetches or final_targets:
963 results = self._do_run(handle, final_targets, final_fetches,
--> 964 feed_dict_string, options, run_metadata)
965 else:
966 results = []
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1012 if handle is None:
1013 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014 target_list, options, run_metadata)
1015 else:
1016 return self._do_call(_prun_fn, self._session, handle, feed_dict,
C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1032 except KeyError:
1033 pass
-> 1034 raise type(e)(node_def, op, message)
1035
1036 def _extend_graph(self):
NotFoundError: Key y_3 not found in checkpoint
[[Node: save_16/RestoreV2_47 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_16/Const_0, save_16/RestoreV2_47/tensor_names, save_16/RestoreV2_47/shape_and_slices)]]
Caused by op 'save_16/RestoreV2_47', defined at:
File "C:\Anaconda3\envs\tensorflow\lib\runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "C:\Anaconda3\envs\tensorflow\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\__main__.py", line 3, in <module>
app.launch_new_instance()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
app.start()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tornado\stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 390, in execute_request
user_expressions, allow_stdin)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 501, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-42-17503962c118>", line 14, in <module>
saver = tf.train.Saver()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py", line 1000, in __init__
self.build()
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py", line 1030, in build
restore_sequentially=self._restore_sequentially)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py", line 624, in build
restore_sequentially, reshape)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py", line 361, in _AddRestoreOps
tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\training\saver.py", line 200, in restore_op
[spec.tensor.dtype])[0])
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_io_ops.py", line 441, in restore_v2
dtypes=dtypes, name=name)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 759, in apply_op
op_def=op_def)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1128, in __init__
self._traceback = _extract_stack()
NotFoundError (see above for traceback): Key y_3 not found in checkpoint
[[Node: save_16/RestoreV2_47 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_16/Const_0, save_16/RestoreV2_47/tensor_names, save_16/RestoreV2_47/shape_and_slices)]]
What is the real problem here?
It looks like you are saving to the root directory ("/model.ckpt") but loading from the current directory ("./model.ckpt").
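If that is the case, a minimal sketch of the fix is to use the same relative path in both scripts:
# Training script: save next to the script instead of the filesystem root.
save_path = saver.save(sess, './model.ckpt')
# Restore script: load from the same location.
saver.restore(sess, './model.ckpt')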
In my opinion, you should give the tf.Variables W and b explicit names. For example,
W = tf.Variable(tf.zeros([784,10]),name="w_var")
b = tf.Variable(tf.zeros([10]),name="b_var")
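Without explicit names TensorFlow auto-generates them, and re-running a notebook cell creates fresh copies with uniquified names (the _3 suffix in the missing key y_3 suggests the cell ran several times), so the Saver looks for checkpoint keys that were never saved. As a minimal sketch, both the save script and the restore script should then build the variables with matching names:
# Use the same explicit names in both scripts so the checkpoint keys match.
W = tf.Variable(tf.zeros([784, 10]), name="w_var")
b = tf.Variable(tf.zeros([10]), name="b_var")
saver = tf.train.Saver()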
Good luck to you!