NotFoundError when restoring tensorflow session - python

Here is the code:
import tensorflow as tf

def save(checkpoint_file='hello.chk'):
    with tf.Session() as session:
        x = tf.Variable(initial_value=[1, 2, 3], name="x")
        y = tf.Variable(initial_value=[[1.0, 2.0], [3.0, 4.0]], name="y")
        not_saved = tf.Variable(initial_value=[[11.0, 2.0], [3.0, 4.0]], name="not_saved")
        session.run(tf.global_variables_initializer())
        print(session.run(tf.global_variables()))
        saver = tf.train.Saver([x, y])
        saver.save(session, checkpoint_file)
        print(session.run(tf.global_variables()))
        print("saved!!!!!!!!!!")

def restore(checkpoint_file='hello.chk'):
    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(sess=session, save_path=checkpoint_file)
        print(session.run(tf.global_variables()))

def reset():
    tf.reset_default_graph()

save()
restore("/home/kaiyin/PycharmProjects/text-classify/hello.chk")
I am just trying to save and restore some simple variables here, nothing complicated. The saving part seems fine, but I get the following error when restoring:
Traceback (most recent call last):
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1022, in _do_call
return fn(*args)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1004, in _run_fn
status, run_metadata)
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.NotFoundError: Key not_saved not found in checkpoint
[[Node: save_1/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_1/Const_0, save_1/RestoreV2/tensor_names, save_1/RestoreV2/shape_and_slices)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/kaiyin/PycharmProjects/text-classify/restore.py", line 25, in <module>
restore("/home/kaiyin/PycharmProjects/text-classify/hello.chk")
File "/home/kaiyin/PycharmProjects/text-classify/restore.py", line 18, in restore
saver.restore(sess=session, save_path=checkpoint_file)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1428, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key not_saved not found in checkpoint
[[Node: save_1/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_1/Const_0, save_1/RestoreV2/tensor_names, save_1/RestoreV2/shape_and_slices)]]
Caused by op 'save_1/RestoreV2', defined at:
File "/home/kaiyin/PycharmProjects/text-classify/restore.py", line 25, in <module>
restore("/home/kaiyin/PycharmProjects/text-classify/hello.chk")
File "/home/kaiyin/PycharmProjects/text-classify/restore.py", line 17, in restore
saver = tf.train.Saver()
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1040, in __init__
self.build()
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1070, in build
restore_sequentially=self._restore_sequentially)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 675, in build
restore_sequentially, reshape)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 402, in _AddRestoreOps
tensors = self.restore_op(filename_tensor, saveable, preferred_shard)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 242, in restore_op
[spec.tensor.dtype])[0])
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_io_ops.py", line 668, in restore_v2
dtypes=dtypes, name=name)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2327, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/kaiyin/virtualenvs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1226, in __init__
self._traceback = _extract_stack()
NotFoundError (see above for traceback): Key not_saved not found in checkpoint
[[Node: save_1/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_save_1/Const_0, save_1/RestoreV2/tensor_names, save_1/RestoreV2/shape_and_slices)]]
Process finished with exit code 1
Tensorflow version:
>>> print(tf.__version__)
1.0.1

Deleting the list of vars passed to tf.train.Saver() in save() solves the problem: with no argument, the Saver writes every variable in the graph to the checkpoint, so the restore-side Saver() (which likewise defaults to all variables, including not_saved) finds every key it looks for. Here is the working code:
import tensorflow as tf

filepath = "/home/kaiyin/PycharmProjects/text-classify/hello.chk"

def save(checkpoint_file=filepath):
    with tf.Session() as session:
        x = tf.Variable(initial_value=[1, 2, 3], name="x")
        y = tf.Variable(initial_value=[[1.0, 2.0], [3.0, 4.0]], name="y")
        not_saved = tf.Variable(initial_value=[[11.0, 2.0], [3.0, 4.0]], name="not_saved")
        session.run(tf.global_variables_initializer())
        print(session.run(tf.global_variables()))
        saver = tf.train.Saver()  # no variable list: every variable is saved
        saver.save(session, checkpoint_file)
        print(session.run(tf.global_variables()))
        print("saved!!!!!!!!!!")

def restore(checkpoint_file='hello.chk'):
    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(sess=session, save_path=checkpoint_file)
        print(session.run(tf.global_variables()[0]))
        print(session.run(x))  # note: x is only defined inside save(), so this line raises NameError here

def reset():
    tf.reset_default_graph()

save()
restore(filepath)
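An alternative that should also work (a sketch on my part, not tested beyond the variables above): keep the restricted Saver([x, y]) at save time, but pass an explicit variable list on the restore side too, so the restoring Saver never looks up not_saved in the checkpoint. The shapes, dtypes and names must match what was saved.

import tensorflow as tf

def restore_xy(checkpoint_file=filepath):
    tf.reset_default_graph()  # clean graph so the variable names match the checkpoint keys
    with tf.Session() as session:
        # Recreate only the variables that Saver([x, y]) actually wrote.
        x = tf.Variable(initial_value=[0, 0, 0], name="x")
        y = tf.Variable(initial_value=[[0.0, 0.0], [0.0, 0.0]], name="y")
        saver = tf.train.Saver([x, y])  # restrict the restore to the saved keys
        saver.restore(sess=session, save_path=checkpoint_file)
        print(session.run([x, y]))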

Related

tf.keras - Importing model with batchnormalization layers

I've been stuck on this issue for a little while. I'm trying to run the code below with the tf_cnnvis package (https://github.com/InFoCusp/tf_cnnvis) for visualising learnt features in the network: I import my protobuf model and then try to provide it with a tensor containing some image data (which I believe is provided as a feed_dict, although I could be mistaken).
import numpy as np
import tensorflow as tf
import keras as k
import cv2
import tf_cnnvis as tfv
from tensorflow.python.platform import gfile
from keras import backend as K

model_filename = "saved_model.pb"
image = "test.jpg"

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, allow_growth=False)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(sess)
K._LEARNING_PHASE = tf.constant(0)
K.set_learning_phase(0)

with gfile.FastGFile(model_filename, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    tf.import_graph_def(graph_def)

X = tf.placeholder(tf.float32, shape=[None, 48, 64, 3], name="input")  # placeholder for input images
y = tf.placeholder(tf.float32, shape=[None, 8])

im = np.array(cv2.imread(image))
im = np.expand_dims(im, 0)

layers = ['r', 'p', 'c']

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)

with sess.as_default():
    is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(),
                                              value_feed_dict={X: im}, layers=layers)

sess.close()
When I run my code, I get an "InvalidArgumentError" with this traceback:
Traceback (most recent call last):
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1292, in _do_call
return fn(*args)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1277, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1367, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 169, in _get_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 227, in _visualization_by_layer_type
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 288, in _visualization_by_layer_name
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 315, in _activation
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 887, in run
run_metadata_ptr)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1110, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1286, in _do_run
run_metadata)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1308, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'import/batch_normalization_1_input', defined at:
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
path_logdir = path_logdir, path_outdir = path_outdir)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 159, in _get_visualization
s = _graph_import_function(PATH,s)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 177, in _graph_import_function
new_saver = tf.train.import_meta_graph(PATH) # Import graph
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1650, in import_meta_graph
meta_graph_or_file, clear_devices, import_scope, **kwargs)[0]
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1672, in _import_meta_graph_with_return_elements
**kwargs))
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/meta_graph.py", line 806, in import_scoped_meta_graph_with_return_elements
return_elements=return_elements)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 442, in import_graph_def
_ProcessNewOps(graph)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 234, in _ProcessNewOps
for new_op in graph._add_new_tf_operations(compute_devices=False): # pylint: disable=protected-access
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3426, in _add_new_tf_operations
for c_op in c_api_util.new_tf_operations(self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3426, in <listcomp>
for c_op in c_api_util.new_tf_operations(self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3285, in _create_op_from_tf_operation
ret = Operation(c_op, self)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'import/batch_normalization_1_input' with dtype float and shape [?,48,64,3]
[[{{node import/batch_normalization_1_input}} = Placeholder[_class=["loc:#import/batch_normalization/cond/FusedBatchNorm_1/Switch"], dtype=DT_FLOAT, shape=[?,48,64,3], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[{{node import/conv2d/Relu/_5}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_50_import/conv2d/Relu", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Now, I've looked around and I've arrived (tentatively) at the conclusion that this is due to a learning phase variable that's set in the BatchNormalization layer that I have in the model. I'm unclear as to how to set the learning phase when you've imported the model. Some people set the learning phase before initializing the model (which as you can see, I have attempted), but in most examples of this they're using one of the large, pre-provided models (such as MNIST). Others provide the learning phase in the feed_dict, which I have also tried, like so:
with sess.as_default():
    is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(),
                                              value_feed_dict={X: im, K.learning_phase(): 0}, layers=layers)
But this gives me a different error message:
Traceback (most recent call last):
File "vis2.py", line 36, in <module>
is_success = tfv.activation_visualization(sess_graph_path=tf.get_default_graph(), value_feed_dict = {X : im, K.learning_phase(): 0}, layers=layers)
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 406, in activation_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 169, in _get_visualization
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 227, in _visualization_by_layer_type
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/tf_cnnvis.py", line 270, in _visualization_by_layer_name
File "/usr/local/anaconda3/lib/python3.6/site-packages/tf_cnnvis-1.0.0-py3.6.egg/tf_cnnvis/utils.py", line 79, in parse_tensors_dict
AttributeError: 'int' object has no attribute 'name'
At this stage, seeing as I'm still not completely sure if the problem I'm trying to fix is even the right one, I would very much appreciate some input. If there's anything else you need me to provide, please ask.
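One thing I now suspect (my own reading of the traceback, not a confirmed diagnosis): the X placeholder is created after tf.import_graph_def and is not connected to the imported network, so feeding it cannot satisfy the graph's real input, import/batch_normalization_1_input. Below is a minimal sketch that looks the imported input tensor up by name and feeds that instead; the tensor name is taken from the error message and is an assumption, so inspect the graph to confirm it.

graph = tf.get_default_graph()
# The imported model's own input placeholder; confirm the exact name with
# [op.name for op in graph.get_operations()].
input_tensor = graph.get_tensor_by_name("import/batch_normalization_1_input:0")

with sess.as_default():
    is_success = tfv.activation_visualization(sess_graph_path=graph,
                                              value_feed_dict={input_tensor: im},
                                              layers=layers)

The separate AttributeError from the second attempt also seems consistent with K.learning_phase() returning a plain Python int once K.set_learning_phase(0) has been called, so at that point it cannot be used as a feed_dict key.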

OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)

I don't know how to solve this problem; the error message doesn't help me locate it. Thanks for helping!
Here is the data in e.csv, D.csv and F.csv:
e.csv:
1,2,3
4,5,6
7,8,9
D.csv:
11,12,13
14,15,16
17,18,19
F.csv:
21,22,23
24,25,26
27,28,29
Here is my code:
import tensorflow as tf
import os

file_dir = './KDD2'
fileNameQueue = []
for file in os.listdir(file_dir):
    fileNameQueue.append(file)

print fileNameQueue

filename_queue = tf.train.string_input_producer(fileNameQueue, shuffle=False)
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
col1, col2, label = tf.decode_csv(value, record_defaults=[[1], [1], [1]])
example = tf.pack([col1, col2])
example_batch, label_batch = tf.train.batch([example, label], batch_size=5)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(10):
        print example_batch.eval()
    coord.request_stop()
    coord.join(threads)
Here is the error message:
root@ubuntumagiclab:/home/magiclab/SAE# python try.py
['e.csv', 'D.csv', 'F.csv']
Traceback (most recent call last):
File "try.py", line 30, in <module>
print example_batch.eval()
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/framework/ops.py", line 575, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/framework/ops.py", line 3633, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-
packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_0_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)
[[Node: batch = QueueDequeueMany[_class=["loc:#batch/fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
Caused by op u'batch', defined at:
File "try.py", line 24, in <module>
example_batch, label_batch = tf.train.batch([example, label], batch_size=5)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 692, in batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 458, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 1099, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): FIFOQueue '_0_batch/fifo_queue' is closed and has insufficient elements (requested 5, current size 0)
[[Node: batch = QueueDequeueMany[_class=["loc:#batch/fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
The problem is with the file paths: os.listdir() returns bare file names, so the filename queue ends up with paths the reader cannot open from the working directory. Provide complete paths to the filename queue, as shown below.
This works for me:
fileNameQueue.append('/home/****/Desktop/stackoverflow/data/' +file)
Hope this helps.
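A small variation on the same fix (a sketch, assuming the CSV files live in the file_dir from the question): join each name with its directory instead of hard-coding an absolute path.

import os

file_dir = './KDD2'
# os.listdir() returns bare names, so prepend the directory they live in.
fileNameQueue = [os.path.join(file_dir, f) for f in os.listdir(file_dir)]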

Tensorflow placeholder problems: error: you must feed a value for placeholder tensor 'Placeholder' with dtype float error

I am implementing a convolutional autoencoder that has to reduce the dimensionality of camera images one image at a time, in real time, so my batch size is one. Y_pred is the output of the network. I want to evaluate it so I can see what my network is outputting.
This is the code I use to import my data and feed the placeholders:
def inputs(image_file_path):
    filenames = [image_file_path]
    filename_queue = tf.train.string_input_producer(filenames)
    read_input = read_image(filename_queue)
    return read_input

with tf.Session() as sess:
    image = inputs(file_path)
    coord = tf.train.Coordinator()
    init = tf.global_variables_initializer()
    sess.run(init)
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    image = tf.cast(image/255, tf.float32)
    ### data is just one image, so I set the dimension [batch, height, width, channels] to [1,120,160,3]
    image = tf.reshape(image, [1, 120, 160, 3])
    X_data = image.eval()
    sess.run(train_step, {X_input: X_data, Y_true: X_data})
    output = Y_pred
    ### Everything works fine up until this point.
    print(Y_pred.eval())
    coord.request_stop()
    coord.join(threads)
The placeholders are defined here:
X_input = tf.placeholder(tf.float32, [None,120,160,3])
Y_true = tf.placeholder(tf.float32, [None,120,160,3])
The error I'm getting:
Traceback (most recent call last):
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1039, in _do_call
return fn(*args)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1021, in _run_fn
status, run_metadata)
File "C:\Python35\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Julian\workspaceNeon\Scriptie\Autoencoder\__init__.py", line 164, in <module>
print(output.eval())
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 569, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3741, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 778, in run
run_metadata_ptr)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "C:\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'Placeholder', defined at:
File "C:\Users\Julian\workspaceNeon\Scriptie\Autoencoder\__init__.py", line 50, in <module>
X_input = tf.placeholder(tf.float32, [None,120,160,3])
File "C:\Python35\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1507, in placeholder
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1997, in _placeholder
name=name)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float
[[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
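For completeness, my current guess (not verified): Y_pred.eval() runs the graph again without a feed_dict, so X_input is unfed on that second run even though the earlier sess.run(train_step, ...) fed it. A minimal sketch that passes the feed when evaluating the output, reusing the names from the code above:

# Evaluate the network output with the same feed used for training,
# instead of calling Y_pred.eval() with no feed_dict.
output_value = sess.run(Y_pred, feed_dict={X_input: X_data})
print(output_value)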

TensorFlow OutOfRangeError FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements

I am desperately trying to find the error in my program. I consistently get the following error:
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:#input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Traceback (most recent call last):
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1039, in _do_call
return fn(*args)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1021, in _run_fn
status, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test_tfr.py", line 60, in <module>
res = sess.run(item_cont_b)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 778, in run
run_metadata_ptr)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 982, in _run
feed_dict_string, options, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1032, in _do_run
target_list, options, run_metadata)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1052, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
Caused by op 'batch', defined at:
File "test_tfr.py", line 48, in <module>
allow_smaller_final_batch=True)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 917, in batch
name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 710, in _batch
dequeued = queue.dequeue_up_to(batch_size, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/data_flow_ops.py", line 499, in dequeue_up_to
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 1420, in _queue_dequeue_up_to_v2
timeout_ms=timeout_ms, name=name)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/media/home/user/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 3, current size 0)
[[Node: batch = QueueDequeueUpToV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]]
I am using TF version 1.1.0; the tfrecords file is 40 GB and contains 1.6M examples. My script (below) is in the same folder as 201704_test.tfrecords, which I am trying to read from. The current batch size, capacity and loop range are arbitrarily chosen for testing. I have tried a few things mentioned on GitHub and Stack Overflow, but I keep getting stuck here.
My script:
import os
import numpy as np
import tensorflow as tf

def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'user': tf.FixedLenFeature([1], tf.int64),
            'item': tf.FixedLenFeature([1], tf.int64),
            'week': tf.FixedLenFeature([1], tf.int64),
            'label': tf.FixedLenFeature([1], tf.int64),
            'item_cont': tf.FixedLenFeature([6], tf.float32),
            'item_cat': tf.FixedLenFeature([3092], tf.float32),
            'user_cont': tf.FixedLenFeature([12], tf.float32),
            'user_cat': tf.FixedLenFeature([3138], tf.float32)
        }
    )
    user = features['user']
    item = features['item']
    week = features['week']
    label = features['label']
    item_cont = features['item_cont']
    item_cat = features['item_cat']
    user_cont = features['user_cont']
    user_cat = features['user_cat']
    return user, item, week, label, item_cont, item_cat, user_cont, user_cat

filename_queue = tf.train.string_input_producer(['201704_test.tfrecords'],
                                                num_epochs=1)
user, item, week, label, item_cont, item_cat, user_cont, user_cat = \
    read_and_decode(filename_queue=filename_queue)

item_cont_b = tf.train.batch([item_cont],
                             batch_size=3,
                             num_threads=1,
                             capacity=32,
                             enqueue_many=False,
                             shapes=None,
                             dynamic_pad=False,
                             allow_smaller_final_batch=True)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(100000):
        if coord.should_stop():
            break
        res = sess.run(item_cont_b)
        print(res)
    coord.request_stop()
    coord.join(threads)
Thanks for any helpful advice!
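One thing I plan to check first (an assumption based on the script above, not a verified fix): string_input_producer(..., num_epochs=1) stores its epoch counter in a local variable, and the script only runs tf.global_variables_initializer(), so the input queue can close immediately and leave the batch queue empty. A sketch of the tail of the script that also initializes local variables:

# Initialize local variables too; num_epochs uses a local epoch counter.
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())

with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            print(sess.run(item_cont_b))
    except tf.errors.OutOfRangeError:
        pass  # reached the end of the single epoch
    finally:
        coord.request_stop()
        coord.join(threads)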

Tensorflow Out of Memory when saving?

Hi, I'm running the Linux CPU version of TensorFlow on Ubuntu 14.04, and I'm running out of memory when I try to save my model. I'm using the Deep MNIST tutorial that builds a convolutional network. You can find it here:
https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html#deep-mnist-for-experts
I changed a couple of things and tried to add a Saver to export the model weights. However, when I run it I get an error that says I am out of memory, which doesn't make sense to me: it can train forever, but saving somehow uses too much memory?
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.06
W tensorflow/core/framework/op_kernel.cc:909] Resource exhausted: OOM when allocating tensor with shape[10000,28,28,32]
Traceback (most recent call last):
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 66, in <module>
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10000,28,28,32]
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape, Variable/read)]]
Caused by op u'Conv2D', defined at:
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 28, in <module>
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 18, in conv2d
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()`
This is what it outputs when I run it. Thanks so much!
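Re-reading the traceback, the allocation that fails is the [10000, 28, 28, 32] conv activation during the final accuracy evaluation over the whole test set (x: mnist.test.images), not the Saver itself. So one thing I plan to try (a sketch, assuming the accuracy, x, y_ and keep_prob names from the Deep MNIST tutorial) is evaluating the test set in smaller chunks:

import numpy as np

# Evaluate test accuracy in chunks instead of feeding all 10,000 images at once.
chunk = 500
accuracies = []
for i in range(0, mnist.test.images.shape[0], chunk):
    accuracies.append(accuracy.eval(feed_dict={
        x: mnist.test.images[i:i + chunk],
        y_: mnist.test.labels[i:i + chunk],
        keep_prob: 1.0}))
print("test accuracy %g" % np.mean(accuracies))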
