i convert my own grayscale dataset with 60*60 pixel to tfrecords with write_tfrecord() but when i want to read and decode them it causes error . what is the problem ?
train_tfrecord_addr = './data/train.tfrecords'
test_tfrecord_addr = './data/test.tfrecords'
n_train_samples = 43990
n_test_samples = 12500
batch_size = 32 # number of batches in each iteration
keep_prob = 0.5 # Dropout, probability to keep units
n_epochs = 25
tfrecords_filename = './data/test.tfrecords'
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _float32_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def write_tfrecord(path):
images_addrs, images_labels = get_lable_and_image(path=path)
filename_pairs = list(zip(images_addrs, images_labels))
print(filename_pairs)
# to shuffle data
shuffle(filename_pairs)
writer = tf.python_io.TFRecordWriter(tfrecords_filename)
for img_path, label in filename_pairs:
# in this case all images are png with (32, 32) shape
img = np.array(Image.open(img_path)) # (32, 32) uint8
img_raw = img.tostring()
label_raw = label.tostring()
example = tf.train.Example(features=tf.train.Features(feature={
'image_raw': _bytes_feature(img_raw),
'label_raw': _bytes_feature(label_raw),
}))
writer.write(example.SerializeToString())
writer.close()
decode method ...
def read_and_decode(filename, batch_size, num_epochs, num_samples):
filename_queue = tf.train.string_input_producer([train_tfrecord_addr],
num_epochs=num_epochs)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
serialized_example,
# Defaults are not specified since both keys are required.
features={
'image_raw': tf.FixedLenFeature([], tf.string),
'label_raw': tf.FixedLenFeature([], tf.string),
})
# Convert from a scalar string tensor to a uint8 tensor
image_raw = tf.decode_raw(features['image_raw'], tf.uint8)
image_resized = tf.reshape(image_raw, [60 * 60])
# Convert from [0, 255] -> [-0.5, 0.5] floats.
image_resized = tf.cast(image_resized, tf.float32) * (1. / 255) - 0.5
# Convert from a scalar string tensor to a uint8 tensor
label_raw = tf.decode_raw(features['label_raw'], tf.uint8)
label_resized = tf.reshape(label_raw, [2])
images, labels = tf.train.batch([image_resized, label_resized],
batch_size=batch_size,
capacity=num_samples,
num_threads=2, )
return images, labels
this is the main code that feed convolutions data .
def run_training():
"""Train ShapeNet for a number of steps."""
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
# Input train images and labels.
train_images, train_labels = read_and_decode(filename=train_tfrecord_addr,
batch_size=batch_size,
num_epochs=n_epochs,
num_samples=n_train_samples)
# Input test images and labels.
# define batch_size = all test samples
test_images, test_labels = read_and_decode(filename=test_tfrecord_addr,
batch_size=n_test_samples,
num_epochs=n_epochs,
num_samples=n_test_samples)
# define placeholder for input images and labels
X = tf.placeholder(tf.float32, [None, 60 * 60])
Y = tf.placeholder(tf.float32, [None, 2])
# Build a Graph that computes predictions from the inference model.
prediction = convolutional_network_model(X)
# Backpropagation
# measure of error of our model
# this needs to be minimised by adjusting W and b
cross_entropy = -tf.reduce_sum(Y * tf.log(prediction))
# define training step which minimises cross entropy
train_op =tf.train.GradientDescentOptimizer(
learning_rate=0.001)
.minimize(cross_entropy)
# argmax gives index of highest entry in vector (1st axis of 1D tensor)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
# get mean of all entries in correct prediction, the higher the better
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# The op for initializing the variables.
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
st = time.time()
# Create a session for running operations in the Graph.
with tf.Session() as sess:
# Initialize the variables (the trained variables and the
# epoch counter).
sess.run(init_op)
# Start input enqueue threads.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for epoch in range(n_epochs):
for itr in range(n_train_samples // batch_size):
# fetch the batch train images and labels
batch_x, batch_y = sess.run([train_images, train_labels])
sess.run([train_op], feed_dict={X: batch_x, Y: batch_y})
# fetch whole test images and labels
batch_x, batch_y = sess.run([test_images, test_labels])
# feed the model with all test images and labels
acc, _ = sess.run([accuracy, train_op],
feed_dict={X: batch_x, Y: batch_y})
print('epoch %d/%d: , accuracy = %.3f'
% (epoch, n_epochs, acc))
# When done, ask the threads to stop.
coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
et = time.time()
duration = et - st
print(duration)
this is the error . i also convert my dataset in binary format but again i see the same error
2018-02-13 13:53:02.401813: I C:\tf_jenkins\workspace\rel-
win\M\windows\PY\35\tensorflow\core\platform\cpu_feature_guard.cc:137] Your
CPU supports instructions that this TensorFlow binary was not compiled to
use: AVX
Traceback (most recent call last):
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1350, in _do_call
return fn(*args) File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1329, in _run_fn
status, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue
'_1_batch/fifo_queue' is closed and has insufficient elements (requested 32,
current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_UINT8],
timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"]
(batch/fifo_queue, batch/n)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/project/python/MathNet/mathnet.py", line 232, in <module>
run_training()
File "F:/project/python/MathNet/mathnet.py", line 207, in run_training
batch_x, batch_y = sess.run([train_images, train_labels])
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1344, in _do_run
options, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue
'_1_batch/fifo_queue' is closed and has insufficient elements (requested
32, current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT,
DT_UINT8], timeout_ms=-1,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](batch/fifo_queue,
batch/n)]]
Caused by op 'batch', defined at:
File "F:/project/python/MathNet/mathnet.py", line 232, in <module>
run_training()
File "F:/project/python/MathNet/mathnet.py", line 159, in run_training
num_samples=n_train_samples)
File "F:/project/python/MathNet/mathnet.py", line 105, in
read_and_decode
num_threads=2, )
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\training\input.py", line 979, in batch
name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\training\input.py", line 754, in _batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\ops\data_flow_ops.py", line 475, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\ops\gen_data_flow_ops.py", line 2764, in
_queue_dequeue_many_v2
component_types=component_types, timeout_ms=timeout_ms, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\op_def_library.py", line 787, in
_apply_op_helper
op_def=op_def)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\ops.py", line 3160, in create_op
op_def=op_def)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-
access
OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue'
is closed and has insufficient elements (requested 32, current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_UINT8],
timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"]
(batch/fifo_queue, batch/n)]]
Related
Could someone please explain the following situation:
I've created a simple convolutional neural network using Tensorflow. I'm using a class and I've created my graph in the constructor. I then train the network using a train method I've written. I'm also using queues and the feed-in mechanism. This is an excerpt from the code:
class Super_res:
'Create a CNN model which augments the resolution of an image'
# object initialization (python) - constructor
def __init__(self, input, output, batch_size, record_size, weights, biases): # input (neurons), output (no. neurons), batch_size (batches to process before registering delta), record_size ()
print("Initializing object")
self.input = input
self.output = output
self.batch_size = batch_size
self.record_size = record_size
self.weights = weights
self.biases = biases
# initialize data batch readers. Parameters: [Path], record_size, batch_size
self.data_batch = data_reader3.batch_generator([DATA_PATH_OPTICAL_TRAIN],self.record_size, self.batch_size) # train set
self.data_batch_eval = data_reader3.batch_generator([DATA_PATH_EVAL],self.record_size, self.batch_size) # eval set
# this returns a [batch_size, 2, n_input] tensor. The second dimension is comprised of the low-res image and the GT high-res image. Each of these images is comprised of n_input entries (flat vector)
self.data1 = tf.placeholder_with_default(tf.transpose(self.data_batch, [1, 0, 2]), [2, batch_size, n_input]) # one for optical and another for GT image [batch_size, n_input] each
self.keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) - this placeholder can accept a Tensor of arbitrary shape
# create network model
self.pred = self.cnn_model(self.data1[0], self.weights, self.biases) # self.data1[0] is the low-res data
def train(self):
#self.low_res = self.data1[0]
#self.high_res = self.data1[1]
# define loss and optimizer
#self.cost = tf.reduce_mean(tf.pow(self.data1[1] - self.pred, 2))
#self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Initialize session
with tf.Session() as sess:
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
step = 1
print("Entering training")
# Keep training until reach max iterations
while step * batch_size < training_iters:
#_, c = sess.run([self.optimizer, self.cost])
conv_result = sess.run(self.pred)
print(conv_result)
#data2 = self.data1[0]
#print(data2)
if step % display_step == 0:
print("Step:", '%04d' % (step+1))
# "cost=", c)
step = step + 1
coord.request_stop()
coord.join(threads)
When I run this code, I get the following error output:
Entering training
Traceback (most recent call last):
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1139, in _do_call
return fn(*args)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1121, in _run_fn
status, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types= [DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"] (shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "super_res_class.py", line 137, in <module>
p.train()
File "super_res_class.py", line 106, in train
conv_result = sess.run(self.pred)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 789, in run
run_metadata_ptr)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types= [DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"] (shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op 'shuffle_batch', defined at:
File "super_res_class.py", line 136, in <module>
p = Super_res(1024,1024,512,record_size, weights, biases) # params (n_input, n_output, batch_size)
File "super_res_class.py", line 50, in __init__
self.data_batch = data_reader3.batch_generator([DATA_PATH_OPTICAL_TRAIN],self.record_size, self.batch_size) # train set
File "E:\google_drive\Doctorate\matlab code\Tensorflow\doctorate_CNN\dong_recreation\data_reader3.py", line 156, in batch_generator
capacity=capacity, min_after_dequeue=min_after_dequeue)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\training\input.py", line 1217, in shuffle_batch
name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\training\input.py", line 788, in _shuffle_batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\ops\data_flow_ops.py", line 457, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\ops\gen_data_flow_ops.py", line 946, in _queue_dequeue_many_v2
timeout_ms=timeout_ms, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
(tensorflow_gpu) E:\google_drive\Doctorate\matlab code\Tensorflow\doctorate_CNN\dong_recreation>
When I remove the sess.run() from my pred output, the code seems to operate normally.
def train(self):
#self.low_res = self.data1[0]
#self.high_res = self.data1[1]
# define loss and optimizer
#self.cost = tf.reduce_mean(tf.pow(self.data1[1] - self.pred, 2))
#self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Initialize session
with tf.Session() as sess:
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
step = 1
print("Entering training")
# Keep training until reach max iterations
while step * batch_size < training_iters:
#_, c = sess.run([self.optimizer, self.cost])
conv_result = self.pred
print(conv_result)
#data2 = self.data1[0]
#print(data2)
if step % display_step == 0:
print("Step:", '%04d' % (step+1))
# "cost=", c)
step = step + 1
coord.request_stop()
coord.join(threads)
Could someone please explain this to me? Normally, the graph is only evaluated when run under a session! What gives here?
Just saying conv_result = self.pred won't do anything -- you need, indeed, to do sess.run(self.pred) to get it to execute. The errors you're getting are something else about your model. As they say, your InputProducer has an empty queue. With the information you've given it can't be diagnosed, but I would search further on the site for why your InputProducer isn't filling / has zero size.
I want to train and check accuracy of validation set in this code. I use is_training and tf.QueueBase.from_list to switch between train and compute accuracy. type of datasets are tfrecord. inference give input images and a float as keep_drop_prop.
#inference, loss, training, evaluation functions ...
train_queue = tf.train.string_input_producer([train_data_path])
test_queue = tf.train.string_input_producer([validation_data_path])
# SELECT QUEUE
is_training = tf.placeholder(tf.bool, shape=None, name="is_training")
q_selector = tf.cond(is_training,
lambda: tf.constant(0),
lambda: tf.constant(1))
q = tf.QueueBase.from_list(q_selector, [train_queue, test_queue])
if is_training==True:
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([], tf.int64)}
reader = tf.TFRecordReader()
_, serialized_example1 = reader.read(q)
features = tf.parse_single_example(serialized_example, features=feature)
images = tf.decode_raw(features1['train/image'], tf.float32)
labels = tf.cast(features['train/label'], tf.int32)
images = tf.reshape(images, [50, 50, 3])
batch_Xs,batch_Ys=tf.train.shuffle_batch([images,labels],batch_size=500,capacity=500,min_after_dequeue=100)
batch_Xs = tf.cast(batch_Xs,tf.float32)/255
else:
feature = {'validation/image': tf.FixedLenFeature([], tf.string),
'validation/label': tf.FixedLenFeature([], tf.int64)}
reader = tf.TFRecordReader()
_, serialized_example = reader.read(q)
features = tf.parse_single_example(serialized_example, features=feature)
images = tf.decode_raw(features['validation/image'], tf.float32)
labels = tf.cast(features['validation/label'], tf.int32)
images = tf.reshape(images, [50, 50, 3])
batch_Xs,batch_Ys=tf.train.shuffle_batch([images,labels],batch_size=500,capacity=500,min_after_dequeue=100)
batch_Xs = tf.cast(batch_Xs,tf.float32)/255
if is_training==True:
logits=inference(batch_Xs,0.7)
total_loss = loss(logits,batch_Ys)
train_op = training(total_loss,learning_rate=LEARNING_RATE)
accuracy = evaluation(logits,batch_Ys)
else:
logits=inference(batch_Xs,1)
accuracy = evaluation(logits,batch_Ys)
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
tf.train.start_queue_runners(sess=sess)
for i in range(NUM_ITER):
_,loss_value,acc=sess.run([train_op,total_loss,accuracy],feed_dict={is_training:True})
val_acc=sess.run([accuracy],feed_dict={is_training:False})
The result of this code is :
UnboundLocalError: local variable 'train_op' referenced before assignment
InvalidArgumentError: You must feed a value for placeholder tensor 'is_training' with dtype bool
[[Node: is_training = Placeholder[dtype=DT_BOOL, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op u'is_training', defined at:
File "/home/.../anaconda2/lib/python2.7/runpy.py", line 174, in _run_module_as_main
"__main__", fname, loader, pkg_name)
File "/home/.../anaconda2/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/home/.../.local/lib/python2.7/site-packages/ipykernel/__main__.py", line 3, in <module>
app.launch_new_instance()
File "/home/.../.local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/home/.../.local/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 474, in start
ioloop.IOLoop.instance().start()
File "/home/.../.local/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "/home/.../.local/lib/python2.7/site-packages/tornado/ioloop.py", line 887, in start
handler_func(fd_obj, events)
File "/home/.../.local/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(*args, **kwargs)
I think queues can't access input data ... Exactly I don't know where is the problem ...Thanks so much for your help...
Classify MNIST Digits with Tensorflow by a 2-layer RNN approach. Training works fine, but when evaluating accuracy, incompatible shape of test data is reported.
import tensorflow as tf
import inspect
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
hm_epochs = 1
n_classes = 10
batch_size = 128
chunk_size = 28
n_chunks = 28
rnn_size = 128
x = tf.placeholder('float', [None, n_chunks,chunk_size])
y = tf.placeholder('float')
def lstm_cell():
if 'reuse' in inspect.getargspec(
tf.contrib.rnn.BasicLSTMCell.__init__).args:
return tf.contrib.rnn.BasicLSTMCell(
rnn_size, forget_bias=0.0, state_is_tuple=True,
reuse=tf.get_variable_scope().reuse)
else:
return tf.contrib.rnn.BasicLSTMCell(
rnn_size, forget_bias=0.0, state_is_tuple=True)
def attn_cell():
return tf.contrib.rnn.DropoutWrapper(
lstm_cell())
def recurrent_neural_network(x):
layer = {'weights':tf.Variable(tf.random_normal([rnn_size,n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, chunk_size])
x = tf.split(x, n_chunks, 0)
stacked_lstm = tf.contrib.rnn.MultiRNNCell([attn_cell(),attn_cell()], state_is_tuple=True)
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x,state)
output = tf.matmul(outputs[-1],layer['weights']) + layer['biases']
return output
def train_neural_network(x):
prediction = recurrent_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
epoch_x = epoch_x.reshape((batch_size,n_chunks,chunk_size))
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
testdata= np.reshape( mnist.test.images, (10000, n_chunks, chunk_size))
print("Testdata ",testdata.shape)
print("x ",x)
print('Accuracy:',accuracy.eval({x:testdata, y:mnist.test.labels}))
train_neural_network(x)
However, the shapes of test data and placeholders are printed as follows. Aren't they compatible?
Epoch 0 completed out of 1 loss: 228.159379691
Testdata (10000, 28, 28)
x Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32)
Error:
Caused by op 'rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_ce
ll/concat', defined at:
File "main.py", line 90, in <module>
train_neural_network(x)
File "main.py", line 59, in train_neural_network
prediction = recurrent_neural_network(x)
File "main.py", line 52, in recurrent_neural_network
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x,state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py"
, line 1212, in static_rnn
(output, state) = call_cell()
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py"
, line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base
.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base
.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 1021, in _linear
res = math_ops.matmul(array_ops.concat(args, 1), weights)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\array_o
ps.py", line 1048, in concat
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_arr
ay_ops.py", line 495, in _concat_v2
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
p_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
ps.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
ps.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs
should match: shape[0] = [10000,28] vs. shape[1] = [128,128]
[[Node: rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm
_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/
replica:0/task:0/cpu:0"](split, MultiRNNCellZeroState/DropoutWrapperZeroState/Ba
sicLSTMCellZeroState/zeros_1, rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_ce
ll/basic_lstm_cell/concat/axis)]]
When I print the shape of training data it is (128,28,28). I am confused that why the test data leads to the error because both training data and test data are formatted in the same way, that is (?,n_chunks,chunk_size). Thanks in advance.
The issue is that you always create the initial state with shape set to the training batch size instead of the eval batch size.
This is the culprit line:
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
When training I store my weights in a dictionary of tensorflow-variables. I pass that dictionary of weights to a 'model'-function together with some data to get my desired output.
After training, I would like to store that dictionary in a file in such a way that I can recreate it. That way I can apply the learned weights by simply passing the dictionary of weights together with the new data to the same model function.
According to the documentation, simply passing the dictionary of weights to a saver should save those weights under the correct names. Then I should be able to create the same dictionary in the application function and then restore the saved values. However if I do this, I get an 'values are uninitialized'-error. Can anyone help me find what I am doing wrong?
Minimal self-contained code example and corresponding error:
import tensorflow as tf
import numpy as np
# first train a linear model on random vectors of length 5 and store the trained parameters.
# Then load those parameters and try to apply them to a new vector.
def run():
train_model()
apply_model()
def train_model():
# create random training data: 100 vectors of length 5 for both input and output.
train_data = np.random.random((100,5))
train_labels = np.random.random((100,5))
train_data_node = tf.placeholder(tf.float32, shape=(5), name="train_data_node")
train_labels_node = tf.placeholder(tf.float32, shape=(5), name="train_labels_node")
weights = defineWeights()
prediction = model(train_data_node, weights)
loss = tf.norm(prediction - train_labels_node)
train_op = tf.train.AdagradOptimizer(learning_rate=1).minimize(loss)
saver = tf.train.Saver(weights)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# train for 50 epochs on all 100 training examples, with a batchsize of 1.
for _ in range(50):
for i in range(100):
batch_data = train_data[i,:]
batch_labels = train_labels[i,:]
feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}
sess.run([train_op, loss, weights], feed_dict=feed_dict)
saver.save(sess, '/results/weights')
def apply_model():
sess = tf.Session()
weights = defineWeights()
new_saver = tf.train.import_meta_graph('/results/weights.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('/results'))
print(model(np.random.random(5).astype(np.float32), weights).eval(session=sess))
def model(data, weights):
# multiply the matrix weights['a'] with the vector data
l1 = tf.matmul(tf.expand_dims(data,0), weights['a'])
l1 = l1 + weights['b']
return l1
def defineWeights():
weights = {
'a': tf.Variable(tf.random_normal([5, 5],
stddev=0.01,
dtype = tf.float32),
name = 'a'),
'b': tf.Variable(tf.random_normal([5]), name = 'b'),
}
return weights
Calling the 'run()' function in the code above gives the following error:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 8, in run
apply_model()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 50, in apply_model
print(model(np.random.random(5).astype(np.float32), weights).eval(session=sess))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 569, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3741, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 786, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 994, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1044, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1064, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value a_1
[[Node: a_1/read = Identity[T=DT_FLOAT, _class=["loc:#a_1"], _device="/job:localhost/replica:0/task:0/gpu:0"](a_1)]]
[[Node: add_2/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_add_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'a_1/read', defined at:
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 8, in run
apply_model()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 45, in apply_model
weights = defineWeights()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 63, in defineWeights
name = 'a'),
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 197, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value a_1
[[Node: a_1/read = Identity[T=DT_FLOAT, _class=["loc:#a_1"], _device="/job:localhost/replica:0/task:0/gpu:0"](a_1)]]
[[Node: add_2/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_add_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I edited your code to make it work - One possible way ! Check it.
import tensorflow as tf
import numpy as np
# first train a linear model on random vectors of length 5 and store the trained parameters.
# Then load those parameters and try to apply them to a new vector.
def run():
train_model()
apply_model()
def train_model():
# create random training data: 100 vectors of length 5 for both input and output.
train_data = np.random.random((100,5))
train_labels = np.random.random((100,5))
train_data_node = tf.placeholder(tf.float32, shape=(5), name="train_data_node")
train_labels_node = tf.placeholder(tf.float32, shape=(5), name="train_labels_node")
weights = defineWeights()
prediction = model(train_data_node, weights)
prediction = tf.identity(prediction, name="prediction")
loss = tf.norm(prediction - train_labels_node)
train_op = tf.train.AdagradOptimizer(learning_rate=1).minimize(loss)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# train for 50 epochs on all 100 training examples, with a batchsize of 1.
for _ in range(50):
for i in range(100):
batch_data = train_data[i,:]
batch_labels = train_labels[i,:]
feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}
sess.run([train_op, loss, weights], feed_dict=feed_dict)
saver.save(sess, 'results/model')
print("Trained Weights")
print(sess.run(weights))
def apply_model():
sess = tf.Session()
new_saver = tf.train.import_meta_graph('results/model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('results'))
print("Loaded Weights")
print(sess.run(['a:0','b:0']))
prediction = tf.get_default_graph().get_tensor_by_name("prediction:0")
train_data_node = tf.get_default_graph().get_tensor_by_name("train_data_node:0")
test_data = np.random.random(5).astype(np.float32)
pred = sess.run([prediction],feed_dict={train_data_node:test_data})
print("Prediction")
print(pred)
def model(data, weights):
# multiply the matrix weights['a'] with the vector data
l1 = tf.matmul(tf.expand_dims(data,0), weights['a'])
l1 = l1 + weights['b']
return l1
def defineWeights():
weights = {
'a': tf.Variable(tf.random_normal([5, 5],
stddev=0.01,
dtype = tf.float32),
name = 'a'),
'b': tf.Variable(tf.random_normal([5]), name = 'b'),
}
return weights
def main(_):
run()
if __name__ == '__main__':
tf.app.run(main=main)
Output:
Trained Weights
{'a': array([[ 0.01243415, -0.42879951, 0.0174435 , -0.24622701, 0.35309449],
[ 0.03154161, -0.08194152, 0.09223857, -0.15719411, -0.06323836],
[-0.03263358, 0.05096304, 0.1769278 , -0.17564282, 0.04325204],
[-0.17412457, -0.00338688, 0.08468977, -0.06877152, -0.02180972],
[ 0.25160244, -0.19224152, 0.14535131, -0.20594895, -0.03813718]], dtype=float32), 'b': array([ 0.33825615, 0.79861975, 0.30609566, 0.91897982, 0.20577262], dtype=float32)}
I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0)
Loaded Weights
[array([[ 0.01243415, -0.42879951, 0.0174435 , -0.24622701, 0.35309449],
[ 0.03154161, -0.08194152, 0.09223857, -0.15719411, -0.06323836],
[-0.03263358, 0.05096304, 0.1769278 , -0.17564282, 0.04325204],
[-0.17412457, -0.00338688, 0.08468977, -0.06877152, -0.02180972],
[ 0.25160244, -0.19224152, 0.14535131, -0.20594895, -0.03813718]], dtype=float32), array([ 0.33825615, 0.79861975, 0.30609566, 0.91897982, 0.20577262], dtype=float32)]
Prediction
[array([[ 0.3465074 , 0.42139536, 0.71310139, 0.30854774, 0.32671657]], dtype=float32)]
Explanation:
Name the tensors which you want to access after restoring.
Restore the graph and restore variables that you named - shown in apply_model()
Feed the new test_data into placeholder using feed_dict
Issues:
I tried to use sess.run(tf.global_variables_initializer()) but it is re-initializing variables to new random values. (Using TF 1.0)
I hope this helps !
I would like to write scripts to build a Tensorflow graph that can be used with different tf.placeholder with different sizes. I got InvalidArgumentError when I ran the following scripts:
# Loading training and validation data
# train data are in x_train, y_train
# validation data are in x_valid, y_valid
...
with tf.variable_scope("model", reuse=None):
# Build a Graph that computes the logits
network = Model(
batch_size=BATCH_SIZE,
input_dims=INPUT_DIMS,
seq_length=SEQ_LENGTH,
num_classes=NUM_CLASSES !!!!! ----> line 528, in main_seq
)
# Calculate loss
network.loss_op = softmax_seq_loss_by_example(
logits=network.model,
labels=network.targets,
batch_size=network.batch_size,
seq_length=network.seq_length
)
# Calculate predictions
network.pred_op = tf.argmax(network.model, 1)
with tf.variable_scope("model", reuse=True):
# Build a Graph that computes the logits
network_valid = Model(
batch_size=1,
input_dims=INPUT_DIMS,
seq_length=1,
num_classes=NUM_CLASSES
)
# Calculate loss
network_valid.loss_op = softmax_seq_loss_by_example(
logits=network_valid.model,
labels=network_valid.targets,
batch_size=network_valid.batch_size,
seq_length=network_valid.seq_length
)
# Calculate predictions
network_valid.pred_op = tf.argmax(network_valid.model, 1)
with tf.Session() as sess:
# Initialize variables in the graph
sess.run(tf.initialize_all_variables())
for epoch in xrange(n_epochs):
# Update parameters and compute loss of training set
y_true_train, y_pred_train, train_loss, train_duration = \
run_seq_epoch(
sess=sess, network=network,
inputs=x_train, targets=y_train,
train_op=train_op,
is_train=True
)
# Evaluate the model on the validation set
y_true_val, y_pred_val, valid_loss, valid_duration = \
run_seq_epoch(
sess=sess, network=network_valid, **** ----> replace_location
inputs=x_valid, targets=x_valid,
train_op=tf.no_op(),
is_train=False !!!!! ----> line 644, in main_seq
)
...
The function run_seq_epoch and the class Model is shown below:
class Model(object):
def __init__(self, batch_size, input_dims, seq_length, num_classes):
self.batch_size = batch_size
self.input_dims = input_dims
self.seq_length = seq_length
self.num_classes = num_classes
# Operations to compute loss and prediction, which will be assigned
# after the initialization
self.loss_op = None
self.pred_op = None
# Placeholder for input data
self.inputs = tf.placeholder(
tf.float32,
shape=[batch_size*seq_length, input_dims, 1, 1]
)
self.targets = tf.placeholder(
tf.int32,
shape=[batch_size*seq_length, ]
)
self.is_train = tf.placeholder(tf.bool)
# Use the defined placeholder above create a model
...
def run_seq_epoch(sess, network, inputs, targets, train_op, is_train):
...
# Initial state for LSTM
state = network.initial_state.eval()
for x_batch, y_batch in iterate_batch_seq_minibatches(inputs,
targets,
network.batch_size,
network.seq_length):
feed_dict = {
network.inputs: x_batch,
network.targets: y_batch,
network.initial_state: state,
network.is_train: is_train
}
_, loss_value, y_pred, state = sess.run(
[train_op, network.loss_op, network.pred_op, network.final_state],
feed_dict=feed_dict !!!!! ----> line 471, in run_seq_epoch
)
...
return _y_true, y_pred, loss, duration
The followings are the name and shape of the placeholders in the network and network_valid, and the shape and the type of x_batch and y_batch for training and validation:
# Placeholder of network.inputs and network.targets
model/Placeholder:0: (200, 7680, 1, 1)
model/Placeholder_1:0: (200,)
# x_batch and y_batch for training
x_batch.shape: (200, 7860, 1, 1), dtype=float32
y_batch.shape: (200, ), dtype=int32
# Placeholder of network_valid.inputs and network_valid.targets
model_1/Placeholder:0: (1, 7680, 1, 1)
model_1/Placeholder_1:0: (1,)
# x_batch and y_batch for validation
x_batch.shape: (1, 7860, 1, 1), type=float32
y_batch.shape: (1, ), type=int32
However, I got tensorflow.python.framework.errors.InvalidArgumentError for targets placeholder when I ran the scripts:
Traceback (most recent call last):
File "main.py", line 789, in <module>
fold_idx=fold_idx
File "main.py", line 644, in main_seq
is_train=False
File "main.py", line 471, in run_seq_epoch
feed_dict=feed_dict
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder_1' with dtype int32 and shape [200]
[[Node: model/Placeholder_1 = Placeholder[dtype=DT_INT32, shape=[200], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: model_1/total_loss/_329 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1326_model_1/total_loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'model/Placeholder_1', defined at:
File "main.py", line 789, in <module>
fold_idx=fold_idx
File "main.py", line 528, in main_seq
num_classes=NUM_CLASSES
File "/home/akara/Workspace/project/model.py", line 62, in __init__
shape=[batch_size*seq_length, ]
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 895, in placeholder
name=name)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1238, in _placeholder
name=name)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
If when I re-ran it several times, it reported the same error, but with different placeholders:
For is_train placeholder
...
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder_2' with dtype bool
[[Node: model/Placeholder_2 = Placeholder[dtype=DT_BOOL, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: _recv_model_1/Placeholder_0/_349 = _Send[T=DT_FLOAT, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_2567__recv_model_1/Placeholder_0", _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_model_1/Placeholder_0)]]
Caused by op u'model/Placeholder_2', defined at:
...
For inputs placeholder
...
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder' with dtype float and shape [200,7680,1,1]
[[Node: model/Placeholder = Placeholder[dtype=DT_FLOAT, shape=[200,7680,1,1], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: model_1/ArgMax/_354 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_105_model_1/ArgMax", tensor_type=DT_INT64, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'model/Placeholder', defined at:
...
These scripts WORK PERFECTLY if I replace network_valid with network in the replace_location. So I am not sure why I got the error when I used network_valid.
Update
I also found that the error is from the placeholder of the model variable scope (i.e., network for training), not model_1 scope (i.e., network_valid for validation), even though the scripts stop running during validation.
I renamed the placeholders and move them to be outside the Model class. Then I view my graph in the Tensorboard (Graph from Tensorboard). It seems that there is no link from the network placeholders to the network_valid, and similar to the network_valid placeholders.