Sharing the same variables with different placeholders - python

I would like to build a TensorFlow graph in which the same variables are shared by several tf.placeholder inputs of different sizes. I get an InvalidArgumentError when I run the following script:
# Loading training and validation data
# train data are in x_train, y_train
# validation data are in x_valid, y_valid
...
# Training network: first instantiation under the "model" scope (reuse=None).
with tf.variable_scope("model", reuse=None):
    # Build a Graph that computes the logits
    network = Model(
        batch_size=BATCH_SIZE,
        input_dims=INPUT_DIMS,
        seq_length=SEQ_LENGTH,
        num_classes=NUM_CLASSES  # !!!!! ----> line 528, in main_seq
    )
    # Calculate loss
    network.loss_op = softmax_seq_loss_by_example(
        logits=network.model,
        labels=network.targets,
        batch_size=network.batch_size,
        seq_length=network.seq_length
    )
    # Calculate predictions
    network.pred_op = tf.argmax(network.model, 1)

# Validation network: re-enters the "model" scope with reuse=True, but with
# batch_size=1 and seq_length=1, so it gets its own (smaller) placeholders.
with tf.variable_scope("model", reuse=True):
    # Build a Graph that computes the logits
    network_valid = Model(
        batch_size=1,
        input_dims=INPUT_DIMS,
        seq_length=1,
        num_classes=NUM_CLASSES
    )
    # Calculate loss
    network_valid.loss_op = softmax_seq_loss_by_example(
        logits=network_valid.model,
        labels=network_valid.targets,
        batch_size=network_valid.batch_size,
        seq_length=network_valid.seq_length
    )
    # Calculate predictions
    network_valid.pred_op = tf.argmax(network_valid.model, 1)

# Validation performs no parameter update. The no-op is created once here
# instead of inside the epoch loop, so the graph does not grow by one node
# per epoch.
valid_op = tf.no_op()

with tf.Session() as sess:
    # Initialize variables in the graph
    sess.run(tf.initialize_all_variables())
    for epoch in xrange(n_epochs):
        # Update parameters and compute loss of training set
        y_true_train, y_pred_train, train_loss, train_duration = \
            run_seq_epoch(
                sess=sess, network=network,
                inputs=x_train, targets=y_train,
                train_op=train_op,
                is_train=True
            )
        # Evaluate the model on the validation set.
        # Targets must be the validation labels (y_valid), not the inputs.
        y_true_val, y_pred_val, valid_loss, valid_duration = \
            run_seq_epoch(
                sess=sess, network=network_valid,  # **** ----> replace_location
                inputs=x_valid, targets=y_valid,
                train_op=valid_op,
                is_train=False  # !!!!! ----> line 644, in main_seq
            )
...
The function run_seq_epoch and the class Model are shown below:
class Model(object):
    """Holds the hyper-parameters, placeholders and ops of one network.

    ``loss_op`` and ``pred_op`` start as ``None`` and are assigned by the
    caller after construction.
    """

    def __init__(self, batch_size, input_dims, seq_length, num_classes):
        # Operations to compute loss and prediction; filled in by the caller
        # once the instance exists.
        self.loss_op = None
        self.pred_op = None

        # Keep the hyper-parameters so callers can read them back.
        self.num_classes = num_classes
        self.seq_length = seq_length
        self.input_dims = input_dims
        self.batch_size = batch_size

        # Both data placeholders share the same leading dimension.
        n_rows = batch_size * seq_length

        # Placeholders for input data. The creation order (inputs, targets,
        # is_train) is kept because the auto-generated op names depend on it.
        self.inputs = tf.placeholder(
            tf.float32, shape=[n_rows, input_dims, 1, 1]
        )
        self.targets = tf.placeholder(tf.int32, shape=[n_rows])
        self.is_train = tf.placeholder(tf.bool)

        # Use the placeholders defined above to create a model
        # (code elided in the original listing).
        ...
def run_seq_epoch(sess, network, inputs, targets, train_op, is_train):
    """Run ``train_op`` and the network's loss/prediction ops over all batches.

    Args:
        sess: Session used to execute the ops.
        network: Object exposing ``inputs``, ``targets``, ``is_train``,
            ``initial_state``, ``final_state``, ``loss_op``, ``pred_op``,
            ``batch_size`` and ``seq_length``.
        inputs: Input data handed to ``iterate_batch_seq_minibatches``.
        targets: Target data handed to ``iterate_batch_seq_minibatches``.
        train_op: Op executed alongside the loss and prediction ops.
        is_train: Value fed to the ``network.is_train`` placeholder.

    Returns:
        The tuple ``(_y_true, y_pred, loss, duration)``.
    """
    ...  # (code elided in the original listing)

    # Initial state for LSTM
    state = network.initial_state.eval()

    # The fetch list does not change between batches, so build it once.
    fetches = [
        train_op, network.loss_op, network.pred_op, network.final_state
    ]
    batches = iterate_batch_seq_minibatches(
        inputs, targets, network.batch_size, network.seq_length
    )
    for x_batch, y_batch in batches:
        feed_dict = {
            network.inputs: x_batch,
            network.targets: y_batch,
            # The state returned by the previous batch is fed back in.
            network.initial_state: state,
            network.is_train: is_train,
        }
        _, loss_value, y_pred, state = sess.run(
            fetches,
            feed_dict=feed_dict  # !!!!! ----> line 471, in run_seq_epoch
        )
        ...  # (code elided in the original listing)
    return _y_true, y_pred, loss, duration
The following are the names and shapes of the placeholders in network and network_valid, and the shapes and dtypes of x_batch and y_batch for training and validation:
# Placeholder of network.inputs and network.targets
model/Placeholder:0: (200, 7680, 1, 1)
model/Placeholder_1:0: (200,)
# x_batch and y_batch for training
x_batch.shape: (200, 7860, 1, 1), dtype=float32
y_batch.shape: (200, ), dtype=int32
# Placeholder of network_valid.inputs and network_valid.targets
model_1/Placeholder:0: (1, 7680, 1, 1)
model_1/Placeholder_1:0: (1,)
# x_batch and y_batch for validation
x_batch.shape: (1, 7860, 1, 1), type=float32
y_batch.shape: (1, ), type=int32
However, I got tensorflow.python.framework.errors.InvalidArgumentError for the targets placeholder when I ran the script:
Traceback (most recent call last):
File "main.py", line 789, in <module>
fold_idx=fold_idx
File "main.py", line 644, in main_seq
is_train=False
File "main.py", line 471, in run_seq_epoch
feed_dict=feed_dict
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder_1' with dtype int32 and shape [200]
[[Node: model/Placeholder_1 = Placeholder[dtype=DT_INT32, shape=[200], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: model_1/total_loss/_329 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1326_model_1/total_loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'model/Placeholder_1', defined at:
File "main.py", line 789, in <module>
fold_idx=fold_idx
File "main.py", line 528, in main_seq
num_classes=NUM_CLASSES
File "/home/akara/Workspace/project/model.py", line 62, in __init__
shape=[batch_size*seq_length, ]
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 895, in placeholder
name=name)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1238, in _placeholder
name=name)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/akara/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
When I re-ran it several times, it reported the same error, but for different placeholders:
For is_train placeholder
...
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder_2' with dtype bool
[[Node: model/Placeholder_2 = Placeholder[dtype=DT_BOOL, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: _recv_model_1/Placeholder_0/_349 = _Send[T=DT_FLOAT, client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_2567__recv_model_1/Placeholder_0", _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_model_1/Placeholder_0)]]
Caused by op u'model/Placeholder_2', defined at:
...
For inputs placeholder
...
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'model/Placeholder' with dtype float and shape [200,7680,1,1]
[[Node: model/Placeholder = Placeholder[dtype=DT_FLOAT, shape=[200,7680,1,1], _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
[[Node: model_1/ArgMax/_354 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_105_model_1/ArgMax", tensor_type=DT_INT64, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'model/Placeholder', defined at:
...
The script WORKS PERFECTLY if I replace network_valid with network at replace_location. So I am not sure why I get the error when I use network_valid.
Update
I also found that the error is from the placeholder of the model variable scope (i.e., network for training), not model_1 scope (i.e., network_valid for validation), even though the scripts stop running during validation.
I renamed the placeholders and moved them outside the Model class. Then I viewed my graph in TensorBoard (Graph from Tensorboard). It seems that there is no link from the network placeholders to network_valid, nor from the network_valid placeholders to network.

Related

read and decode tfrecords error tensorflow

I converted my own grayscale dataset of 60x60-pixel images to TFRecords with write_tfrecord(), but when I try to read and decode them I get an error. What is the problem?
# Paths of the TFRecord files read by read_and_decode().
train_tfrecord_addr = './data/train.tfrecords'
test_tfrecord_addr = './data/test.tfrecords'
n_train_samples = 43990
n_test_samples = 12500
batch_size = 32 # number of samples in each training batch
keep_prob = 0.5 # Dropout, probability to keep units
n_epochs = 25
# Output path used by write_tfrecord(); note it is the same file as
# test_tfrecord_addr.
tfrecords_filename = './data/test.tfrecords'
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature(value):
    """Wrap a single integer value in a ``tf.train.Feature``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _float32_feature(value):
    """Wrap float values in a ``tf.train.Feature``.

    Unlike the bytes/int64 helpers, ``value`` is passed through as-is rather
    than being wrapped in a one-element list.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)
def write_tfrecord(path, output_path=None):
    """Serialize the image/label pairs found under ``path`` to a TFRecord file.

    Args:
        path: Forwarded to ``get_lable_and_image`` to obtain the image file
            paths and their labels.
        output_path: Destination TFRecord file. Defaults to the module-level
            ``tfrecords_filename`` (the previous hard-coded behaviour), so
            existing callers are unaffected. Pass e.g. ``train_tfrecord_addr``
            to produce the training file.
    """
    if output_path is None:
        output_path = tfrecords_filename

    images_addrs, images_labels = get_lable_and_image(path=path)
    filename_pairs = list(zip(images_addrs, images_labels))
    print(filename_pairs)
    # to shuffle data
    shuffle(filename_pairs)

    # The context manager closes the writer even if an image fails to load.
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for img_path, label in filename_pairs:
            # NOTE(review): read_and_decode() reshapes each image to 60 * 60
            # uint8 values and each label to 2 uint8 values -- confirm the
            # data written here has that size and dtype.
            with Image.open(img_path) as image_file:
                img = np.array(image_file)
            # tobytes() is the non-deprecated spelling of tostring().
            img_raw = img.tobytes()
            label_raw = label.tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'image_raw': _bytes_feature(img_raw),
                'label_raw': _bytes_feature(label_raw),
            }))
            writer.write(example.SerializeToString())
decode method ...
def read_and_decode(filename, batch_size, num_epochs, num_samples):
    """Build the input pipeline that reads batches from a TFRecord file.

    Args:
        filename: Path of the TFRecord file to read.
        batch_size: Number of examples per returned batch.
        num_epochs: Number of passes the filename queue makes over the file.
        num_samples: Capacity of the batching queue.

    Returns:
        ``(images, labels)`` tensors: images are float32 of shape
        ``[batch_size, 60 * 60]`` scaled to [-0.5, 0.5]; labels are uint8 of
        shape ``[batch_size, 2]``.
    """
    # Read the file the caller asked for. Previously this always read
    # train_tfrecord_addr, so the "test" pipeline silently read training data.
    filename_queue = tf.train.string_input_producer([filename],
                                                    num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since both keys are required.
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label_raw': tf.FixedLenFeature([], tf.string),
        })
    # Convert from a scalar string tensor to a uint8 tensor
    image_raw = tf.decode_raw(features['image_raw'], tf.uint8)
    image_resized = tf.reshape(image_raw, [60 * 60])
    # Convert from [0, 255] -> [-0.5, 0.5] floats.
    image_resized = tf.cast(image_resized, tf.float32) * (1. / 255) - 0.5
    # Convert from a scalar string tensor to a uint8 tensor
    label_raw = tf.decode_raw(features['label_raw'], tf.uint8)
    label_resized = tf.reshape(label_raw, [2])
    images, labels = tf.train.batch([image_resized, label_resized],
                                    batch_size=batch_size,
                                    capacity=num_samples,
                                    num_threads=2)
    return images, labels
This is the main code that feeds the data to the convolutional network.
def run_training():
    """Train ShapeNet for a number of steps."""
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Input train images and labels.
        train_images, train_labels = read_and_decode(
            filename=train_tfrecord_addr,
            batch_size=batch_size,
            num_epochs=n_epochs,
            num_samples=n_train_samples)
        # Input test images and labels.
        # define batch_size = all test samples
        test_images, test_labels = read_and_decode(
            filename=test_tfrecord_addr,
            batch_size=n_test_samples,
            num_epochs=n_epochs,
            num_samples=n_test_samples)
        # define placeholder for input images and labels
        X = tf.placeholder(tf.float32, [None, 60 * 60])
        Y = tf.placeholder(tf.float32, [None, 2])
        # Build a Graph that computes predictions from the inference model.
        prediction = convolutional_network_model(X)
        # Backpropagation
        # measure of error of our model
        # this needs to be minimised by adjusting W and b
        cross_entropy = -tf.reduce_sum(Y * tf.log(prediction))
        # define training step which minimises cross entropy.
        # The call chain is kept inside one expression: a line starting with
        # ".minimize" after a closed parenthesis is a syntax error.
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=0.001).minimize(cross_entropy)
        # argmax gives index of highest entry in vector (1st axis of 1D tensor)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
        # get mean of all entries in correct prediction, the higher the better
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        # The op for initializing the variables. Local variables are included
        # because num_epochs creates a local epoch counter.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        st = time.time()
        # Create a session for running operations in the Graph.
        with tf.Session() as sess:
            # Initialize the variables (the trained variables and the
            # epoch counter).
            sess.run(init_op)
            # Start input enqueue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                for epoch in range(n_epochs):
                    for _ in range(n_train_samples // batch_size):
                        # fetch the batch train images and labels
                        batch_x, batch_y = sess.run(
                            [train_images, train_labels])
                        sess.run(train_op,
                                 feed_dict={X: batch_x, Y: batch_y})
                    # fetch whole test images and labels
                    batch_x, batch_y = sess.run([test_images, test_labels])
                    # Evaluate only: running train_op here would update the
                    # weights on the test set.
                    acc = sess.run(accuracy,
                                   feed_dict={X: batch_x, Y: batch_y})
                    print('epoch %d/%d: , accuracy = %.3f'
                          % (epoch, n_epochs, acc))
            finally:
                # Stop and join the queue threads even if a step raised
                # (e.g. OutOfRangeError from an exhausted input queue).
                coord.request_stop()
                coord.join(threads)
        et = time.time()
        duration = et - st
        print(duration)
This is the error. I also converted my dataset to binary format, but I still see the same error.
2018-02-13 13:53:02.401813: I C:\tf_jenkins\workspace\rel-
win\M\windows\PY\35\tensorflow\core\platform\cpu_feature_guard.cc:137] Your
CPU supports instructions that this TensorFlow binary was not compiled to
use: AVX
Traceback (most recent call last):
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1350, in _do_call
return fn(*args) File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1329, in _run_fn
status, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue
'_1_batch/fifo_queue' is closed and has insufficient elements (requested 32,
current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_UINT8],
timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"]
(batch/fifo_queue, batch/n)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/project/python/MathNet/mathnet.py", line 232, in <module>
run_training()
File "F:/project/python/MathNet/mathnet.py", line 207, in run_training
batch_x, batch_y = sess.run([train_images, train_labels])
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1344, in _do_run
options, run_metadata)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\client\session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue
'_1_batch/fifo_queue' is closed and has insufficient elements (requested
32, current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT,
DT_UINT8], timeout_ms=-1,
_device="/job:localhost/replica:0/task:0/device:CPU:0"](batch/fifo_queue,
batch/n)]]
Caused by op 'batch', defined at:
File "F:/project/python/MathNet/mathnet.py", line 232, in <module>
run_training()
File "F:/project/python/MathNet/mathnet.py", line 159, in run_training
num_samples=n_train_samples)
File "F:/project/python/MathNet/mathnet.py", line 105, in
read_and_decode
num_threads=2, )
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\training\input.py", line 979, in batch
name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\training\input.py", line 754, in _batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\ops\data_flow_ops.py", line 475, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\ops\gen_data_flow_ops.py", line 2764, in
_queue_dequeue_many_v2
component_types=component_types, timeout_ms=timeout_ms, name=name)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\op_def_library.py", line 787, in
_apply_op_helper
op_def=op_def)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\ops.py", line 3160, in create_op
op_def=op_def)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-
access
OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue'
is closed and has insufficient elements (requested 32, current size 0)
[[Node: batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_UINT8],
timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"]
(batch/fifo_queue, batch/n)]]

Is it possible for Tensorflow graph to run outside of session

Could someone please explain the following situation:
I've created a simple convolutional neural network using Tensorflow. I'm using a class and I've created my graph in the constructor. I then train the network using a train method I've written. I'm also using queues and the feed-in mechanism. This is an excerpt from the code:
class Super_res:
    'Create a CNN model which augments the resolution of an image'

    # object initialization (python) - constructor
    def __init__(self, input, output, batch_size, record_size, weights, biases):
        """Store the configuration and build the graph up to ``self.pred``.

        Args:
            input: input (neurons).
            output: output (no. neurons).
            batch_size: batches to process before registering delta.
            record_size: forwarded to ``data_reader3.batch_generator``.
            weights: forwarded to ``self.cnn_model``.
            biases: forwarded to ``self.cnn_model``.
        """
        print("Initializing object")
        self.input = input
        self.output = output
        self.batch_size = batch_size
        self.record_size = record_size
        self.weights = weights
        self.biases = biases
        # initialize data batch readers. Parameters: [Path], record_size, batch_size
        self.data_batch = data_reader3.batch_generator(
            [DATA_PATH_OPTICAL_TRAIN], self.record_size, self.batch_size)  # train set
        self.data_batch_eval = data_reader3.batch_generator(
            [DATA_PATH_EVAL], self.record_size, self.batch_size)  # eval set
        # this returns a [batch_size, 2, n_input] tensor. The second dimension
        # is comprised of the low-res image and the GT high-res image. Each of
        # these images is comprised of n_input entries (flat vector).
        # After the transpose: one slice for optical and another for the GT
        # image, [batch_size, n_input] each.
        self.data1 = tf.placeholder_with_default(
            tf.transpose(self.data_batch, [1, 0, 2]),
            [2, batch_size, n_input])
        # dropout (keep probability)
        self.keep_prob = tf.placeholder(tf.float32)
        # create network model; self.data1[0] is the low-res data
        self.pred = self.cnn_model(self.data1[0], self.weights, self.biases)

    def train(self):
        """Repeatedly evaluate ``self.pred`` in a session and print the result."""
        #self.low_res = self.data1[0]
        #self.high_res = self.data1[1]
        # define loss and optimizer
        #self.cost = tf.reduce_mean(tf.pow(self.data1[1] - self.pred, 2))
        #self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
        # Initializing the variables
        init = tf.global_variables_initializer()
        # Initialize session
        with tf.Session() as sess:
            sess.run(init)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            step = 1
            print("Entering training")
            try:
                # Keep training until reach max iterations. Uses the batch
                # size this instance was built with, which is what the input
                # queue actually dequeues per step.
                while step * self.batch_size < training_iters:
                    #_, c = sess.run([self.optimizer, self.cost])
                    conv_result = sess.run(self.pred)
                    print(conv_result)
                    #data2 = self.data1[0]
                    #print(data2)
                    if step % display_step == 0:
                        print("Step:", '%04d' % (step+1))
                        # "cost=", c)
                    step = step + 1
            finally:
                # Always stop and join the queue-runner threads, including
                # when sess.run raises (e.g. OutOfRangeError).
                coord.request_stop()
                coord.join(threads)
When I run this code, I get the following error output:
Entering training
Traceback (most recent call last):
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1139, in _do_call
return fn(*args)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1121, in _run_fn
status, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types= [DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"] (shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "super_res_class.py", line 137, in <module>
p.train()
File "super_res_class.py", line 106, in train
conv_result = sess.run(self.pred)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 789, in run
run_metadata_ptr)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\client\session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types= [DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"] (shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op 'shuffle_batch', defined at:
File "super_res_class.py", line 136, in <module>
p = Super_res(1024,1024,512,record_size, weights, biases) # params (n_input, n_output, batch_size)
File "super_res_class.py", line 50, in __init__
self.data_batch = data_reader3.batch_generator([DATA_PATH_OPTICAL_TRAIN],self.record_size, self.batch_size) # train set
File "E:\google_drive\Doctorate\matlab code\Tensorflow\doctorate_CNN\dong_recreation\data_reader3.py", line 156, in batch_generator
capacity=capacity, min_after_dequeue=min_after_dequeue)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\training\input.py", line 1217, in shuffle_batch
name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\training\input.py", line 788, in _shuffle_batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\ops\data_flow_ops.py", line 457, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages \tensorflow\python\ops\gen_data_flow_ops.py", line 946, in _queue_dequeue_many_v2
timeout_ms=timeout_ms, name=name)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\divin\Miniconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
OutOfRangeError (see above for traceback): RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 512, current size 0)
[[Node: shuffle_batch = QueueDequeueManyV2[component_types=[DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
[[Node: shuffle_batch/_25 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_5_shuffle_batch", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
(tensorflow_gpu) E:\google_drive\Doctorate\matlab code\Tensorflow\doctorate_CNN\dong_recreation>
When I remove the sess.run() from my pred output, the code seems to operate normally.
def train(self):
    """Variant of ``train`` that never executes ``self.pred``.

    ``conv_result`` is bound to the symbolic tensor itself, so the loop only
    prints the tensor object; nothing is dequeued or computed.
    """
    #self.low_res = self.data1[0]
    #self.high_res = self.data1[1]
    # define loss and optimizer
    #self.cost = tf.reduce_mean(tf.pow(self.data1[1] - self.pred, 2))
    #self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
    # Op that initializes the variables
    init_op = tf.global_variables_initializer()
    # Open the session
    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        queue_threads = tf.train.start_queue_runners(coord=coord)
        step = 1
        print("Entering training")
        # Loop until the iteration budget is used up
        while step * batch_size < training_iters:
            #_, c = sess.run([self.optimizer, self.cost])
            conv_result = self.pred
            print(conv_result)
            #data2 = self.data1[0]
            #print(data2)
            if not step % display_step:
                print("Step:", '%04d' % (step+1))
                # "cost=", c)
            step += 1
        coord.request_stop()
        coord.join(queue_threads)
Could someone please explain this to me? Normally, the graph is only evaluated when run under a session! What gives here?
Just saying conv_result = self.pred won't do anything -- you need, indeed, to do sess.run(self.pred) to get it to execute. The errors you're getting are something else about your model. As they say, your InputProducer has an empty queue. With the information you've given it can't be diagnosed, but I would search further on the site for why your InputProducer isn't filling / has zero size.

How do I store and rebuild a dictionary of weights in TensorFlow

When training I store my weights in a dictionary of tensorflow-variables. I pass that dictionary of weights to a 'model'-function together with some data to get my desired output.
After training, I would like to store that dictionary in a file in such a way that I can recreate it. That way I can apply the learned weights by simply passing the dictionary of weights together with the new data to the same model function.
According to the documentation, simply passing the dictionary of weights to a saver should save those weights under the correct names. Then I should be able to create the same dictionary in the application function and then restore the saved values. However if I do this, I get an 'values are uninitialized'-error. Can anyone help me find what I am doing wrong?
Minimal self-contained code example and corresponding error:
import tensorflow as tf
import numpy as np
# first train a linear model on random vectors of length 5 and store the trained parameters.
# Then load those parameters and try to apply them to a new vector.
def run():
    """Train the model and save its weights, then reload and apply them."""
    train_model()
    apply_model()
def train_model():
    """Fit the linear model on random data and save the weight dictionary.

    The weights are saved through ``tf.train.Saver(weights)``, so the
    checkpoint stores them under the dictionary keys ``'a'`` and ``'b'``.
    """
    # create random training data: 100 vectors of length 5 for both input and output.
    train_data = np.random.random((100, 5))
    train_labels = np.random.random((100, 5))
    train_data_node = tf.placeholder(tf.float32, shape=(5,), name="train_data_node")
    train_labels_node = tf.placeholder(tf.float32, shape=(5,), name="train_labels_node")
    weights = defineWeights()
    prediction = model(train_data_node, weights)
    loss = tf.norm(prediction - train_labels_node)
    train_op = tf.train.AdagradOptimizer(learning_rate=1).minimize(loss)
    saver = tf.train.Saver(weights)
    # The context manager releases the session once training is done.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # train for 50 epochs on all 100 training examples, with a batchsize of 1.
        for _ in range(50):
            for batch_data, batch_labels in zip(train_data, train_labels):
                feed_dict = {train_data_node: batch_data,
                             train_labels_node: batch_labels}
                # Only the update is needed; the loss and weight values were
                # fetched and discarded before.
                sess.run(train_op, feed_dict=feed_dict)
        saver.save(sess, '/results/weights')
def apply_model():
    """Rebuild the weight dictionary, restore its values, and apply the model.

    The variables are created in a fresh graph and restored with
    ``tf.train.Saver(weights)``, which maps the checkpoint entries ``'a'`` and
    ``'b'`` onto the newly created variables. Previously the new variables
    were never restored: ``import_meta_graph`` restored a separate imported
    copy, so evaluating the model raised "Attempting to use uninitialized
    value a_1".
    """
    # A fresh graph keeps these variables apart from any graph built earlier
    # in the same process (e.g. by train_model()).
    with tf.Graph().as_default():
        weights = defineWeights()
        saver = tf.train.Saver(weights)
        with tf.Session() as sess:
            saver.restore(sess, tf.train.latest_checkpoint('/results'))
            new_data = np.random.random(5).astype(np.float32)
            print(model(new_data, weights).eval(session=sess))
def model(data, weights):
    """Return ``data`` (as a row vector) times ``weights['a']`` plus ``weights['b']``."""
    # expand_dims turns the vector into a 1-row matrix so matmul applies.
    row = tf.expand_dims(data, 0)
    return tf.matmul(row, weights['a']) + weights['b']
def defineWeights():
    """Create the weight dictionary: a 5x5 matrix ``'a'`` and a length-5 ``'b'``."""
    # Ops are created in the same order as before: a's initializer and
    # variable first, then b's.
    a_init = tf.random_normal([5, 5], stddev=0.01, dtype=tf.float32)
    var_a = tf.Variable(a_init, name='a')
    b_init = tf.random_normal([5])
    var_b = tf.Variable(b_init, name='b')
    return {'a': var_a, 'b': var_b}
Calling the 'run()' function in the code above gives the following error:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 8, in run
apply_model()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 50, in apply_model
print(model(np.random.random(5).astype(np.float32), weights).eval(session=sess))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 569, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3741, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 786, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 994, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1044, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1064, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value a_1
[[Node: a_1/read = Identity[T=DT_FLOAT, _class=["loc:#a_1"], _device="/job:localhost/replica:0/task:0/gpu:0"](a_1)]]
[[Node: add_2/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_add_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'a_1/read', defined at:
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 8, in run
apply_model()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 45, in apply_model
weights = defineWeights()
File "/usr/local/lib/python2.7/dist-packages/myFolder/example.py", line 63, in defineWeights
name = 'a'),
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 197, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 316, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1338, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value a_1
[[Node: a_1/read = Identity[T=DT_FLOAT, _class=["loc:#a_1"], _device="/job:localhost/replica:0/task:0/gpu:0"](a_1)]]
[[Node: add_2/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_7_add_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
I edited your code to make it work. This is one possible way; check it out.
import tensorflow as tf
import numpy as np
# first train a linear model on random vectors of length 5 and store the trained parameters.
# Then load those parameters and try to apply them to a new vector.
def run():
    """Train the linear model and save it, then reload it and apply it."""
    train_model()
    apply_model()
def train_model():
    """Train a linear model on random data and save it to 'results/model'.

    The placeholders are named "train_data_node" / "train_labels_node" and the
    output tensor is named "prediction" so that they can be looked up by name
    after the graph has been re-imported from the checkpoint.
    """
    # Random training data: 100 vectors of length 5 for both input and output.
    train_data = np.random.random((100, 5))
    train_labels = np.random.random((100, 5))
    train_data_node = tf.placeholder(tf.float32, shape=(5,), name="train_data_node")
    train_labels_node = tf.placeholder(tf.float32, shape=(5,), name="train_labels_node")
    weights = defineWeights()
    prediction = model(train_data_node, weights)
    prediction = tf.identity(prediction, name="prediction")
    loss = tf.norm(prediction - train_labels_node)
    train_op = tf.train.AdagradOptimizer(learning_rate=1).minimize(loss)
    saver = tf.train.Saver()
    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Train for 50 epochs on all 100 training examples, with a batch size of 1.
        for _ in range(50):
            for batch_data, batch_labels in zip(train_data, train_labels):
                feed_dict = {train_data_node: batch_data,
                             train_labels_node: batch_labels}
                # Only the update op is run; the loss and weight values were
                # never used, so they are not fetched on every step.
                sess.run(train_op, feed_dict=feed_dict)
        saver.save(sess, 'results/model')
        print("Trained Weights")
        print(sess.run(weights))
def apply_model():
    """Restore the saved graph and variables, then predict on a new vector.

    Tensors are fetched by the names assigned in train_model(): the variables
    'a' and 'b', the "train_data_node" placeholder and the "prediction" output.
    """
    # The context manager closes the session once the prediction is printed.
    with tf.Session() as sess:
        new_saver = tf.train.import_meta_graph('results/model.meta')
        new_saver.restore(sess, tf.train.latest_checkpoint('results'))
        print("Loaded Weights")
        print(sess.run(['a:0', 'b:0']))
        graph = tf.get_default_graph()
        prediction = graph.get_tensor_by_name("prediction:0")
        train_data_node = graph.get_tensor_by_name("train_data_node:0")
        test_data = np.random.random(5).astype(np.float32)
        pred = sess.run([prediction], feed_dict={train_data_node: test_data})
        print("Prediction")
        print(pred)
def model(data, weights):
    """Return data * weights['a'] + weights['b'] for a single input vector."""
    # Add a leading axis so the vector can be the left operand of matmul.
    row = tf.expand_dims(data, 0)
    projected = tf.matmul(row, weights['a'])
    return projected + weights['b']
def defineWeights():
    """Create the variables named 'a' (5x5) and 'b' (5) and return them in a dict."""
    a_initial = tf.random_normal([5, 5], stddev=0.01, dtype=tf.float32)
    a = tf.Variable(a_initial, name='a')
    b = tf.Variable(tf.random_normal([5]), name='b')
    return {'a': a, 'b': b}
def main(_):
    """Entry point handed to tf.app.run; the positional argument is ignored."""
    run()


if __name__ == '__main__':
    tf.app.run(main=main)
Output:
Trained Weights
{'a': array([[ 0.01243415, -0.42879951, 0.0174435 , -0.24622701, 0.35309449],
[ 0.03154161, -0.08194152, 0.09223857, -0.15719411, -0.06323836],
[-0.03263358, 0.05096304, 0.1769278 , -0.17564282, 0.04325204],
[-0.17412457, -0.00338688, 0.08468977, -0.06877152, -0.02180972],
[ 0.25160244, -0.19224152, 0.14535131, -0.20594895, -0.03813718]], dtype=float32), 'b': array([ 0.33825615, 0.79861975, 0.30609566, 0.91897982, 0.20577262], dtype=float32)}
I tensorflow/core/common_runtime/gpu/gpu_device.cc:975] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0)
Loaded Weights
[array([[ 0.01243415, -0.42879951, 0.0174435 , -0.24622701, 0.35309449],
[ 0.03154161, -0.08194152, 0.09223857, -0.15719411, -0.06323836],
[-0.03263358, 0.05096304, 0.1769278 , -0.17564282, 0.04325204],
[-0.17412457, -0.00338688, 0.08468977, -0.06877152, -0.02180972],
[ 0.25160244, -0.19224152, 0.14535131, -0.20594895, -0.03813718]], dtype=float32), array([ 0.33825615, 0.79861975, 0.30609566, 0.91897982, 0.20577262], dtype=float32)]
Prediction
[array([[ 0.3465074 , 0.42139536, 0.71310139, 0.30854774, 0.32671657]], dtype=float32)]
Explanation:
Name the tensors which you want to access after restoring.
Restore the graph and restore variables that you named - shown in apply_model()
Feed the new test_data into placeholder using feed_dict
Issues:
I tried to use sess.run(tf.global_variables_initializer()), but that re-initializes the variables to new random values (overwriting the restored ones), so it is not called in apply_model(). (Using TF 1.0)
I hope this helps !

OutOfRangeError: RandomShuffleQueue

Hi, I am trying to run a convolutional neural network adapted from the MNIST2 tutorial in TensorFlow. I am getting the following error, but I am not sure what is going on:
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
W tensorflow/core/framework/op_kernel.cc:909] Invalid argument: Shape mismatch in tuple component 0. Expected [784], got [6272]
Traceback (most recent call last):
File "4_Treino_Rede_Neural.py", line 161, in <module>
train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.OutOfRangeError: RandomShuffleQueue '_0_input/shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 100, current size 0)
[[Node: input/shuffle_batch = QueueDequeueMany[_class=["loc:#input/shuffle_batch/random_shuffle_queue"], component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](input/shuffle_batch/random_shuffle_queue, input/shuffle_batch/n)]]
Caused by op u'input/shuffle_batch', defined at:
File "4_Treino_Rede_Neural.py", line 113, in <module>
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)
File "4_Treino_Rede_Neural.py", line 93, in inputs
min_after_dequeue=1000)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 779, in shuffle_batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 400, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 465, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()
My program is:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import time
import numpy as np
import tensorflow as tf
# Basic model parameters as external flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('num_epochs', 2, 'Number of epochs to run trainer.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_string('train_dir', '/root/data', 'Directory with the training data.')
# Alternative data directory, kept disabled:
#flags.DEFINE_string('train_dir', '/root/data2', 'Directory with the training data.')
# Constants used for dealing with the files, matches convert_to_records.
# inputs() joins one of these names onto FLAGS.train_dir.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
# Package set-up: this module-level session is the one used by the
# initializer, queue runners and training loop further down the script.
sess = tf.InteractiveSession()
def read_and_decode(filename_queue):
    """Read one TFRecord example from `filename_queue` and decode it.

    Returns:
      A tuple (image, label): `image` is a float32 tensor of static shape
      [784] scaled to [-0.5, 0.5]; `label` is an int32 scalar.
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)
    # Both keys are required, so no defaults are specified.
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)
    # Reinterpret the raw byte string as a flat uint8 vector.
    # NOTE(review): the reported "Expected [784], got [6272]" mismatch is
    # exactly 8 x 784; presumably the records hold 8-byte elements rather than
    # uint8 pixels -- confirm against the code that wrote the TFRecords.
    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    pixels.set_shape([784])
    # The image stays flattened; no reshaping or distortion is applied here.
    # Rescale from [0, 255] to [-0.5, 0.5] floats.
    image = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5
    # The label is stored as an int64 scalar; convert it to int32.
    label = tf.cast(parsed['label'], tf.int32)
    return image, label
def inputs(train, batch_size, num_epochs=None):
    """Reads input data num_epochs times.

    Args:
      train: Selects between the training (True) and validation (False) data.
      batch_size: Number of examples per returned batch.
      num_epochs: Number of times to read the input data, or 0/None (the
        default) to read forever.

    Returns:
      A tuple (images, labels), where:
      * images is a float tensor with shape [batch_size, 784] (flattened, as
        produced by read_and_decode) in the range [-0.5, 0.5].
      * labels is an int32 tensor with shape [batch_size] with the true label
        as a class index.
      Note that a tf.train.QueueRunner is added to the graph, which
      must be run using e.g. tf.train.start_queue_runners().
    """
    # Normalize 0 to None, which string_input_producer treats as "no limit".
    if not num_epochs:
        num_epochs = None
    filename = os.path.join(FLAGS.train_dir,
                            TRAIN_FILE if train else VALIDATION_FILE)
    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)
        # Even when reading in multiple threads, share the filename queue.
        image, label = read_and_decode(filename_queue)
        # Shuffle the examples and collect them into batch_size batches.
        # (Internally uses a RandomShuffleQueue.)
        # We run this in two threads to avoid being a bottleneck.
        images, sparse_labels = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, num_threads=2,
            capacity=1000 + 3 * batch_size,
            # Ensures a minimum amount of shuffling of examples.
            min_after_dequeue=1000)
    return images, sparse_labels
def weight_variable(shape):
    """Return a variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Variables: batches of flattened images and sparse integer labels.
x, y_ = inputs(train=True, batch_size=FLAGS.batch_size, num_epochs=FLAGS.num_epochs)

# Layer 1
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Layer 2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout - reduces overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Readout layer: unnormalized logits for 36 classes.
W_fc2 = weight_variable([1024, 36])
b_fc2 = bias_variable([36])
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

# Train and evaluate
# Keyword arguments keep the call valid regardless of positional order.
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# y_ already holds class indices with shape [batch_size] (not one-hot rows),
# so compare it directly with the predicted index instead of taking an argmax
# over a second axis that does not exist.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.cast(y_, tf.int64))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    for i in range(20000):
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, train_accuracy))
        train_step.run(feed_dict={keep_prob: 0.5})
    # `accuracy` is wired to the tensors returned by the inputs() call at the
    # top of this script; rebinding x and y_ here would not change what it
    # evaluates, so the former re-call of inputs() has been dropped. Evaluating
    # on other data requires building a second model on a separate input
    # pipeline that shares these variables.
    print("test accuracy %g"%accuracy.eval(feed_dict={keep_prob: 1.0}))
finally:
    # Ask the queue-runner threads to stop before joining them; joining
    # without a stop request can block indefinitely.
    coord.request_stop()
    coord.join(threads)
    sess.close()
I have tried changing the num_epochs to 10000 and to None but the same error message appears. I am wondering if anyone knows how to solve this.
Thanks
Marcelo
This looks like an issue with your image.set_shape([784]). The error says that a tensor of size [784] was expected but one of size [6272] was received. I'm semi-familiar with this tutorial: the images should be 28x28, which gives a size of 784. Maybe there are 6272 images and your dimensions are confused, because the first dimension should be the number of observations and not the size of a single observation? Note also that 6272 = 8 x 784, so it is worth checking whether each pixel was written as an 8-byte value (e.g. float64) while tf.decode_raw reads the bytes as uint8. Sorry this isn't a concrete answer, but I would start there.

TensorFlow placeholder variable for integer or boolean isn't working

I am using the following code snippet in TensorFlow to conditionally pull data from one source or another:
if __name__ == '__main__':
    with tf.device("/gpu:0"):
        with tf.Graph().as_default():
            with tf.variable_scope("cifar_conv_model"):
                # Placeholder for whether to pull from train (1) or val data.
                is_train = tf.placeholder(tf.int32)
                keep_prob = tf.placeholder(tf.float32)  # dropout probability

                # Build both input pipelines OUTSIDE tf.cond. If they are
                # created inside the branches, the ops run by the queue-runner
                # threads depend on `is_train`, which those threads never feed
                # ("You must feed a value for placeholder tensor ...").
                # Trade-off: both pipelines dequeue a batch on every step.
                train_x, train_y = distorted_inputs()
                val_x, val_y = inputs()
                x, y = tf.cond(tf.equal(is_train, tf.constant(1, dtype=tf.int32)),
                               lambda: [train_x, train_y],
                               lambda: [val_x, val_y])

                output = inference(x, keep_prob)
                cost = loss(output, y)
                global_step = tf.Variable(0, name='global_step', trainable=False)
                train_op = training(cost, global_step)
                eval_op = evaluate(output, y)
                summary_op = tf.merge_all_summaries()
                saver = tf.train.Saver()
                init_op = tf.initialize_all_variables()
                sess = tf.Session()
                # The writer reads sess.graph_def, so it must be created after
                # the session exists (it was previously created before it).
                summary_writer = tf.train.SummaryWriter("conv_cifar_logs/", graph_def=sess.graph_def)
                sess.run(init_op)
                tf.train.start_queue_runners(sess=sess)

                # Training cycle
                for epoch in range(training_epochs):
                    avg_cost = 0.
                    total_batch = int(cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN/batch_size)
                    # Loop over all batches
                    for i in range(total_batch):
                        # Fit training using batch data
                        _, new_cost = sess.run([train_op, cost], feed_dict={is_train: 1, keep_prob: 0.5})
                        # Compute average loss
                        avg_cost += new_cost/total_batch
                        # Parenthesized single-argument form works as both a
                        # print statement and a print function call.
                        print("Epoch %d, minibatch %d of %d. Average cost = %0.4f." % (epoch, i, total_batch, avg_cost))
I keep getting a flood of error output as the threads fail, but the recurring theme is the following error:
InvalidArgumentError: You must feed a value for placeholder tensor 'cifar_conv_model/Placeholder' with dtype int32
[[Node: cifar_conv_model/Placeholder = Placeholder[dtype=DT_INT32, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
[[Node: cifar_conv_model/Placeholder/_151 = _HostRecv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_407_cifar_conv_model/Placeholder", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op u'cifar_conv_model/Placeholder', defined at:
File "convnet_cifar.py", line 134, in <module>
is_train = tf.placeholder(tf.int32) # placeholder for whether to pull from train or val data
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 743, in placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 607, in _placeholder
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2040, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1087, in __init__
self._traceback = _extract_stack()
Any help would very much be appreciated!
A solution is mentioned here:
http://andyljones.tumblr.com/.
You have to replace your placeholder, is_train = tf.placeholder(tf.int32), with a tf.Variable: is_train = tf.Variable(True, name='training').
That way it is initialized by your call to sess.run(tf.initialize_all_variables()) before you make the call to tf.train.start_queue_runners(sess=sess).

Categories