I have replicated a deep CNN from a research paper. When I originally constructed the model, I assumed that the batch size would be one. However, now that I have learned more about batch sizes, I want to use a batch size of 40.
Here is the GitHub repository.
This is a very deep network, so I will show a more basic version of the project below:
x = tf.placeholder(tf.float32, shape=[None, 7168])
y_ = tf.placeholder(tf.float32, shape=[None, 7168, 3])
#MANY CONVOLUTIONS OMITTED HERE
#one of many transpose convolutions, the 40 here is a change I made for the batch size
w = tf.Variable(tf.constant(1.,shape=[2,2,4,1,192]))
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter = w, output_shape = [40,32,32,7,1], strides = [1,2,2,2,1], padding = 'SAME')
#I reshape the final convolution's batch size because I was getting errors
final = tf.reshape(final, [40, 7168])
#Accuracy and loss functions omitted because they do not deal with batch size
#Lastly, I train the model, where a and b are of size [40][7168][3]; 40 is the batch size
train_step.run(feed_dict={x: a, y_: b, keep_prob: .5})
When I run the code from the repository, I get this error. What more changes do I need to make so that the batch size is 40?
Traceback (most recent call last):
File "<stdin>", line 31, in <module>
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2042, in run
_run_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 4490, in _run_using_default_session
session.run(operation, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: cuDNN Backward Data function launch failure : input shape([320,4,4,1,896]) filter shape([3,3,1,896,800])
[[Node: gradients/conv3d_2/Conv3D_grad/Conv3DBackpropInputV2 = Conv3DBackpropInputV2[T=DT_FLOAT, data_format="NDHWC", padding="VALID", strides=[1, 1, 1, 1, 1], _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/conv3d_2/Conv3D_grad/Shape, conv3d_1/kernel/read, gradients/conv3d_2/BatchToSpaceND_grad/SpaceToBatchND)]]
Caused by op u'gradients/conv3d_2/Conv3D_grad/Conv3DBackpropInputV2', defined at:
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 343, in minimize
grad_loss=grad_loss)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.py", line 414, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 581, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 353, in _MaybeCompile
return grad_fn() # Exit early
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gradients_impl.py", line 581, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_grad.py", line 82, in _Conv3DGrad
data_format=data_format),
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 1084, in conv3d_backprop_input_v2
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op u'conv3d_2/Conv3D', defined at:
File "<stdin>", line 2, in <module>
File "<stdin>", line 2, in conv3d_dilation
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 809, in conv3d
return layer.apply(inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 671, in apply
return self.__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 167, in call
outputs = self._convolution_op(inputs, self.kernel)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 835, in __call__
return self.conv_op(inp, filter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 499, in __call__
return self.call(inp, filter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 492, in _with_space_to_batch_call
result = self.op(input_converted, filter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 187, in __call__
name=self.name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 847, in conv3d
padding=padding, data_format=data_format, name=name)
InternalError (see above for traceback): cuDNN Backward Data function launch failure : input shape([320,4,4,1,896]) filter shape([3,3,1,896,800])
[[Node: gradients/conv3d_2/Conv3D_grad/Conv3DBackpropInputV2 = Conv3DBackpropInputV2[T=DT_FLOAT, data_format="NDHWC", padding="VALID", strides=[1, 1, 1, 1, 1], _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/conv3d_2/Conv3D_grad/Shape, conv3d_1/kernel/read, gradients/conv3d_2/BatchToSpaceND_grad/SpaceToBatchND)]]
Try this:
shape = tf.shape(tf.reshape(x, [-1, 32, 32, 7, 1]))
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter=w, output_shape=shape, strides=[1,2,2,2,1], padding='SAME')
final = tf.reshape(final, [-1, 7168])
This way you don't hard-code 40 in the model, but are able to feed any batch size you want, including 40.
Best practice is to avoid hard-coding the batch size in the graph. As discussed here (see "How do I build a graph that works with variable batch sizes?"), you should specify shapes as [None, nx, ny, nz] and retrieve the batch size at runtime with tf.shape(input)[0]. For reshaping, you can use a form like tf.reshape(input, [-1, nx, ny, nz]), where the -1 tells TensorFlow to infer the batch dimension at runtime.
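The runtime batch size can be folded into the transpose convolution's output_shape the same way. A minimal sketch, assuming the same x, layer1, and w as in the question (the 32/32/7 spatial sizes are taken from the hard-coded shape above):
batch_size = tf.shape(x)[0]  # runtime batch size instead of a hard-coded 40
dyn_output_shape = tf.stack([batch_size, 32, 32, 7, 1])
DeConnv1 = tf.nn.conv3d_transpose(layer1, filter=w, output_shape=dyn_output_shape, strides=[1, 2, 2, 2, 1], padding='SAME')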
I want to use TensorFlow to solve a supervised problem. It is simple:
A list of string inputs. Each string has the format: "words_10-06-2017"
A list of float outputs. Each label is a float, e.g. 3.0
I want to use a string set with that format and a set of float labels to train a model.
System information
Windows 7 x64
TensorFlow 1.2
Python 3.5.2
IDE Pycharm 2017.1.1
I am using this piece of code:
# Input placeholder
x = tf.placeholder(tf.string, shape=[None, 1, 1])
...
# Define a lstm cell with tensorflow
lstm_cell = tf.nn.rnn_cell.LSTMCell(self.first_label_neurons, forget_bias=1)
# Get lstm cell output
outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.string)
dropout = tf.nn.dropout(outputs[-1], keep_probably)
# Linear activation, using rnn inner loop last output
y_prediction = tf.matmul(dropout, weights) + biases
The problem is in this line:
outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.string)
This is the output:
ValueError: An initializer for variable rnn/lstm_cell/kernel of <dtype: 'string'> is required
Could this be a TensorFlow problem?
Thanks for your attention.
The full output:
File "D:\ComputerScience\workspaces\model.py", line 602, in rnn_lstm
outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.string)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn.py", line 737, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2770, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2599, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2549, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn.py", line 722, in _time_step
(output, new_state) = call_cell()
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn.py", line 708, in <lambda>
call_cell = lambda: cell(input_t, state)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Python352\lib\site-packages\tensorflow\python\layers\base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 542, in call
lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 690, in _get_single_variable
name=name, shape=shape, dtype=dtype)
File "C:\Python352\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 770, in _get_default_initializer
% (name, dtype.base_dtype))
ValueError: An initializer for variable rnn/lstm_cell/kernel of <dtype: 'string'> is required
Try adding a word-to-vector embedding for your inputs. TensorFlow can currently only deal with int and float inputs in an RNN, so you need to make a vector representation of your input word sequences.
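A minimal sketch of that idea, assuming the raw strings are mapped to integer word ids before being fed (vocab_size, embed_size, and max_len are hypothetical values, not from the question):
import tensorflow as tf
vocab_size = 10000  # hypothetical vocabulary size
embed_size = 64     # hypothetical embedding width
max_len = 20        # hypothetical (padded) sequence length
# Feed integer word ids instead of raw strings.
word_ids = tf.placeholder(tf.int32, shape=[None, max_len])
embeddings = tf.get_variable("embeddings", [vocab_size, embed_size])
embedded = tf.nn.embedding_lookup(embeddings, word_ids)  # [batch, max_len, embed_size]
lstm_cell = tf.nn.rnn_cell.LSTMCell(64, forget_bias=1)
# dynamic_rnn now sees a float tensor, so the kernel gets a normal float initializer.
outputs, states = tf.nn.dynamic_rnn(lstm_cell, embedded, dtype=tf.float32)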
tf.contrib.crf doesn't seem to support sequences of length 1.
For example, if I run the example on https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf (mirror) and replace num_words = 20 by num_words = 1:
import numpy as np
import tensorflow as tf
# Data settings.
num_examples = 10
num_words = 1
num_features = 100
num_tags = 5
# Random features.
x = np.random.rand(num_examples, num_words, num_features).astype(np.float32)
# Random tag indices representing the gold sequence.
y = np.random.randint(num_tags, size=[num_examples, num_words]).astype(np.int32)
# All sequences in this example have the same length, but they can be variable in a real model.
sequence_lengths = np.full(num_examples, num_words - 1, dtype=np.int32)
# Train and evaluate the model.
with tf.Graph().as_default():
  with tf.Session() as session:
    # Add the data to the TensorFlow graph.
    x_t = tf.constant(x)
    y_t = tf.constant(y)
    sequence_lengths_t = tf.constant(sequence_lengths)
    # Compute unary scores from a linear layer.
    weights = tf.get_variable("weights", [num_features, num_tags])
    matricized_x_t = tf.reshape(x_t, [-1, num_features])
    matricized_unary_scores = tf.matmul(matricized_x_t, weights)
    unary_scores = tf.reshape(matricized_unary_scores,
                              [num_examples, num_words, num_tags])
    # Compute the log-likelihood of the gold sequences and keep the transition
    # params for inference at test time.
    log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
        unary_scores, y_t, sequence_lengths_t)
    # Add a training op to tune the parameters.
    loss = tf.reduce_mean(-log_likelihood)
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
    # Train for a fixed number of iterations.
    session.run(tf.global_variables_initializer())
    for i in range(1000):
      tf_unary_scores, tf_transition_params, _ = session.run(
          [unary_scores, transition_params, train_op])
      if i % 100 == 0:
        correct_labels = 0
        total_labels = 0
        for tf_unary_scores_, y_, sequence_length_ in zip(tf_unary_scores, y,
                                                          sequence_lengths):
          # Remove padding from the scores and tag sequence.
          tf_unary_scores_ = tf_unary_scores_[:sequence_length_]
          y_ = y_[:sequence_length_]
          # Compute the highest scoring sequence.
          viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
              tf_unary_scores_, tf_transition_params)
          # Evaluate word-level accuracy.
          correct_labels += np.sum(np.equal(viterbi_sequence, y_))
          total_labels += sequence_length_
        accuracy = 100.0 * correct_labels / float(total_labels)
        print("Accuracy: %.2f%%" % accuracy)
I get the error message:
Traceback (most recent call last):
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 1022, in _do_call
return fn(*args)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 1004, in _run_fn
status, run_metadata)
File "C:\Anaconda\envs\py35\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 469, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.UnimplementedError: TensorArray has size zero, but element shape <unknown> is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:@rnn/TensorArray_1"], dtype=DT_FLOAT, element_shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/TensorArrayGradV3, rnn/TensorArrayUnstack/range, gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/gradient_flow)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Francky\Documents\GitHub\nlp\neurodeid\test\CRF_v2.py", line 47, in <module>
[unary_scores, transition_params, train_op])
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 767, in run
run_metadata_ptr)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\client\session.py", line 1035, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnimplementedError: TensorArray has size zero, but element shape <unknown> is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:@rnn/TensorArray_1"], dtype=DT_FLOAT, element_shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/TensorArrayGradV3, rnn/TensorArrayUnstack/range, gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/gradient_flow)]]
Caused by op 'gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGatherV3', defined at:
File "C:\Users\Francky\Documents\GitHub\nlp\neurodeid\test\CRF_v2.py", line 41, in <module>
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\training\optimizer.py", line 288, in minimize
grad_loss=grad_loss)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\training\optimizer.py", line 354, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 482, in gradients
in_grads = grad_fn(op, *out_grads)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\tensor_array_grad.py", line 186, in _TensorArrayScatterGrad
grad = g.gather(indices)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\tensor_array_ops.py", line 348, in gather
element_shape=element_shape)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\gen_data_flow_ops.py", line 2226, in _tensor_array_gather_v3
element_shape=element_shape, name=name)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 763, in apply_op
op_def=op_def)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\framework\ops.py", line 2395, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\framework\ops.py", line 1264, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3', defined at:
File "C:\Users\Francky\Documents\GitHub\nlp\neurodeid\test\CRF_v2.py", line 37, in <module>
unary_scores, y_t, sequence_lengths_t)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\contrib\crf\python\ops\crf.py", line 156, in crf_log_likelihood
log_norm = crf_log_norm(inputs, sequence_lengths, transition_params)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\contrib\crf\python\ops\crf.py", line 123, in crf_log_norm
dtype=dtypes.float32)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\rnn.py", line 545, in dynamic_rnn
dtype=dtype)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\rnn.py", line 663, in _dynamic_rnn_loop
for ta, input_ in zip(input_ta, flat_input))
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\rnn.py", line 663, in <genexpr>
for ta, input_ in zip(input_ta, flat_input))
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\tensor_array_ops.py", line 400, in unstack
indices=math_ops.range(0, num_elements), value=value, name=name)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\tensor_array_ops.py", line 428, in scatter
name=name)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\ops\gen_data_flow_ops.py", line 2492, in _tensor_array_scatter_v3
name=name)
File "C:\Anaconda\envs\py35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 763, in apply_op
op_def=op_def)
UnimplementedError (see above for traceback): TensorArray has size zero, but element shape <unknown> is not fully defined. Currently only static shapes are supported when packing zero-size TensorArrays.
[[Node: gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGatherV3 = TensorArrayGatherV3[_class=["loc:@rnn/TensorArray_1"], dtype=DT_FLOAT, element_shape=<unknown>, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/TensorArrayGradV3, rnn/TensorArrayUnstack/range, gradients/rnn/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3_grad/TensorArrayGrad/gradient_flow)]]
num_words = 2 or num_words = 5 work. I see that https://github.com/tensorflow/tensorflow/blob/e121667dc609de978a223c56ee906368d2c4ceef/tensorflow/contrib/crf/python/ops/crf.py#L121 (mirror) already decrements sequence_lengths by 1:
# Compute the alpha values in the forward algorithm in order to get the
# partition function.
forward_cell = CrfForwardRnnCell(transition_params)
_, alphas = rnn.dynamic_rnn(
    cell=forward_cell,
    inputs=rest_of_input,
    sequence_length=sequence_lengths - 1,
    initial_state=first_input,
    dtype=dtypes.float32)
log_norm = math_ops.reduce_logsumexp(alphas, [1])
return log_norm
However, changing sequence_lengths = np.full(num_examples, num_words - 1, dtype=np.int32) to sequence_lengths = np.full(num_examples, num_words, dtype=np.int32) does not solve the issue when num_words = 1.
How to fix this issue so that the CRF layer supports sequences of length 1?
Tested with TensorFlow 1.0.0 on Windows 7 SP1 x64 Ultimate and TensorFlow-GPU 1.0.0 on Ubuntu 14.04.4 LTS x64. I created an issue in the TensorFlow repository but it got closed due to inactivity: https://github.com/tensorflow/tensorflow/issues/7751 (mirror)
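One workaround that may be worth trying (my own sketch, not something confirmed in the linked issue): pad the time dimension to at least 2 and pass the true lengths, so the dynamic_rnn inside crf_log_norm never unstacks a zero-size TensorArray:
# Hypothetical workaround: add one padding time step to the unary scores and tags.
pad_scores = tf.zeros([num_examples, 1, num_tags])
pad_tags = tf.zeros([num_examples, 1], dtype=tf.int32)
padded_scores = tf.concat([unary_scores, pad_scores], axis=1)  # time dim becomes num_words + 1
padded_tags = tf.concat([y_t, pad_tags], axis=1)
true_lengths = tf.fill([num_examples], num_words)  # real lengths; the padding step is masked out
log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
    padded_scores, padded_tags, true_lengths)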
My graph looks like this:
with graph.as_default():
    train_inputs = tf.placeholder(tf.int32, shape=[None, None])
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(tf.zeros([vocab_size, options.embed_size]))
        restorer = tf.train.Saver({'embeddings': embeddings})
        init = tf.variables_initializer([embeddings])
        uninit = tf.report_uninitialized_variables()
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        # length() returns a [batch_size,] tensor of true lengths of sentences (lengths before zero-padding)
        sequence_length = length(embed)
        lstm = tf.nn.rnn_cell.LSTMCell(options.rnn_size)
        output, _ = tf.nn.dynamic_rnn(
            lstm,
            embed,
            dtype=tf.float32,
            sequence_length=sequence_length
        )
And my session:
with tf.Session(graph=graph) as session:
    restorer.restore(session, options.restore_path)
    # tf.global_variables_initializer().run()
    init.run()
    print session.run([uninit])
    while len(data.ids):
        # data.generate_batch returns a list of size [batch_size, max_length], and zero-padding is used when the sentences are shorter than max_length. For example, batch_inputs = [[1,2,3,4], [3,2,1,0], [1,2,0,0]]
        batch_inputs, _ = data.generate_batch(options.batch_size)
        feed_dict = {train_inputs: batch_inputs}
        test = session.run([tf.shape(output)], feed_dict=feed_dict)
        print test
Function length():
def length(self, sequence):
    length = tf.sign(sequence)
    length = tf.reduce_sum(length, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length
The error I got:
Traceback (most recent call last):
File "rnn.py", line 103, in <module>
test = session.run([tf.shape(output)], feed_dict=feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value RNN/LSTMCell/W_0
[[Node: RNN/LSTMCell/W_0/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/LSTMCell/W_0)]]
Caused by op u'RNN/LSTMCell/W_0/read', defined at:
File "rnn.py", line 75, in <module>
sequence_length=sequence_length,
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 845, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 1012, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2636, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2469, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2419, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 995, in _time_step
skip_conditionals=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 403, in _rnn_step
new_output, new_state = call_cell()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 983, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 496, in __call__
dtype, self._num_unit_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 329, in _get_concat_variable
sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 359, in _get_sharded_variable
dtype=dtype))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 1024, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 850, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 346, in get_variable
validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 331, in _true_getter
caching_device=caching_device, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 677, in _get_single_variable
expected_shape=shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 224, in __init__
expected_shape=expected_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 367, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1424, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value RNN/LSTMCell/W_0
[[Node: RNN/LSTMCell/W_0/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](RNN/LSTMCell/W_0)]]
However, when I printed out the uninitialized variables, I got [array([], dtype=object)].
When I replaced init.run() with tf.global_variables_initializer().run(), it worked.
Any idea why init.run() doesn't work?
You defined init as follows:
init = tf.variables_initializer([embeddings])
This definition means that init initializes only the embeddings variable. Calling the tf.nn.dynamic_rnn() function creates more variables, representing the various internal weights in the LSTM, and these are not initialized by init.
By contrast, tf.global_variables_initializer() returns an operation that, when run, will initialize all of the (global) variables in your model, including those created for the LSTM.
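A minimal sketch of the simplest fix, assuming the same graph as in the question: initialize everything first, then let the restore overwrite the embeddings with the checkpointed values:
with tf.Session(graph=graph) as session:
    # Initializes every variable, including the LSTM kernel created by dynamic_rnn.
    session.run(tf.global_variables_initializer())
    # The restore then overwrites only 'embeddings' with the saved values.
    restorer.restore(session, options.restore_path)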
I'm kind of new to working with TensorFlow and my problem may be easy to solve, at least I hope so.
I'm trying to work with LSTMCell to predict the next label in a sequence.
Here is the code I'm using:
import tensorflow as tf
max_sequence_length = 1000
vector_length = 1
number_of_classes = 1000
batch_size = 50
num_hidden = 24
# Define graph
data = tf.placeholder(tf.int64, [None, max_sequence_length, vector_length])
# 0 must be a free class so that the mask can work
target = tf.placeholder(tf.int64, [None, max_sequence_length, number_of_classes + 1])
labels = tf.argmax(target, 2)
cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)
Then I try to get the real length of each sequence in the batch:
no_of_batches = tf.shape(data)[0]
sequence_lengths = tf.zeros([batch_size])
for i in xrange(max_sequence_length):
    data_at_t = tf.squeeze(tf.slice(data, [0, i, 0], [-1, 1, -1]))
    t = tf.scalar_mul(i, tf.ones([batch_size]))
    boolean = tf.not_equal(data_at_t, tf.zeros([no_of_batches, batch_size], dtype=tf.int64))
    sequence_lengths = tf.select(boolean, t, sequence_lengths)
And finally I try to call tf.nn.dynamic_rnn:
outputs, state = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=data,
    sequence_length=max_sequence_length,
    dtype=tf.float64
)
Then, I get a TypeError:
Traceback (most recent call last):
File "<stdin>", line 5, in <module>
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 830, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 997, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1973, in while_loop
result = context.BuildLoop(cond, body, loop_vars)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1860, in BuildLoop
pred, body, original_loop_vars, loop_vars)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1810, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 980, in _time_step
skip_conditionals=True)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 394, in _rnn_step
new_output, new_state = call_cell()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 968, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 489, in __call__
dtype, self._num_unit_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 323, in _get_concat_variable
sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 353, in _get_sharded_variable
dtype=dtype))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 830, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 673, in get_variable
custom_getter=custom_getter)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 217, in get_variable
validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 202, in _true_getter
caching_device=caching_device, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 536, in _get_single_variable
validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 211, in __init__
dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variables.py", line 281, in _init_from_args
self._initial_value = ops.convert_to_tensor(initial_value(),
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 526, in <lambda>
init_val = lambda: initializer(shape.as_list(), dtype=dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/init_ops.py", line 210, in _initializer
dtype, seed=seed)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/random_ops.py", line 235, in random_uniform
minval = ops.convert_to_tensor(minval, dtype=dtype, name="min")
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 621, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 180, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 163, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 353, in make_tensor_proto
_AssertCompatible(values, dtype)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 290, in _AssertCompatible
(dtype.name, repr(mismatch), type(mismatch).__name__))
TypeError: Expected int64, got -0.34641016151377546 of type 'float' instead.
I don't understand where this float comes from, as all other values in the script are integers. How can I solve this problem?
1) The RNN cell state is tf.float64. You set this explicitly within the tf.nn.dynamic_rnn call (dtype). The TensorFlow engine then initializes the state with the RNN's default random_uniform initializer, which is where the -0.34 float value comes from.
I'm not sure what you wanted to achieve. Please refer to https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#dynamic_rnn
2) The sequence_length = max_sequence_length argument must be an int32/int64 vector of size [batch_size], not the scalar 1000.
3) You may want to initialize the LSTMCell state too:
cell = tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True,
                               initializer=tf.constant_initializer(value=0, dtype=tf.int32))
I had a similar problem with my code. I fixed it by changing the placeholder's int dtype to float; check if that works.
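A minimal sketch of those fixes combined against the code above (sequence_lengths is assumed to be the per-example length vector the question tries to build):
# Float placeholder so the input dtype matches dtype=tf.float64 in dynamic_rnn.
data = tf.placeholder(tf.float64, [None, max_sequence_length, vector_length])
outputs, state = tf.nn.dynamic_rnn(
    cell=cell,
    inputs=data,
    sequence_length=sequence_lengths,  # a [batch_size] vector of true lengths, not a scalar
    dtype=tf.float64
)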
Hi, I'm running the Linux CPU version of TensorFlow on Ubuntu 14.04, and I'm running out of memory when I try to save my model. I'm using the Deep MNIST tutorial, which builds a convolutional network. You can find it here:
https://www.tensorflow.org/versions/r0.9/tutorials/mnist/pros/index.html#deep-mnist-for-experts
I changed a couple of things and tried to add a Saver to export the model weights. However, when I run it I get an error that says I am out of memory, which doesn't make sense to me: it can train on the data forever, but saving it somehow uses too much memory?
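For reference, the Saver addition was presumably something like the following (my reconstruction; the names are hypothetical, not from the post):
saver = tf.train.Saver()  # created after all variables are defined
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())  # r0.9-era initializer
    # ... training loop from the tutorial ...
    save_path = saver.save(sess, "model.ckpt")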
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
step 0, training accuracy 0.06
W tensorflow/core/framework/op_kernel.cc:909] Resource exhausted: OOM when allocating tensor with shape[10000,28,28,32]
Traceback (most recent call last):
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 66, in <module>
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 555, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3498, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10000,28,28,32]
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape, Variable/read)]]
Caused by op u'Conv2D', defined at:
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 28, in <module>
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
File "/home/mgump/Lambda_Project/MNIST_TRAINER.py", line 18, in conv2d
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist- packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()`
This is what it outputs when I run it. Thanks so much!