Use two different LSTM cells in TensorFlow - python

I am building a neural machine translator, and I have to use two different LSTM cells (one for the encoder, one for the decoder).
The two cells have different shapes:
the encoder (first one) is fed with the tokens of the input sentence and produces a state vector
the decoder (second one) is fed with the previous state vector and the tokens generated by itself
I wrote this in TensorFlow, and when I run the script, I get the following error (raised during the decoder phase):
outputs, states = tf.nn.rnn(cell_backward, inputs, initial_state=initial_state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 158, in rnn
(output, state) = call_cell()
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 145, in <lambda>
call_cell = lambda: cell(input_, state)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 520, in __call__
dtype, self._num_unit_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 357, in _get_concat_variable
sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 387, in _get_sharded_variable
dtype=dtype))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 732, in get_variable
partitioner=partitioner, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 596, in get_variable
partitioner=partitioner, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 161, in get_variable
caching_device=caching_device, validate_shape=validate_shape)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 437, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable backward/RNN/LSTMCell/W_0 already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/home/alexis/Documents/NMT/NMT.py", line 88, in dense_to_vector_state
outputs, states = tf.nn.rnn(cell_forward, inputs, initial_state=initial_state)
How can I explicitly specify that I want to create a completely new LSTM cell?
Thanks in advance!
Alexis

Use variable scopes
with tf.variable_scope('enc'):
    cell_enc = LSTMCell(hidden_size)

with tf.variable_scope('dec'):
    cell_dec = LSTMCell(hidden_size)
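Applied to the question's two tf.nn.rnn calls, a minimal sketch would look like the following (this uses the old TF 1.x-era API from the question; encoder_inputs, decoder_inputs, and hidden_size are assumed placeholder names, not the asker's actual variables):
import tensorflow as tf

hidden_size = 128  # assumed value

with tf.variable_scope('encoder'):
    cell_enc = tf.nn.rnn_cell.LSTMCell(hidden_size)
    # encoder variables are created under the 'encoder/' scope
    enc_outputs, enc_state = tf.nn.rnn(cell_enc, encoder_inputs, dtype=tf.float32)

with tf.variable_scope('decoder'):
    cell_dec = tf.nn.rnn_cell.LSTMCell(hidden_size)
    # decoder variables live under 'decoder/', so there is no name collision
    dec_outputs, dec_state = tf.nn.rnn(cell_dec, decoder_inputs, initial_state=enc_state)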

I am trying to do machine translation; here are my encoder and decoder. You just need to use a different variable scope for each RNN. Rather than using MultiRNNCell for the encoder, I unroll each layer manually, which lets me alternate directions between layers. Note how each layer gets its own scope.
with tf.variable_scope('encoder'):
    rnn_cell = tf.nn.rnn_cell.LSTMCell(512, num_proj=256, state_is_tuple=True)
    for level in range(3):
        with tf.variable_scope('level_%d' % level) as scope:
            state = [tf.zeros((BATCH_SIZE, sz)) for sz in rnn_cell.state_size]
            for t in range(TIME_STEPS) if level % 2 else reversed(range(TIME_STEPS)):
                y[t], state = rnn_cell(y[t], state)
                scope.reuse_variables()

with tf.variable_scope('decoder') as scope:
    rnn_cell = tf.nn.rnn_cell.MultiRNNCell(
        [
            tf.nn.rnn_cell.LSTMCell(512, num_proj=256, state_is_tuple=True),
            tf.nn.rnn_cell.LSTMCell(512, num_proj=WORD_VEC_SIZE, state_is_tuple=True)
        ], state_is_tuple=True)
    state = [[tf.zeros((BATCH_SIZE, sz)) for sz in sz_outer] for sz_outer in rnn_cell.state_size]
    W_soft = tf.get_variable('W_soft', shape=(NWORDS, WORD_VEC_SIZE), initializer=tf.truncated_normal_initializer(0.0, 1 / np.sqrt(WORD_VEC_SIZE)))
    b_soft = tf.get_variable('b_soft', shape=(NWORDS,), initializer=tf.truncated_normal_initializer(0.0, 0.01))
    cost = 0
    output = [None] * TIME_STEPS
    for t in range(TIME_STEPS):
        if t:
            last = y_[t - 1] if TRAINING else y[t - 1]
        else:
            last = tf.zeros((BATCH_SIZE, WORD_VEC_SIZE))
        y[t] = tf.concat(1, (y[t], last))
        y[t], state = rnn_cell(y[t], state)
        cost += tf.reduce_mean(tf.nn.sampled_softmax_loss(W_soft, b_soft, y[t], target_output[:, t : t + 1], 1000, NWORDS))
        output[t] = tf.reshape(tf.nn.softmax(tf.matmul(y[t], W_soft, transpose_b=True) + b_soft), (BATCH_SIZE, 1, NWORDS))
        scope.reuse_variables()
    output = tf.concat(1, output)
    cost /= TIME_STEPS

Related

Perceptual loss for image sequences in Keras

I want to implement a perceptual loss for sequential image data of shape [batch_size, sequence_length, height, width, channels].
The predictions of my model have the same shape as the input.
My problem is that I'm not able to properly feed my predictions into VGG16 in order to calculate the loss.
First, I build my VGG model as follows:
def build_vgg_model(self, weights="imagenet"):
    # Input image to extract features from
    img = Input(shape=(self.img_rows, self.img_cols, 3))
    # Mean center and rescale by variance as in PyTorch
    processed = Lambda(lambda x: (x - self.mean) / self.std)(img)
    # If inference only, just return empty model
    if self.inference_only:
        model = Model(inputs=img, outputs=[img for _ in range(len(self.vgg_layers))])
        model.trainable = False
        model.compile(loss='mse', optimizer='adam')
        return model
    # Get the vgg network from Keras applications
    if weights in ['imagenet', None]:
        vgg = VGG16(weights=weights, include_top=False)
        # model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)
    else:
        vgg = VGG16(weights=None, include_top=False)
        vgg.load_weights(weights, by_name=True)
    # Output the first three pooling layers
    vgg.outputs = [vgg.layers[i].output for i in self.vgg_layers]
    # Create model and compile
    model = Model(inputs=img, outputs=vgg(processed))
    model.trainable = False
    return model
Then I define my loss function:
def total_loss(self, mask):
    def loss(y_true, y_pred):
        # Compute predicted image with non-hole pixels set to ground truth
        y_comp = mask * y_true + (1 - mask) * y_pred
        # Compute the vgg features.
        if self.vgg_device:
            with tf.device(self.vgg_device):
                vgg_out = self.vgg(y_pred)
                vgg_gt = self.vgg(y_true)
                vgg_comp = self.vgg(y_comp)
        else:
            vgg_out = self.vgg(y_pred)
            vgg_gt = self.vgg(y_true)
            vgg_comp = self.vgg(y_comp)
        # Compute loss components
        l1 = self.loss_valid(mask, y_true, y_pred)
        l2 = self.loss_hole(mask, y_true, y_pred)
        l3 = self.loss_perceptual(vgg_out, vgg_gt, vgg_comp)
        # Return loss function
        return l1 + 6 * l2 + 0.05 * l3
    return loss

def loss_perceptual(self, vgg_out, vgg_gt, vgg_comp):
    loss = 0
    for o, c, g in zip(vgg_out, vgg_comp, vgg_gt):
        loss += self.l1(o, g) + self.l1(c, g)
    return loss
If I now run my code, I get the following error:
Traceback (most recent call last):
File "convlstm_main.py", line 51, in <module>
model = ConvLSTM(64, 64, 3, 5)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 40, in __init__
self.model = self.compile()
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 116, in compile
self.model.compile(loss=self.loss_total, optimizer='adadelta')
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 229, in compile
self.total_loss = self._prepare_total_loss(masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 692, in _prepare_total_loss
y_true, y_pred, sample_weight=sample_weight)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 71, in __call__
losses = self.call(y_true, y_pred)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 132, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 180, in loss_total
vgg_gts = self.vgg(y_trues)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 489, in __call__
output = self.call(inputs, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/convolutional.py", line 171, in call
dilation_rate=self.dilation_rate)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 3717, in conv2d
**kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 917, in convolution
name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 979, in convolution_internal
strides = _get_sequence(strides, n, channel_index, "strides")
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 74, in _get_sequence
name, n, n + 2, current_n))
ValueError: strides should be of length 1, 3 or 5 but was 2
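(Not a confirmed fix, just a reading of the traceback: VGG16 is made of 2-D convolutions that expect 4-D input of shape (batch, height, width, 3), but y_pred here is 5-D, (batch_size, sequence_length, height, width, channels), so conv2d receives a 5-D tensor and its strides-length check fails. One possible workaround is to fold the sequence axis into the batch axis before calling self.vgg; the helper below is an illustrative sketch, and the height/width/channel arguments are assumptions.)
import tensorflow as tf

def flatten_time(x, height, width, channels):
    # (batch, seq, H, W, C) -> (batch * seq, H, W, C)
    return tf.reshape(x, (-1, height, width, channels))

# e.g. vgg_out = self.vgg(flatten_time(y_pred, self.img_rows, self.img_cols, 3))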

How to freeze a Keras model with custom Keras layers (.h5) to a TensorFlow graph (.pb)?

I am trying to implement a Faster R-CNN model for object detection written by Yinghan Xu. After training and saving the model with model_all.save('filename.h5'), I am trying to freeze the Keras model as a TensorFlow graph (as a .pb file) for inference using keras_to_tensorflow.py, written by Amir Abdi. But when I try to convert it, I get ValueError: Unknown layer: RoiPoolingConv due to a custom RoiPoolingConv layer:
class RoiPoolingConv(Layer):
    '''ROI pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two 4D tensors [X_img, X_roi] with shape:
        X_img:
            `(1, rows, cols, channels)`
        X_roi:
            `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)
    # Output shape
        3D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)`
    '''
    def __init__(self, pool_size, num_rois, **kwargs):
        self.dim_ordering = K.image_dim_ordering()
        self.pool_size = pool_size
        self.num_rois = num_rois
        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        assert(len(x) == 2)
        # x[0] is image with shape (rows, cols, channels)
        img = x[0]
        # x[1] is roi with shape (num_rois, 4) with ordering (x, y, w, h)
        rois = x[1]
        input_shape = K.shape(img)
        outputs = []
        for roi_idx in range(self.num_rois):
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')
            # Resized roi of the image to pooling size (7x7)
            rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)
        final_output = K.concatenate(outputs, axis=0)
        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        # Might be (1, 4, 7, 7, 3)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
        # permute_dimensions is similar to transpose
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
        return final_output

    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
I have looked at most of the resources out there, and almost all of them suggest commenting out this layer. But since this layer is important for object detection, I was wondering whether a workaround is possible.
The complete traceback of the error (note: I've saved the file as freezekeras.py; its contents are the same as keras_to_tensorflow.py):
Using TensorFlow backend.
Traceback (most recent call last):
File "freezekeras.py", line 181, in <module>
app.run(main)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 300, in run
_run_main(main, args)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "freezekeras.py", line 127, in main
model = load_model(FLAGS.input_model, FLAGS.input_model_json, FLAGS.input_model_yaml)
File "freezekeras.py", line 105, in load_model
raise wrong_file_err
File "freezekeras.py", line 62, in load_model
model = keras.models.load_model(input_model_path)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 138, in deserialize_keras_object
': ' + class_name)
ValueError: Unknown layer: RoiPoolingConv
Try to specify the custom layer explicitly:
model = load_model('my_model.h5', custom_objects={'RoiPoolingConv': RoiPoolingConv})
Obviously, you have to rewrite the keras_to_tensorflow.py script. See the "Handling custom layers (or other custom objects) in saved models" section of the Keras FAQ.
Solution
Specify the custom layer while loading the model in keras_to_tensorflow.py:
model = keras.models.load_model(input_model_path, custom_objects={'RoiPoolingConv': RoiPoolingConv})
Import RoiPoolingConv.py into the keras_to_tensorflow project.
Specify default pool_size and num_rois for RoiPoolingConv:
def __init__(self, pool_size=7, num_rois=32, **kwargs):
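A minimal combined sketch of those three steps inside keras_to_tensorflow.py (the import path and the model filename are assumptions, not taken from the original scripts):
import keras
from RoiPoolingConv import RoiPoolingConv  # assumes RoiPoolingConv.py was copied into the project

# RoiPoolingConv.__init__ is given defaults so it can be rebuilt from config, e.g.
#     def __init__(self, pool_size=7, num_rois=32, **kwargs): ...

model = keras.models.load_model(
    input_model_path,  # e.g. 'filename.h5' from the question
    custom_objects={'RoiPoolingConv': RoiPoolingConv})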

InvalidArgumentError: input_1:0 is both fed and fetched

I found this GitHub example about printing activation maps. The code is quite simple; all I did was copy and paste the function.
def get_activations(model, model_inputs, print_shape_only=False, layer_name=None):
    print('----- activations -----')
    activations = []
    inp = model.input
    model_multi_inputs_cond = True
    if not isinstance(inp, list):
        # only one input! let's wrap it in a list.
        inp = [inp]
        model_multi_inputs_cond = False
    outputs = [layer.output for layer in model.layers if
               layer.name == layer_name or layer_name is None]  # all layer outputs
    funcs = [K.function(inp + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
    if model_multi_inputs_cond:
        list_inputs = []
        list_inputs.extend(model_inputs)
        list_inputs.append(0.)
    else:
        list_inputs = [model_inputs, 0.]
    print(list_inputs)
    layer_outputs = [func(list_inputs)[0] for func in funcs]
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        if print_shape_only:
            print(layer_activations.shape)
        else:
            print(layer_activations)
    return activations
I then passed in my model and inputs. However, it generates this error:
Traceback (most recent call last):
File "test_cnn_128.py", line 80, in <module>
get_activations(model, test_x)
File "test_cnn_128.py", line 45, in get_activations
layer_outputs = [func(list_inputs)[0] for func in funcs]
File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2666, in __call__
return self._call(inputs)
File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2635, in _call
session)
File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2587, in _make_callable
callable_fn = session._make_callable_from_options(callable_opts)
File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1414, in _make_callable_from_options
return BaseSession._Callable(self, callable_options)
File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1368, in __init__
session._session, options_ptr, status)
File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/framework/errors_impl.py", line 519, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: input_1:0 is both fed and fetched.
Exception tensorflow.python.framework.errors_impl.InvalidArgumentError: InvalidArgumentError() in <bound method _Callable.__del__ of <tensorflow.python.client.session._Callable object at 0x7f44de0cd210>> ignored
I am unsure of how to fix this.
As I posted on the thread "Keras, How to get the output of each layer?", the way to solve this is to replace the line
outputs = [
    layer.output
    for layer in model.layers
    if layer.name == layer_name or layer_name is None
]
with
outputs = [
    layer.output
    for layer in model.layers
    if layer.name == layer_name or layer_name is None
][1:]
...in order to skip the input layer.
Exclude the input layer from your output/fetch layers; that is why the error says "fed and fetched": you can't fetch the output of the input layer while also feeding that same layer.
outputs = [layer.output for layer in model.layers if
           layer.name == layer_name or layer_name is None][1:]
Or drop the layer-name filtering altogether:
outputs = [layer.output for layer in model.layers][1:]
This should fix the issue (note the [1:] at the end, which is the simple fix).
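For context, a short illustrative sketch (not from the original post) of why the first layer triggers the error: in a functional Keras model, model.layers[0] is the InputLayer, and its output is the very tensor that gets fed, so fetching it in the same K.function makes TensorFlow see input_1:0 as both fed and fetched.
inp = model.input                 # the tensor named something like 'input_1:0'
first = model.layers[0].output    # InputLayer output: the same tensor as model.input
# K.function([inp] + [K.learning_phase()], [first]) would feed input_1:0 and fetch it back,
# which is exactly what the InvalidArgumentError complains about.
# Slicing with [1:] skips that layer:
outputs = [layer.output for layer in model.layers[1:]]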

AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape' in tf.contrib.seq2seq.dynamic_decode(decoder)

I don't know why I am getting this error.
I saw some posts suggesting changing state_is_tuple=False, but that gave me another error. I think the problem is in the way I defined the LSTM cell, but I'm not sure what I should change. I followed this link, which has a similar code structure.
Here is my code:
Required placeholders
n_hidden = args.rnn_size
n_layers = args.num_layers
max_sequence_length = args.max_sequence_length
encoderEmbeddingsize = args.encoderEmbeddingsize
decoderEmbeddingsize = args.decoderEmbeddingsize
queVocabsize = len(question_vocab_to_int)
ansVocabsize = len(answer_vocab_to_int)
batch_size = args.batch_size
# Input Embedding for Encoder ## CHECK THE VOCAB SIZE!!!
encoder_input = tf.contrib.layers.embed_sequence(input_data, queVocabsize, encoderEmbeddingsize,
                                                 initializer=tf.random_uniform_initializer(0, 1))
print('encoder_input', encoder_input)
# Layers for the model
lstm_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout = rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_prob) # dropout layer
# Encoder Model
# Make two layer encoder
encoder_multirnn_cell = rnn.MultiRNNCell([dropout]*n_layers)
# Make it bidirectional
print(sequence_length)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_multirnn_cell,
                                                  inputs=encoder_input, dtype=tf.float32)  # sequence_length=sequence_length,
print('encoder_output', encoder_output)
print('encoder_state', encoder_state)
# preprocessing encoder input
initial_tensor = tf.strided_slice(target, [0, 0], [batch_size, -1], [1, 1])
decoder_input = tf.concat([tf.fill([batch_size, 1], question_vocab_to_int['<GO>']), initial_tensor], 1)
print('decoder_input', decoder_input)
## Input Embedding for the Decoder
decoder_embedding = tf.Variable(tf.random_uniform([queVocabsize+1, decoderEmbeddingsize], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding, decoder_input)
print('check')
print(decoder_embedded_input)
print(decoder_embedding)
## Decoder Model
#with tf.variable_scope("decoding") as decoding_scope:
lstm_decoder_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout_decoder = rnn.DropoutWrapper(lstm_decoder_cell, input_keep_prob=keep_prob) # droput layer
# decoder
# Make two layer encoder
decoder_multirnn_cell = rnn.MultiRNNCell([dropout_decoder] * n_layers)
# weights = tf.truncated_normal_initializer(stddev=0.1)
# biases = tf.zeros_initializer()
output_layer_function = layers_core.Dense(
    ansVocabsize, use_bias=False)  # lambda x: tf.contrib.layers.fully_connected(x, queVocabsize, scope=decoding_scope,
    #                                weights_initializer=weights,
    #                                biases_initializer=biases)
#print(decoder_multirnn_cell.output_size)
#decoding_scope.reuse_variables()
print('output_kayer_function', output_layer_function)
# training vs inference!
encoder_output = tf.transpose(encoder_output, [1, 0, 2])
attention_state = tf.zeros([batch_size, 1, decoder_multirnn_cell.output_size * 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=decoder_multirnn_cell.output_size, memory=encoder_output)
lstm_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(lstm_decoder_cell,
                                                        attention_mechanism=attention_mechanism)
attn_zero = lstm_decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
init_state = attn_zero.clone(cell_state=encoder_state)
print(('sequence!!!!!!!!1', sequence_length))
helper = tf.contrib.seq2seq.TrainingHelper(decoder_embedded_input, sequence_length)
# decoder
decoder = tf.contrib.seq2seq.BasicDecoder(lstm_decoder_cell, helper, initial_state=init_state,
                                          output_layer=output_layer_function)
print(decoder)
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
train_pred_drop = tf.nn.dropout(final_outputs, keep_prob)
logits = train_pred_drop.rnn_output
Now, I am getting the error in tf.contrib.seq2seq.dynamic_decode(decoder), as shown below:
Traceback (most recent call last):
File "test_model.py", line 272, in <module>
train_logits, infer_logits = load_model(args, tf.reverse(input_data, [-1]), target, learning_rate, sequence_length, question_vocab_to_int, answer_vocab_to_int, keep_prob ) ## load model here!
File "test_model.py", line 165, in load_model
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
swap_memory=swap_memory)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
decoder_finished) = decoder.step(time, inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 138, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py", line 1295, in call
cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
self._linear = _Linear([inputs, h], 4 * self._num_units, True)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in __init__
shapes = [a.get_shape() for a in args]
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in <listcomp>
shapes = [a.get_shape() for a in args]
AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape'

LSTM dimension mismatch in TensorFlow

I am constructing an LSTM network, and my input's dimensions are 100*100*83 (batch_size=100, steps=100, char_vector=83). I build two LSTM layers with 512 hidden units each.
# coding: utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time

class CharRNN:
    def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128, num_layers=2,
                 learning_rate=0.001, grad_clip=5, keep_prob=0.001, sampling=False):
        # True for SGD
        if sampling == True:
            self.batch_size, self.num_steps = 1, 1
        else:
            self.batch_size, self.num_steps = batch_size, num_steps
        tf.reset_default_graph()
        self.inputs, self.targets, self.keep_prob = self.build_inputs(self.batch_size, self.num_steps)
        self.keep_prob = keep_prob
        self.cell, self.initial_state = self.build_lstm(lstm_size, num_layers, self.batch_size, self.keep_prob)
        # print(self.cell.state_size)
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        print("cell state size: ", self.cell.state_size)
        print("cell initial state: ", self.initial_state)
        print("this is inputs", self.inputs)
        print("x_one_hot: ", x_one_hot)
        outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state=self.initial_state)

    def build_inputs(self, num_seqs, num_steps):
        inputs = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name="inputs")
        targets = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name="targets")
        print('inputs shape: ', inputs.shape)
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        return inputs, targets, keep_prob

    def build_lstm(self, lstm_size, num_layers, batch_size, keep_prob):
        # construct lstm cell
        lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
        # add dropout
        drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        # stack multiple rnn cells
        cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
        initial_state = cell.zero_state(batch_size, tf.float32)
        return cell, initial_state

if __name__ == '__main__':
    len_vocab = 83
    batch_size = 100
    num_steps = 100
    lstm_size = 512
    num_layers = 2
    learning_rate = 0.001
    keep_prob = 0.5
    epochs = 20
    save_every_n = 200
    print("h1")
    model = CharRNN(len_vocab, batch_size=batch_size, num_steps=num_steps, lstm_size=lstm_size, num_layers=num_layers,
                    learning_rate=learning_rate, sampling=False, keep_prob=keep_prob)
I get a dimension-mismatch error at tf.nn.dynamic_rnn.
The error message is as follows:
inputs shape: (100, 100)
cell state size: (LSTMStateTuple(c=512, h=512), LSTMStateTuple(c=512, h=512))
cell initial state: (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>))
this is inputs Tensor("inputs:0", shape=(100, 100), dtype=int32)
x_one_hot: Tensor("one_hot:0", shape=(100, 100, 83), dtype=float32)
Traceback (most recent call last):
File "./seq2_minimal.py", line 70, in <module>
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob)
File "./seq2_minimal.py", line 32, in __init__
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 762, in _time_step
(output, new_state) = call_cell()
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1066, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 891, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 441, in call
value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1189, in __call__
res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 1024 and 595 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [100,1024], [595,2048].
I searched and found that TensorFlow's LSTM cell should adjust its input size automatically, but the error message says otherwise.
It shows that the input size is [100, 1024] while the LSTM weight matrix is [595, 2048].
Thanks in advance.
Change
cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
to
cell = tf.nn.rnn_cell.MultiRNNCell([drop])
because the tensor your cell receives as input and the tensor it produces do not have the same size, so the very same cell instance cannot be stacked on top of itself.
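If you still want multiple layers, a common alternative (a sketch against the same TF 1.x API, not part of the original answer) is to build a fresh cell for every layer instead of reusing the single drop object, so each layer gets weight matrices sized for its own input:
def build_cell(lstm_size, keep_prob):
    # one independent BasicLSTMCell + dropout wrapper per layer
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)

cell = tf.nn.rnn_cell.MultiRNNCell(
    [build_cell(lstm_size, keep_prob) for _ in range(num_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)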
