InvalidArgumentError: input_1:0 is both fed and fetched

I found this GitHub example about printing activation maps. The code is quite simple; all I did was copy-paste the function.
from keras import backend as K  # the function relies on the Keras backend

def get_activations(model, model_inputs, print_shape_only=False, layer_name=None):
    print('----- activations -----')
    activations = []
    inp = model.input
    model_multi_inputs_cond = True
    if not isinstance(inp, list):
        # only one input! let's wrap it in a list.
        inp = [inp]
        model_multi_inputs_cond = False
    outputs = [layer.output for layer in model.layers if
               layer.name == layer_name or layer_name is None]  # all layer outputs
    funcs = [K.function(inp + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
    if model_multi_inputs_cond:
        list_inputs = []
        list_inputs.extend(model_inputs)
        list_inputs.append(0.)
    else:
        list_inputs = [model_inputs, 0.]
    print(list_inputs)
    layer_outputs = [func(list_inputs)[0] for func in funcs]
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        if print_shape_only:
            print(layer_activations.shape)
        else:
            print(layer_activations)
    return activations
Then I passed in my model and inputs. However, it raises this error:
Traceback (most recent call last):
  File "test_cnn_128.py", line 80, in <module>
    get_activations(model, test_x)
  File "test_cnn_128.py", line 45, in get_activations
    layer_outputs = [func(list_inputs)[0] for func in funcs]
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2666, in __call__
    return self._call(inputs)
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2635, in _call
    session)
  File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2587, in _make_callable
    callable_fn = session._make_callable_from_options(callable_opts)
  File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1414, in _make_callable_from_options
    return BaseSession._Callable(self, callable_options)
  File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1368, in __init__
    session._session, options_ptr, status)
  File "/home/fatima/.local/lib/python2.7/site-packages/tensorflow/python/framework/errors_impl.py", line 519, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: input_1:0 is both fed and fetched.
Exception tensorflow.python.framework.errors_impl.InvalidArgumentError: InvalidArgumentError() in <bound method _Callable.__del__ of <tensorflow.python.client.session._Callable object at 0x7f44de0cd210>> ignored
I am unsure of how to fix this.

As I posted on the thread "Keras, How to get the output of each layer?", the way to solve this is to replace the line
outputs = [
    layer.output
    for layer in model.layers
    if layer.name == layer_name or layer_name is None
]
with
outputs = [
    layer.output
    for layer in model.layers
    if layer.name == layer_name or layer_name is None
][1:]
...in order to skip the input layer.

Exclude the input layer from your output/fetch layers. That is why the error says "fed and fetched": you cannot fetch the output of the input layer while also feeding it, since the same tensor would then be both a feed and a fetch of one callable.
outputs = [layer.output for layer in model.layers if
           layer.name == layer_name or layer_name is None][1:]
or drop the layer-name filtering altogether:
outputs = [layer.output for layer in model.layers][1:]
Either version should fix the issue (note the [1:] at the end, which is the simple fix for your issue).
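To show the fix in context, here is a minimal usage sketch (the toy model and data are hypothetical stand-ins for the question's CNN and test_x):
import numpy as np
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense

# Hypothetical toy model standing in for the question's CNN.
x_in = Input(shape=(8,))
out = Dense(2, activation='softmax')(Dense(4, activation='relu')(x_in))
model = Model(inputs=x_in, outputs=out)

test_x = np.random.rand(3, 8)

# With [1:] applied inside get_activations, the input placeholder is only
# fed, never fetched, so the InvalidArgumentError goes away.
activations = get_activations(model, test_x, print_shape_only=True)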

Related

Training Deep Neural Network Using Tensorflow

I am trying to implement the VGG16 network using Tensorflow.
To test the model, I want to classify a dataset of images.
I started by creating a train_data with tensorflow.data.Dataset:
<TensorSliceDataset element_spec=(TensorSpec(shape=(215, 160, 3), dtype=tf.float64, name=None), TensorSpec(shape=(20,), dtype=tf.float64, name=None))>
(there are 20 output classes)
I created a custom 2D conv layer:
class CustomConv2d(Layer):
    def __init__(self, filters, kernel_size, padding, name):
        super(CustomConv2d, self).__init__()
        self.conv = Conv2D(filters=filters,
                           kernel_size=kernel_size,
                           activation='relu',
                           padding=padding,
                           name=name,
                           )
        self.batchN = BatchNormalization()

    def call(self, x, training=True):
        output = self.conv(x)
        output = self.batchN(output)
        return output
Created a submodel class:
class VGG16(Model):
    def __init__(self, input_shape, NUM_OF_CLASSES=20, dropout_parameters=0.5):
        super(VGG16, self).__init__()
        self.dropout = Dropout(dropout_parameters)
        ### First Conv Block
        self.conv_11 = Conv2D(filters=53,
                              kernel_size=(3, 3),
                              activation='relu',
                              padding='same',
                              name='conv11',
                              input_shape=input_shape
                              )
        self.conv_12 = CustomConv2d(64, (3, 3), padding='same', name='conv_12')
        self.maxpool = MaxPool2D(pool_size=(2, 2), padding='same')
        ## Second Conv Block
        self.conv21 = CustomConv2d(64, (3, 3), padding='same', name='conv_21')
        self.conv22 = CustomConv2d(64, (3, 3), padding='same', name='conv_22')
        ## Third Conv Block
        self.conv31 = CustomConv2d(256, (3, 3), padding='same', name='conv_31')
        self.conv32 = CustomConv2d(256, (3, 3), padding='same', name='conv_32')
        self.conv33 = CustomConv2d(256, (3, 3), padding='same', name='conv_33')
        ## Fourth Conv Block
        self.conv41 = CustomConv2d(512, (3, 3), padding='same', name='conv_41')
        self.conv42 = CustomConv2d(512, (3, 3), padding='same', name='conv_42')
        self.conv43 = CustomConv2d(512, (3, 3), padding='same', name='conv_43')
        ## Fifth Conv Block
        self.conv51 = CustomConv2d(512, (3, 3), padding='same', name='conv_51')
        self.conv52 = CustomConv2d(512, (3, 3), padding='same', name='conv_52')
        self.conv53 = CustomConv2d(512, (3, 3), padding='same', name='conv_53')
        #####
        self.flatten = Flatten()
        self.dense1 = Dense(1024, activation='relu', name='Dense_1')
        self.dense2 = Dense(512, activation='relu', name='Dense_2')
        self.dense3 = Dense(NUM_OF_CLASSES, activation='softmax', name='Dense_3')

    def call(self, x, training=True):
        x = self.maxpool(self.conv_12(self.conv_11(x)))
        x = self.maxpool(self.conv22(self.conv21(x)))
        x = self.maxpool(self.conv33(self.conv32(self.conv31(x))))
        x = self.maxpool(self.conv43(self.conv42(self.conv41(x))))
        x = self.maxpool(self.conv53(self.conv52(self.conv51(x))))
        x = self.flatten(x)
        x = self.dense3(self.dense2(self.dense1(x)))
        return x

model = VGG16((215, 160, 3, 1))
By the way, I don't know why I had to put a 1 at the end of the shape.
After compiling the model, when I try to fit the data to the model I get this error:
ValueError: in user code:

    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1021, in train_function *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1010, in step_function **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1000, in run_step **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Exception encountered when calling layer "vgg16_2" (type VGG16).

    in user code:

        File "/tmp/ipykernel_49926/980605695.py", line 38, in call *
            x = self.maxpool(self.conv_12(self.conv_11(x)))
        File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
            raise e.with_traceback(filtered_tb) from None
        File "/usr/local/lib/python3.8/dist-packages/keras/engine/input_spec.py", line 228, in assert_input_compatibility
            raise ValueError(f'Input {input_index} of layer "{layer_name}" '

        ValueError: Input 0 of layer "conv11" is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (215, 160, 3)

    Call arguments received:
      • x=tf.Tensor(shape=(215, 160, 3), dtype=float32)
      • training=True
I thought maybe there was a problem in the shape of the dataset.
Edit:
The problem was solved after splitting the data into batches with:
trainDataset = trainDataset.shuffle(buffer_size=20).prefetch(buffer_size=15).batch(32)
But now the training is very slow. Is it normal for it to take about 5 seconds per image of the batch?
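(One aside on the slowness, a sketch based on standard tf.data guidance rather than anything in this thread: batching before prefetching lets whole batches be prepared ahead of time instead of single examples.)
# Batch first, then prefetch whole batches instead of individual examples.
trainDataset = trainDataset.shuffle(buffer_size=20).batch(32).prefetch(buffer_size=15)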
It should be
model = VGG16((215, 160, 3))
because the Conv2D input_shape parameter takes 3 values: (height, width, channels).
You can call the model like model(np.ones((1, 215, 160, 3))), where 1 is the batch size.
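Putting it together, a minimal sketch (the shapes follow the question; np is numpy, and the dummy tensor stands in for a real batch):
import numpy as np

# Input shape is (height, width, channels); no trailing 1 is needed.
model = VGG16((215, 160, 3))

# The leading 1 is the batch dimension that Conv2D requires (min_ndim=4);
# batching the Dataset provides the same thing during fit().
preds = model(np.ones((1, 215, 160, 3)))
print(preds.shape)  # (1, 20): one softmax vector over the 20 classes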

Concatenate two pretrained model Error: The name "input_1" is used 2 times in the model

I'm trying to concatenate two pre-trained models that share the same layer names in Keras. So far I've renamed all the layers, as you can see below:
# imports assumed (tf.keras):
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense, concatenate

model1 = load_model('./new/CNN_Level1_2Dense_1024ML_Prep_ALL')
model1.input._name = model1.input.name + '_1'
for layer in model1.layers:
    layer._name = layer.name + str("_1")
model1._name = 'pretrained_model1'
print('=====================================')
model2 = load_model('./new/CNN_Level1.1_2Dense_1024ML_Prep_ALL')
model2.input._name = model2.input.name + '_2'
for layer in model2.layers:
    layer._name = layer.name + str("_2")
model2._name = 'pretrained_model2'
out1 = model1.output
out2 = model2.output
m_model = concatenate([out1, out2])
relu_dense = Dense(19, activation='relu')(m_model)
output = Dense(19, activation='softmax')(relu_dense)
merged_model = Model(inputs=[model1.input, model2.input], outputs=output)
but I still get this error:
File "Ali_text_classification-Copy1.py", line 196, in train_network
merged_model = Model(inputs=[model1.input,model2.input], outputs=output)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 517, in _method_wrapper
result = method(self, *args, **kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 120, in init
self._init_graph_network(inputs, outputs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/training/tracking/base.py", line 517, in _method_wrapper
result = method(self, *args, **kwargs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 204, in _init_graph_network
self.inputs, self.outputs)
File "/root/.local/lib/python3.6/site-packages/tensorflow/python/keras/engine/functional.py", line 1001, in _map_graph_network
str(all_names.count(name)) + ' times in the model. '
ValueError: The name "input_1" is used 2 times in the model. All layer names should be unique.
How can I fix this?
As of April 2021, based on one explanation here, I was able to modify an FPN model into a Siamese FPN. The link is below:
https://github.com/asifpatankar/Siamese_FPN
The "used 2 times" error disappeared. Maybe it can help!
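For what it's worth, a common workaround for this error (a minimal sketch under my own assumptions, not taken from the linked repo): assigning layer._name does not rename the underlying input tensors, so instead of reusing model1.input and model2.input, create fresh, uniquely named Input layers and call each loaded model on them.
# imports assumed (tf.keras):
from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model

# Fresh, uniquely named inputs; shapes are read from the loaded models.
in1 = Input(shape=model1.input_shape[1:], name='input_a')
in2 = Input(shape=model2.input_shape[1:], name='input_b')

# Calling each whole model as a layer sidesteps its internal Input names.
out1 = model1(in1)
out2 = model2(in2)

m_model = concatenate([out1, out2])
relu_dense = Dense(19, activation='relu')(m_model)
output = Dense(19, activation='softmax')(relu_dense)
merged_model = Model(inputs=[in1, in2], outputs=output)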

Unable to use MSE of VGG features in loss function

I'm using keras (tf.keras) in tensorflow 2.0.0
I have a network whose input is an image and whose output is also an image. I want to use a combination of MSE, MSE in VGG feature space, and some other losses that depend on intermediate layer outputs. I'm defining a custom loss function. I'm able to build the model and compile it with the custom loss, but when I train using fit_generator, I get a SymbolicException saying Inputs to eager execution function cannot be Keras symbolic tensors.
Full Code:
Train File:
# (Methods excerpted from the trainer class; Input and Model come from tf.keras.)
def __init__(self, gray_images: bool, verbose: bool = True):
    super().__init__(gray_images, verbose)
    self.model = None
    self.vgg_feature_extractor = VggFeaturesExtractor(model_name='vgg16', layers=[3, 6, 10])

def build_model(self):
    image_input = Input(shape=(None, None, num_input_channels))
    out1 = self.build_out1_model(image_input, num_filters, depth_t)
    out2 = self.build_out2_model(image_input, num_filters, depth_n, use_bnorm)
    enhanced_image = ...  # Some function of image_input, out1 and out2
    self.model = Model(inputs=image_input, outputs=enhanced_image)
    self.model.add_loss(loss_weights[1] * self.loss2(out2))
    self.model.compile(optimizer='adam', loss=self.vgg_loss)

def vgg_loss(self, gt_image, est_image):
    gt_features = self.vgg_feature_extractor.extract_features(gt_image)
    est_features = self.vgg_feature_extractor.extract_features(est_image)
    loss = tf.reduce_mean(tf.square(gt_features[0] - est_features[0])) + \
           tf.reduce_mean(tf.square(gt_features[1] - est_features[1])) + \
           tf.reduce_mean(tf.square(gt_features[2] - est_features[2]))
    return loss
VggFeatures.py:
from typing import List

import numpy
from tensorflow import keras


class VggFeaturesExtractor:
    def __init__(self, model_name: str, layers: List[int]):
        self.model_name = model_name
        self.layers = layers
        if model_name == 'vgg16':
            from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
            vgg_model = VGG16(include_top=False)
            self.preprocess_input = preprocess_input
        elif model_name == 'vgg19':
            from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
            vgg_model = VGG19(include_top=False)
            self.preprocess_input = preprocess_input
        else:
            raise RuntimeError(f'Unknown Model: {model_name}')
        outputs = []
        for layer_num in layers:
            outputs.append(vgg_model.layers[layer_num].output)
        self.feature_extractor = keras.Model(inputs=vgg_model.input, outputs=outputs)

    def extract_features(self, images: numpy.ndarray):
        preprocessed_images = self.preprocess_input(images)
        features = self.feature_extractor(preprocessed_images)
        return features
Stack trace:
Epoch 1/1000
Traceback (most recent call last):
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 61, in quick_execute
    num_outputs)
TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
  @tf.function
  def has_init_scope():
    my_constant = tf.constant(1.)
    with tf.init_scope():
      added = my_constant * 2
The graph tensor has name: StridedSliceGrad:0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 363, in <module>
    main()
  File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 343, in main
    args.save_interval)
  File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 92, in train_model
    verbose=self.verbose)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1297, in fit_generator
    steps_name='steps_per_epoch')
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_generator.py", line 265, in model_iteration
    batch_outs = batch_function(*batch_data)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 973, in train_on_batch
    class_weight=class_weight, reset_metrics=reset_metrics)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py", line 264, in train_on_batch
    output_loss_metrics=model._output_loss_metrics)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 311, in train_on_batch
    output_loss_metrics=output_loss_metrics))
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 268, in _process_single_batch
    grads = tape.gradient(scaled_total_loss, trainable_weights)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/backprop.py", line 1014, in gradient
    unconnected_gradients=unconnected_gradients)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/imperative_grad.py", line 76, in imperative_grad
    compat.as_str(unconnected_gradients.value))
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 911, in _backward_function_wrapper
    processed_args, remapped_captures)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1224, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 511, in call
    ctx=ctx)
  File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 75, in quick_execute
    "tensors, but found {}".format(keras_symbolic_tensors))
tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'StridedSliceGrad:0' shape=(16, 64, 64, 3) dtype=float32>]
Process finished with exit code 1
Note:
1. If I replace self.model.compile(optimizer='adam', loss=self.vgg_loss) with self.model.compile(optimizer='adam', loss='mse'), the code works fine, which implies the rest of the code is working correctly.
2. Almost every question I found on SO regarding VGG loss advises appending the VGG network to the main network, setting trainable=False for the VGG network, and then training with MSE loss (sketched after this list). But I can't do that, since I have many components in my loss function.
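For reference, a minimal sketch of the approach those answers describe (my own illustration, reusing the names image_input and enhanced_image from build_model above; as noted, I can't use it here, but it clarifies what those answers mean):
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model

vgg = VGG16(include_top=False)
vgg.trainable = False  # freeze the VGG feature extractor

# Append the frozen VGG so its features become the model output...
vgg_features = vgg(enhanced_image)
combined = Model(inputs=image_input, outputs=vgg_features)
# ...and train with plain MSE against VGG features of the ground truth.
combined.compile(optimizer='adam', loss='mse')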
I was able to fix this issue by disabling eager execution. In TensorFlow 2.0, eager execution is enabled by default:
tf.compat.v1.disable_eager_execution()
I didn't understand how this fixed the issue, though. If anybody stumbles on a similar problem, you can try disabling eager execution.
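In case it helps, here is a minimal sketch of where the call goes (my own placement; the key point is that it must run before any model or tensor is created):
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # must run before building the model

# ... build and compile the model with the custom vgg_loss as above ...
# fit_generator then runs in graph mode, where the symbolic Keras tensors
# used inside vgg_loss no longer collide with eager execution.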

AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape' in tf.contrib.seq2seq.dynamic_decode(decoder)

I don't know why I am getting this error.
I saw some posts suggesting changing state_is_tuple=False, but that gave me some other error. I think the error is in the way I defined the LSTM cell, but I'm not sure what I should change. I followed this link, which has a similar code structure.
Here is my code:
Required placeholders
# imports assumed for this snippet:
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.layers import core as layers_core

n_hidden = args.rnn_size
n_layers = args.num_layers
max_sequence_length = args.max_sequence_length
encoderEmbeddingsize = args.encoderEmbeddingsize
decoderEmbeddingsize = args.decoderEmbeddingsize
queVocabsize = len(question_vocab_to_int)
ansVocabsize = len(answer_vocab_to_int)
batch_size = args.batch_size

# Input Embedding for Encoder ## CHECK THE VOCAB SIZE!!!
encoder_input = tf.contrib.layers.embed_sequence(input_data, queVocabsize, encoderEmbeddingsize,
                                                 initializer=tf.random_uniform_initializer(0, 1))
print('encoder_input', encoder_input)

# Layers for the model
lstm_cell = rnn.BasicLSTMCell(n_hidden)  # lstm layer
dropout = rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_prob)  # dropout layer

# Encoder Model
# Make two layer encoder
encoder_multirnn_cell = rnn.MultiRNNCell([dropout] * n_layers)
# Make it bidirectional
print(sequence_length)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_multirnn_cell,
                                                  inputs=encoder_input, dtype=tf.float32)  # sequence_length=sequence_length,
print('encoder_output', encoder_output)
print('encoder_state', encoder_state)

# preprocessing encoder input
initial_tensor = tf.strided_slice(target, [0, 0], [batch_size, -1], [1, 1])
decoder_input = tf.concat([tf.fill([batch_size, 1], question_vocab_to_int['<GO>']), initial_tensor], 1)
print('decoder_input', decoder_input)

## Input Embedding for the Decoder
decoder_embedding = tf.Variable(tf.random_uniform([queVocabsize + 1, decoderEmbeddingsize], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding, decoder_input)
print('check')
print(decoder_embedded_input)
print(decoder_embedding)

## Decoder Model
# with tf.variable_scope("decoding") as decoding_scope:
lstm_decoder_cell = rnn.BasicLSTMCell(n_hidden)  # lstm layer
dropout_decoder = rnn.DropoutWrapper(lstm_decoder_cell, input_keep_prob=keep_prob)  # dropout layer
# decoder
# Make two layer encoder
decoder_multirnn_cell = rnn.MultiRNNCell([dropout_decoder] * n_layers)
# weights = tf.truncated_normal_initializer(stddev=0.1)
# biases = tf.zeros_initializer()
output_layer_function = layers_core.Dense(
    ansVocabsize, use_bias=False)  # lambda x: tf.contrib.layers.fully_connected(x, queVocabsize, scope=decoding_scope,
#                                      weights_initializer=weights,
#                                      biases_initializer=biases)
# print(decoder_multirnn_cell.output_size)
# decoding_scope.reuse_variables()
print('output_layer_function', output_layer_function)

# training vs inference!
encoder_output = tf.transpose(encoder_output, [1, 0, 2])
attention_state = tf.zeros([batch_size, 1, decoder_multirnn_cell.output_size * 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=decoder_multirnn_cell.output_size, memory=encoder_output)
lstm_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(lstm_decoder_cell,
                                                        attention_mechanism=attention_mechanism)
attn_zero = lstm_decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
init_state = attn_zero.clone(cell_state=encoder_state)
print(('sequence!!!!!!!!1', sequence_length))
helper = tf.contrib.seq2seq.TrainingHelper(decoder_embedded_input, sequence_length)
# decoder
decoder = tf.contrib.seq2seq.BasicDecoder(lstm_decoder_cell, helper, initial_state=init_state,
                                          output_layer=output_layer_function)
print(decoder)
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
train_pred_drop = tf.nn.dropout(final_outputs, keep_prob)
logits = train_pred_drop.rnn_output
Now, I am getting the error in tf.contrib.seq2seq.dynamic_decode(decoder), as shown below:
Traceback (most recent call last):
  File "test_model.py", line 272, in <module>
    train_logits, infer_logits = load_model(args, tf.reverse(input_data, [-1]), target, learning_rate, sequence_length, question_vocab_to_int, answer_vocab_to_int, keep_prob)  ## load model here!
  File "test_model.py", line 165, in load_model
    final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
    swap_memory=swap_memory)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
    decoder_finished) = decoder.step(time, inputs, state)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 138, in step
    cell_outputs, cell_state = self._cell(inputs, state)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py", line 1295, in call
    cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
    self._linear = _Linear([inputs, h], 4 * self._num_units, True)
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in __init__
    shapes = [a.get_shape() for a in args]
  File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in <listcomp>
    shapes = [a.get_shape() for a in args]
AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape'

Use two different LSTM cell in Tensorflow

I am building a neural machine translator, and I have to use two different LSTM cells (one for the encoder and one for the decoder).
The two cells have different shapes:
the encoder (first one) is fed the tokens of the input sentence and produces a state vector
the decoder (second one) is fed the previous state vector and the tokens generated by itself
I wrote this in TensorFlow, and when I run the script, I get the following error (raised during the decoder phase):
outputs, states = tf.nn.rnn(cell_backward, inputs, initial_state=initial_state)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 158, in rnn
    (output, state) = call_cell()
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn.py", line 145, in <lambda>
    call_cell = lambda: cell(input_, state)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 520, in __call__
    dtype, self._num_unit_shards)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 357, in _get_concat_variable
    sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/rnn_cell.py", line 387, in _get_sharded_variable
    dtype=dtype))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 732, in get_variable
    partitioner=partitioner, validate_shape=validate_shape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 596, in get_variable
    partitioner=partitioner, validate_shape=validate_shape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 161, in get_variable
    caching_device=caching_device, validate_shape=validate_shape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/variable_scope.py", line 437, in _get_single_variable
    name, "".join(traceback.format_list(tb))))
ValueError: Variable backward/RNN/LSTMCell/W_0 already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:

  File "/home/alexis/Documents/NMT/NMT.py", line 88, in dense_to_vector_state
    outputs, states = tf.nn.rnn(cell_forward, inputs, initial_state=initial_state)
How can I explicitly specify that I want to create a totally new LSTM cell?
Thanks in advance!
Alexis
Use variable scopes:
with tf.variable_scope('enc'):
    cell_enc = LSTMCell(hidden_size)
with tf.variable_scope('dec'):
    cell_dec = LSTMCell(hidden_size)
This way the encoder and decoder variables are created under different name prefixes ('enc/...', 'dec/...'), so their names no longer collide.
I am trying to do machine translation. Here are my encoder and decoder. You just need to use different variable scopes for each RNN. Rather than using MultiRNNCell for the encoder, I unroll each layer manually, which lets me alternate directions between layers. See how each layer gets its own scope.
import numpy as np
import tensorflow as tf

# (y, y_, target_output, TRAINING, BATCH_SIZE, TIME_STEPS, WORD_VEC_SIZE and
#  NWORDS are defined elsewhere in the script.)
with tf.variable_scope('encoder'):
    rnn_cell = tf.nn.rnn_cell.LSTMCell(512, num_proj=256, state_is_tuple=True)
    for level in range(3):
        with tf.variable_scope('level_%d' % level) as scope:
            state = [tf.zeros((BATCH_SIZE, sz)) for sz in rnn_cell.state_size]
            for t in range(TIME_STEPS) if level % 2 else reversed(range(TIME_STEPS)):
                y[t], state = rnn_cell(y[t], state)
                scope.reuse_variables()

with tf.variable_scope('decoder') as scope:
    rnn_cell = tf.nn.rnn_cell.MultiRNNCell \
        ([
            tf.nn.rnn_cell.LSTMCell(512, num_proj=256, state_is_tuple=True),
            tf.nn.rnn_cell.LSTMCell(512, num_proj=WORD_VEC_SIZE, state_is_tuple=True)
        ], state_is_tuple=True)
    state = [[tf.zeros((BATCH_SIZE, sz)) for sz in sz_outer] for sz_outer in rnn_cell.state_size]
    W_soft = tf.get_variable('W_soft', shape=(NWORDS, WORD_VEC_SIZE),
                             initializer=tf.truncated_normal_initializer(0.0, 1 / np.sqrt(WORD_VEC_SIZE)))
    b_soft = tf.get_variable('b_soft', shape=(NWORDS,), initializer=tf.truncated_normal_initializer(0.0, 0.01))
    cost = 0
    output = [None] * TIME_STEPS
    for t in range(TIME_STEPS):
        if t:
            last = y_[t - 1] if TRAINING else y[t - 1]
        else:
            last = tf.zeros((BATCH_SIZE, WORD_VEC_SIZE))
        y[t] = tf.concat(1, (y[t], last))
        y[t], state = rnn_cell(y[t], state)
        cost += tf.reduce_mean(tf.nn.sampled_softmax_loss(W_soft, b_soft, y[t],
                                                          target_output[:, t:t + 1], 1000, NWORDS))
        output[t] = tf.reshape(tf.nn.softmax(tf.matmul(y[t], W_soft, transpose_b=True) + b_soft),
                               (BATCH_SIZE, 1, NWORDS))
        scope.reuse_variables()
    output = tf.concat(1, output)
    cost /= TIME_STEPS
