Perceptual loss for image sequences in Keras

I want to implement a perceptual loss for sequential image data of shape [batch_size, sequence_length, height, width, channels].
The predictions of my model have the same shape as the input.
My problem is that I'm not able to properly feed my predictions into the VGG16 in order to calculate the loss.
First, I build my VGG as follows:
def build_vgg_model(self, weights="imagenet"):
    # Input image to extract features from
    img = Input(shape=(self.img_rows, self.img_cols, 3))

    # Mean center and rescale by variance as in PyTorch
    processed = Lambda(lambda x: (x - self.mean) / self.std)(img)

    # If inference only, just return empty model
    if self.inference_only:
        model = Model(inputs=img, outputs=[img for _ in range(len(self.vgg_layers))])
        model.trainable = False
        model.compile(loss='mse', optimizer='adam')
        return model

    # Get the vgg network from Keras applications
    if weights in ['imagenet', None]:
        vgg = VGG16(weights=weights, include_top=False)
        # model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)
    else:
        vgg = VGG16(weights=None, include_top=False)
        vgg.load_weights(weights, by_name=True)

    # Output the first three pooling layers
    vgg.outputs = [vgg.layers[i].output for i in self.vgg_layers]

    # Create model and compile
    model = Model(inputs=img, outputs=vgg(processed))
    model.trainable = False
    return model
Then I define my loss function:
def total_loss(self, mask):
    def loss(y_true, y_pred):
        # Compute predicted image with non-hole pixels set to ground truth
        y_comp = mask * y_true + (1 - mask) * y_pred

        # Compute the vgg features.
        if self.vgg_device:
            with tf.device(self.vgg_device):
                vgg_out = self.vgg(y_pred)
                vgg_gt = self.vgg(y_true)
                vgg_comp = self.vgg(y_comp)
        else:
            vgg_out = self.vgg(y_pred)
            vgg_gt = self.vgg(y_true)
            vgg_comp = self.vgg(y_comp)

        # Compute loss components
        l1 = self.loss_valid(mask, y_true, y_pred)
        l2 = self.loss_hole(mask, y_true, y_pred)
        l3 = self.loss_perceptual(vgg_out, vgg_gt, vgg_comp)

        # Return weighted sum of the loss components
        return l1 + 6 * l2 + 0.05 * l3
    return loss

def loss_perceptual(self, vgg_out, vgg_gt, vgg_comp):
    loss = 0
    for o, c, g in zip(vgg_out, vgg_comp, vgg_gt):
        loss += self.l1(o, g) + self.l1(c, g)
    return loss
If I now run my code, I get the following error:
Traceback (most recent call last):
File "convlstm_main.py", line 51, in <module>
model = ConvLSTM(64, 64, 3, 5)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 40, in __init__
self.model = self.compile()
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 116, in compile
self.model.compile(loss=self.loss_total, optimizer='adadelta')
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 229, in compile
self.total_loss = self._prepare_total_loss(masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 692, in _prepare_total_loss
y_true, y_pred, sample_weight=sample_weight)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 71, in __call__
losses = self.call(y_true, y_pred)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 132, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 180, in loss_total
vgg_gts = self.vgg(y_trues)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 489, in __call__
output = self.call(inputs, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/convolutional.py", line 171, in call
dilation_rate=self.dilation_rate)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 3717, in conv2d
**kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 917, in convolution
name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 979, in convolution_internal
strides = _get_sequence(strides, n, channel_index, "strides")
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 74, in _get_sequence
name, n, n + 2, current_n))
ValueError: strides should be of length 1, 3 or 5 but was 2
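The traceback comes from Conv2D rejecting a 5-D input: VGG16 expects image batches of shape [n, height, width, channels], while the loss passes the full [batch_size, sequence_length, height, width, channels] tensor. A minimal sketch of one workaround, assuming the perceptual loss can score each frame independently (the helper name seq_vgg_features is hypothetical):

def seq_vgg_features(vgg, y):
    # y has shape [batch, seq_len, h, w, c]; VGG16 wants [n, h, w, c].
    shape = tf.shape(y)
    # Fold the sequence axis into the batch axis: [batch * seq_len, h, w, c].
    frames = tf.reshape(y, [-1, shape[2], shape[3], shape[4]])
    # The VGG now sees an ordinary 4-D image batch.
    return vgg(frames)

Alternatively, wrapping the VGG model in keras.layers.TimeDistributed applies it once per time step and preserves the sequence axis.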

Related

Problem completing BERT model for sentiment classification

I am trying to figure out sentiment classification on movie reviews using BERT, transformers and tensorflow. This is the code I currently have:
def read_dataset(filename, model_name="bert-base-uncased"):
    """Reads a dataset from the specified path and returns sentences and labels"""
    tokenizer = BertTokenizer.from_pretrained(model_name)
    with open(filename, "r", encoding="utf-8") as f:
        lines = f.readlines()
    # preallocate memory for the data
    sents, labels = list(), np.empty((len(lines), 1), dtype=int)
    for i, line in enumerate(lines):
        text, str_label, _ = line.split("\t")
        labels[i] = int(str_label.split("=")[1] == "POS")
        sents.append(text)
    return dict(tokenizer(sents, padding=True, truncation=True, return_tensors="tf")), labels

class BertMLP(tf.keras.Model):
    def __init__(self, embed_batch_size=100, model_name="bert-base-cased"):
        super(BertMLP, self).__init__()
        self.bs = embed_batch_size
        self.model = TFBertModel.from_pretrained(model_name)
        self.classification_head = tf.keras.models.Sequential(
            layers=[
                tf.keras.Input(shape=(self.model.config.hidden_size,)),
                tf.keras.layers.Dense(350, activation="tanh"),
                tf.keras.layers.Dense(200, activation="tanh"),
                tf.keras.layers.Dense(50, activation="tanh"),
                tf.keras.layers.Dense(1, activation="sigmoid", use_bias=False)
            ]
        )

    def call(self, inputs):
        outputs = self.model(inputs)
        return outputs

def evaluate(model, inputs, labels, loss_func):
    mean_loss = tf.keras.metrics.Mean(name="train_loss")
    accuracy = tf.keras.metrics.BinaryAccuracy(name="train_accuracy")
    predictions = model(inputs)
    mean_loss(loss_func(labels, predictions))
    accuracy(labels, predictions)
    return mean_loss.result(), accuracy.result() * 100

if __name__ == "__main__":
    train = read_dataset("datasets/rt-polarity.train.vecs")
    dev = read_dataset("datasets/rt-polarity.dev.vecs")
    test = read_dataset("datasets/rt-polarity.test.vecs")
    mlp = BertMLP()
    mlp.compile(tf.keras.optimizers.SGD(learning_rate=0.01), loss='mse')
    dev_loss, dev_acc = evaluate(mlp, *dev, tf.keras.losses.MeanSquaredError())
    print("Before training:", f"Dev Loss: {dev_loss}, Dev Acc: {dev_acc}")
    mlp.fit(*train, epochs=10, batch_size=10)
    dev_loss, dev_acc = evaluate(mlp, *dev, tf.keras.losses.MeanSquaredError())
    print("After training:", f"Dev Loss: {dev_loss}, Dev Acc: {dev_acc}")
However, when I run this code, I get an error:
Traceback (most recent call last):
File "C:\Users\home\anaconda3\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "c:\users\home\downloads\mlp.py", line 60, in <module>
dev_loss, dev_acc = evaluate(mlp, *dev, tf.keras.losses.MeanSquaredError())
File "c:\users\home\downloads\mlp.py", line 46, in evaluate
predictions = model(inputs)
File "C:\Users\home\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "c:\users\home\downloads\mlp.py", line 39, in call
outputs = self.model(inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\modeling_tf_utils.py", line 409, in run_call_with_unpacked_inputs
return func(self, **unpacked_inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 1108, in call
outputs = self.bert(
File "C:\Users\home\anaconda3\lib\site-packages\transformers\modeling_tf_utils.py", line 409, in run_call_with_unpacked_inputs
return func(self, **unpacked_inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 781, in call
embedding_output = self.embeddings(
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 203, in call
inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
InvalidArgumentError: Exception encountered when calling layer "embeddings" (type TFBertEmbeddings).
indices[1174,8] = 29550 is not in [0, 28996) [Op:ResourceGather]
Call arguments received:
• input_ids=tf.Tensor(shape=(1599, 73), dtype=int32)
• position_ids=None
• token_type_ids=tf.Tensor(shape=(1599, 73), dtype=int32)
• inputs_embeds=None
• past_key_values_length=0
• training=False
I googled for a while, and I can't find anything conclusive. I am pretty sure it has something to do with this part:
def call(self, inputs):
    outputs = self.model(inputs)
    return outputs
But again, I have tried a lot of different things, including limiting dataset size and installing different versions of transformers and tensorflow, but to no avail. Please let me know what I'm doing wrong. Thank you!
OP was using bert-base-cased for their model and bert-base-uncased for their tokenizer, causing issues during training when the vocab sizes of the model and the tokenized data differed. The error message shows it directly: token id 29550, produced by the uncased tokenizer (vocab size 30522), is out of range for the cased model's embedding table (vocab size 28996).
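A minimal sketch of the fix, reusing the functions above and passing one shared checkpoint name to both the tokenizer and the model so their vocabularies match:

MODEL_NAME = "bert-base-uncased"  # one checkpoint for tokenizer and model alike

# Tokenize with the same checkpoint the model weights come from.
train = read_dataset("datasets/rt-polarity.train.vecs", model_name=MODEL_NAME)
dev = read_dataset("datasets/rt-polarity.dev.vecs", model_name=MODEL_NAME)

# Build the model from that same checkpoint.
mlp = BertMLP(model_name=MODEL_NAME)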

new bug in a variational autoencoder (keras)

I used to use this code to train a variational autoencoder (I found the code on a forum and adapted it to my needs):
import pickle
from pylab import mpl, plt

# read the results
filename = r'XXX.pic'
data_file = open(filename, 'rb')
X_sec = pickle.load(data_file)  # [:,3000:]
data_file.close()

size = X_sec.shape[0]
prop = 0.75
cut = int(size * prop)
X_train = X_sec[:cut]
X_test = X_sec[cut:]
std = X_train.std()
X_train /= std
X_test /= std

import keras
from keras import layers
from keras import backend as K
from keras.models import Model
import numpy as np

# encoding_dim = 12
sig_shape = (3600,)
batch_size = 128
latent_dim = 12

input_sig = keras.Input(shape=sig_shape)
x = layers.Dense(128, activation='relu')(input_sig)
x = layers.Dense(64, activation='relu')(x)
shape_before_flattening = K.int_shape(x)
x = layers.Dense(32, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)
encoder = Model(input_sig, [z_mean, z_log_var])

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var) * epsilon

z = layers.Lambda(sampling)([z_mean, z_log_var])

decoder_input = layers.Input(K.int_shape(z)[1:])
x = layers.Dense(np.prod(shape_before_flattening[1:]), activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(3600, activation='linear')(x)
decoder = Model(decoder_input, x)
z_decoded = decoder(z)

class CustomVariationalLayer(keras.layers.Layer):
    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        xent_loss = keras.metrics.mae(x, z_decoded)
        kl_loss = -5e-4 * K.mean(
            1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        return x

y = CustomVariationalLayer()([input_sig, z_decoded])
vae = Model(input_sig, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()
vae.fit(x=X_train, y=None, shuffle=True, epochs=100, batch_size=batch_size,
        validation_data=(X_test, None))
It used to work smoothly, but I have updated my libraries and now I get this error:
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1619, in _create_c_op
    c_op = c_api.TF_FinishOperation(op_desc)
InvalidArgumentError: Duplicate node name in graph: 'lambda_1/random_normal/shape'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py", line 74, in <module>
    z = layers.Lambda(sampling)([z_mean, z_log_var])
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py", line 75, in symbolic_fn_wrapper
    return func(*args, **kwargs)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\engine\base_layer.py", line 506, in __call__
    output_shape = self.compute_output_shape(input_shape)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py", line 674, in compute_output_shape
    x = self.call(xs)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py", line 716, in call
    return self.function(inputs, **arguments)
  File "I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py", line 71, in sampling
    mean=0., stddev=1.)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py", line 4329, in random_normal
    shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\keras\backend.py", line 5602, in random_normal
    shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\random_ops.py", line 69, in random_normal
    shape_tensor = tensor_util.shape_tensor(shape)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\tensor_util.py", line 994, in shape_tensor
    return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1314, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 1368, in _autopacking_conversion_function
    return _autopacking_helper(v, dtype, name or "packed")
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 1304, in _autopacking_helper
    return gen_array_ops.pack(elems_as_tensors, name=scope)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 5704, in pack
    "Pack", values=values, axis=axis, name=name)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 742, in _apply_op_helper
    attrs=attr_protos, op_def=op_def)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 595, in _create_op_internal
    compute_device)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3322, in _create_op_internal
    op_def=op_def)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1786, in __init__
    control_input_ops)
  File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1622, in _create_c_op
    raise ValueError(str(e))
ValueError: Duplicate node name in graph: 'lambda_1/random_normal/shape'
I do not know this error: "Duplicate node name in graph". Does anyone have a clue? Thanks.
If you're using tf 2.x, then import your keras modules as follows.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
More on this: #36509, #130
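Applied to the script above, the import block becomes (a sketch, assuming TF 2.x and that nothing else in the script still imports the standalone keras package):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model

Mixing the standalone keras package with TF 2.x is a common source of this duplicate-node-name symptom; the issues referenced above track the same behavior.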

How to freeze a keras model with custom keras layers(.h5) to tensorflow graph(.pb)?

I am trying to implement a Faster-RCNN model for object detection written by Yinghan Xu. After I have trained and saved the model with model_all.save('filename.h5'), I am trying to freeze the Keras model as a TensorFlow graph (as .pb) for inference using keras_to_tensorflow.py written by Amir Abdi. But when I try to convert it, I get ValueError: Unknown layer: RoiPoolingConv due to a custom RoiPoolingConv layer:
class RoiPoolingConv(Layer):
    '''ROI pooling layer for 2D inputs.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun
    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two 4D tensors [X_img, X_roi] with shape:
        X_img:
            `(1, rows, cols, channels)`
        X_roi:
            `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)
    # Output shape
        3D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)`
    '''
    def __init__(self, pool_size, num_rois, **kwargs):
        self.dim_ordering = K.image_dim_ordering()
        self.pool_size = pool_size
        self.num_rois = num_rois
        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        assert(len(x) == 2)
        # x[0] is image with shape (rows, cols, channels)
        img = x[0]
        # x[1] is roi with shape (num_rois, 4) with ordering (x, y, w, h)
        rois = x[1]
        input_shape = K.shape(img)
        outputs = []
        for roi_idx in range(self.num_rois):
            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')
            # Resized roi of the image to pooling size (7x7)
            rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
            outputs.append(rs)
        final_output = K.concatenate(outputs, axis=0)
        # Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
        # Might be (1, 4, 7, 7, 3)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
        # permute_dimensions is similar to transpose
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
        return final_output

    def get_config(self):
        config = {'pool_size': self.pool_size,
                  'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
I have looked at most of the resources out there, and almost all of them suggest commenting out this layer. But since this layer is important for object detection, I was wondering if a workaround is possible.
The complete traceback of the error (note: I've saved the file as freezekeras.py; its contents are the same as keras_to_tensorflow.py):
Using TensorFlow backend.
Traceback (most recent call last):
File "freezekeras.py", line 181, in <module>
app.run(main)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 300, in run
_run_main(main, args)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "freezekeras.py", line 127, in main
model = load_model(FLAGS.input_model, FLAGS.input_model_json, FLAGS.input_model_yaml)
File "freezekeras.py", line 105, in load_model
raise wrong_file_err
File "freezekeras.py", line 62, in load_model
model = keras.models.load_model(input_model_path)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 138, in deserialize_keras_object
': ' + class_name)
ValueError: Unknown layer: RoiPoolingConv
Try to specify the custom layer explicitly:
model = load_model('my_model.h5', custom_objects={'RoiPoolingConv': RoiPoolingConv})
Obviously, you have to rewrite the keras_to_tensorflow.py script. See the "Handling custom layers (or other custom objects) in saved models" section of the Keras FAQ.
Solution (a consolidated sketch follows the steps):
1. Specify the custom layer while loading the model in keras_to_tensorflow.py:
model = keras.models.load_model(input_model_path, custom_objects={'RoiPoolingConv': RoiPoolingConv})
2. Import RoiPoolingConv.py into the keras_to_tensorflow project.
3. Specify default pool_size and num_rois for RoiPoolingConv:
def __init__(self, pool_size=7, num_rois=32, **kwargs):
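Putting the steps together, a minimal sketch of the load path in keras_to_tensorflow.py (the module name roi_pooling_conv for RoiPoolingConv.py is an assumption about the file layout):

import keras
from roi_pooling_conv import RoiPoolingConv  # hypothetical module name for RoiPoolingConv.py

def load_model(input_model_path):
    # Register the custom class so deserialization can resolve 'RoiPoolingConv'.
    return keras.models.load_model(
        input_model_path,
        custom_objects={'RoiPoolingConv': RoiPoolingConv})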

AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape' in tf.contrib.seq2seq.dynamic_decode(decoder)

I don't know why I am getting this error.
I saw some posts suggesting state_is_tuple=False, but that gave me some other error. I think the problem is in the way I defined the LSTM cell, but I am not sure what I should change. I followed this link, which has a similar code structure.
Here is my code:
# Required placeholders
n_hidden = args.rnn_size
n_layers = args.num_layers
max_sequence_length = args.max_sequence_length
encoderEmbeddingsize = args.encoderEmbeddingsize
decoderEmbeddingsize = args.decoderEmbeddingsize
queVocabsize = len(question_vocab_to_int)
ansVocabsize = len(answer_vocab_to_int)
batch_size = args.batch_size

# Input Embedding for Encoder ## CHECK THE VOCAB SIZE!!!
encoder_input = tf.contrib.layers.embed_sequence(input_data, queVocabsize, encoderEmbeddingsize,
                                                 initializer=tf.random_uniform_initializer(0, 1))
print('encoder_input', encoder_input)

# Layers for the model
lstm_cell = rnn.BasicLSTMCell(n_hidden)  # lstm layer
dropout = rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_prob)  # dropout layer

# Encoder Model
# Make two layer encoder
encoder_multirnn_cell = rnn.MultiRNNCell([dropout] * n_layers)
# Make it bidirectional
print(sequence_length)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_multirnn_cell,
                                                  inputs=encoder_input, dtype=tf.float32)  # sequence_length=sequence_length,
print('encoder_output', encoder_output)
print('encoder_state', encoder_state)

# preprocessing encoder input
initial_tensor = tf.strided_slice(target, [0, 0], [batch_size, -1], [1, 1])
decoder_input = tf.concat([tf.fill([batch_size, 1], question_vocab_to_int['<GO>']), initial_tensor], 1)
print('decoder_input', decoder_input)

## Input Embedding for the Decoder
decoder_embedding = tf.Variable(tf.random_uniform([queVocabsize + 1, decoderEmbeddingsize], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding, decoder_input)
print('check')
print(decoder_embedded_input)
print(decoder_embedding)

## Decoder Model
# with tf.variable_scope("decoding") as decoding_scope:
lstm_decoder_cell = rnn.BasicLSTMCell(n_hidden)  # lstm layer
dropout_decoder = rnn.DropoutWrapper(lstm_decoder_cell, input_keep_prob=keep_prob)  # dropout layer
# decoder
# Make two layer decoder
decoder_multirnn_cell = rnn.MultiRNNCell([dropout_decoder] * n_layers)
# weights = tf.truncated_normal_initializer(stddev=0.1)
# biases = tf.zeros_initializer()
output_layer_function = layers_core.Dense(ansVocabsize, use_bias=False)
# lambda x: tf.contrib.layers.fully_connected(x, queVocabsize, scope=decoding_scope,
#                                             weights_initializer=weights,
#                                             biases_initializer=biases)
# print(decoder_multirnn_cell.output_size)
# decoding_scope.reuse_variables()
print('output_layer_function', output_layer_function)

# training vs inference!
encoder_output = tf.transpose(encoder_output, [1, 0, 2])
attention_state = tf.zeros([batch_size, 1, decoder_multirnn_cell.output_size * 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=decoder_multirnn_cell.output_size, memory=encoder_output)
lstm_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(lstm_decoder_cell,
                                                        attention_mechanism=attention_mechanism)
attn_zero = lstm_decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
init_state = attn_zero.clone(cell_state=encoder_state)
print(('sequence!!!!!!!!1', sequence_length))
helper = tf.contrib.seq2seq.TrainingHelper(decoder_embedded_input, sequence_length)
# decoder
decoder = tf.contrib.seq2seq.BasicDecoder(lstm_decoder_cell, helper, initial_state=init_state,
                                          output_layer=output_layer_function)
print(decoder)
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
train_pred_drop = tf.nn.dropout(final_outputs, keep_prob)
logits = train_pred_drop.rnn_output
Now, I am getting the error in tf.contrib.seq2seq.dynamic_decode(decoder), as shown below:
Traceback (most recent call last):
File "test_model.py", line 272, in <module>
train_logits, infer_logits = load_model(args, tf.reverse(input_data, [-1]), target, learning_rate, sequence_length, question_vocab_to_int, answer_vocab_to_int, keep_prob ) ## load model here!
File "test_model.py", line 165, in load_model
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
swap_memory=swap_memory)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
decoder_finished) = decoder.step(time, inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 138, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py", line 1295, in call
cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
self._linear = _Linear([inputs, h], 4 * self._num_units, True)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in __init__
shapes = [a.get_shape() for a in args]
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in <listcomp>
shapes = [a.get_shape() for a in args]
AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape'
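No answer was posted here, but the traceback points at a state-structure mismatch: the AttentionWrapper is built around the single lstm_decoder_cell, while init_state is cloned from encoder_state, which comes from a two-layer MultiRNNCell and is therefore a tuple of LSTMStateTuples. The inner cell then receives an LSTMStateTuple where it expects a plain tensor, hence the get_shape failure in _Linear. A minimal sketch of one way to make the structures agree, assuming the decoder should mirror the two-layer encoder:

# Wrap the multi-layer decoder cell, not the single inner cell, so the
# wrapped state has the same structure as the multi-layer encoder state.
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=n_hidden, memory=encoder_output)
decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
    decoder_multirnn_cell, attention_mechanism=attention_mechanism)

# Clone the wrapper's zero state with the encoder's final state.
init_state = decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32).clone(
    cell_state=encoder_state)

Note also that BahdanauAttention expects batch-major memory of shape [batch, time, depth], so the tf.transpose applied to encoder_output above may need to be dropped as well.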

Tensorflow - Incompatible Shape

Classifying MNIST digits with TensorFlow using a 2-layer RNN approach. Training works fine, but when evaluating accuracy, an incompatible shape of the test data is reported.
import tensorflow as tf
import inspect
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

hm_epochs = 1
n_classes = 10
batch_size = 128
chunk_size = 28
n_chunks = 28
rnn_size = 128

x = tf.placeholder('float', [None, n_chunks, chunk_size])
y = tf.placeholder('float')

def lstm_cell():
    if 'reuse' in inspect.getargspec(
            tf.contrib.rnn.BasicLSTMCell.__init__).args:
        return tf.contrib.rnn.BasicLSTMCell(
            rnn_size, forget_bias=0.0, state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
    else:
        return tf.contrib.rnn.BasicLSTMCell(
            rnn_size, forget_bias=0.0, state_is_tuple=True)

def attn_cell():
    return tf.contrib.rnn.DropoutWrapper(
        lstm_cell())

def recurrent_neural_network(x):
    layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([attn_cell(), attn_cell()], state_is_tuple=True)
    initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
    outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
    return output

def train_neural_network(x):
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                epoch_x = epoch_x.reshape((batch_size, n_chunks, chunk_size))
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        testdata = np.reshape(mnist.test.images, (10000, n_chunks, chunk_size))
        print("Testdata ", testdata.shape)
        print("x ", x)
        print('Accuracy:', accuracy.eval({x: testdata, y: mnist.test.labels}))

train_neural_network(x)
However, the shapes of the test data and the placeholder are printed as follows. Aren't they compatible?
Epoch 0 completed out of 1 loss: 228.159379691
Testdata (10000, 28, 28)
x Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32)
Error:
Caused by op 'rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat', defined at:
  File "main.py", line 90, in <module>
    train_neural_network(x)
  File "main.py", line 59, in train_neural_network
    prediction = recurrent_neural_network(x)
  File "main.py", line 52, in recurrent_neural_network
    outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 1212, in static_rnn
    (output, state) = call_cell()
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 1199, in <lambda>
    call_cell = lambda: cell(input_, state)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 441, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 916, in call
    cur_inp, new_state = cell(cur_inp, cur_state)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 752, in __call__
    output, new_state = self._cell(inputs, state, scope)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
    return super(RNNCell, self).__call__(inputs, state)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 441, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 383, in call
    concat = _linear([inputs, h], 4 * self._num_units, True)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1021, in _linear
    res = math_ops.matmul(array_ops.concat(args, 1), weights)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1048, in concat
    name=name)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 495, in _concat_v2
    name=name)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [10000,28] vs. shape[1] = [128,128]
    [[Node: rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](split, MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1, rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat/axis)]]
When I print the shape of the training data, it is (128, 28, 28). I am confused about why the test data leads to the error, because both the training data and the test data are formatted the same way, that is (?, n_chunks, chunk_size). Thanks in advance.
The issue is that you always create the initial state with its shape set to the training batch size instead of the eval batch size.
This is the culprit line:
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
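A minimal sketch of one fix, assuming everything else stays the same: derive the batch size from the input tensor when building the graph instead of hard-coding 128, so the same graph handles both the 128-sample training batches and the 10000-sample test set.

def recurrent_neural_network(x):
    layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}
    # Read the batch size from the placeholder instead of hard-coding it.
    dynamic_batch_size = tf.shape(x)[0]
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([attn_cell(), attn_cell()], state_is_tuple=True)
    # zero_state also accepts a scalar tensor, so the state matches any batch size.
    state = stacked_lstm.zero_state(dynamic_batch_size, tf.float32)
    outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
    return tf.matmul(outputs[-1], layer['weights']) + layer['biases']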
