I used to use this code to train variational autoencoder (I found the code on a forum and adapted it to my needs) :
import pickle
from pylab import mpl,plt
#lecture des résultats
filename=r'XXX.pic'
data_file=open(filename,'rb')
X_sec = pickle.load(data_file)#[:,3000:]
data_file.close()
size=X_sec.shape[0]
prop=0.75
cut=int(size*prop)
X_train=X_sec[:cut]
X_test=X_sec[cut:]
std=X_train.std()
X_train /= std
X_test /= std
import keras
from keras import layers
from keras import backend as K
from keras.models import Model
import numpy as np
#encoding_dim = 12
sig_shape = (3600,)
batch_size = 128
latent_dim = 12
input_sig = keras.Input(shape=sig_shape)
x = layers.Dense(128, activation='relu')(input_sig)
x = layers.Dense(64, activation='relu')(x)
shape_before_flattening = K.int_shape(x)
x = layers.Dense(32, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)
encoder=Model(input_sig,[z_mean,z_log_var])
def sampling(args):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
mean=0., stddev=1.)
return z_mean + K.exp(z_log_var) * epsilon
z = layers.Lambda(sampling)([z_mean, z_log_var])
decoder_input = layers.Input(K.int_shape(z)[1:])
x = layers.Dense(np.prod(shape_before_flattening[1:]),activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(3600, activation='linear')(x)
decoder = Model(decoder_input, x)
z_decoded = decoder(z)
class CustomVariationalLayer(keras.layers.Layer):
def vae_loss(self, x, z_decoded):
x = K.flatten(x)
z_decoded = K.flatten(z_decoded)
xent_loss = keras.metrics.mae(x, z_decoded)
kl_loss = -5e-4 * K.mean(
1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
return K.mean(xent_loss + kl_loss)
def call(self, inputs):
x = inputs[0]
z_decoded = inputs[1]
loss = self.vae_loss(x, z_decoded)
self.add_loss(loss, inputs=inputs)
return x
y = CustomVariationalLayer()([input_sig, z_decoded])
vae = Model(input_sig, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()
vae.fit(x=X_train, y=None,shuffle=True,epochs=100,batch_size=batch_size,validation_data=(X_test, None))
it used to work smoothly but I have updated my librairies and now I get this error :
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1619, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
InvalidArgumentError: Duplicate node name in graph:
'lambda_1/random_normal/shape'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File
"I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py",
line 74, in
z = layers.Lambda(sampling)([z_mean, z_log_var])
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py",
line 75, in symbolic_fn_wrapper
return func(*args, **kwargs)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\engine\base_layer.py",
line 506, in call
output_shape = self.compute_output_shape(input_shape)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py",
line 674, in compute_output_shape
x = self.call(xs)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py",
line 716, in call
return self.function(inputs, **arguments)
File
"I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py",
line 71, in sampling
mean=0., stddev=1.)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py",
line 4329, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\keras\backend.py",
line 5602, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\random_ops.py",
line 69, in random_normal
shape_tensor = tensor_util.shape_tensor(shape)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\tensor_util.py",
line 994, in shape_tensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1314, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py",
line 1368, in _autopacking_conversion_function
return _autopacking_helper(v, dtype, name or "packed")
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py",
line 1304, in _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py",
line 5704, in pack
"Pack", values=values, axis=axis, name=name)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\op_def_library.py",
line 742, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\func_graph.py",
line 595, in _create_op_internal
compute_device)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 3322, in _create_op_internal
op_def=op_def)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1786, in init
control_input_ops)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1622, in _create_c_op
raise ValueError(str(e))
ValueError: Duplicate node name in graph:
'lambda_1/random_normal/shape'
I do not know this error : "Duplicate node name in graph". Does anyone has a clue ? Thanks.
If you're using tf 2.x, then import your keras modules as follows.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.kerasimport backend as K
from tensorflow.keras.models import Model
More related on this, #36509, #130
Related
I was trying to use keras to build a customized attention block after LSTM and got an error. Without the attention block the code is ok to run. The input code is as below, I omitted some irrelevant part.
import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
class attention(Layer):
def __init__(self, **kwargs):
super(attention, self).__init__(**kwargs)
def build(self, input_shape):
self.W = self.add_weight(shape=(input_shape[-1], 1),
initializer='random_normal', trainable=True)
self.b = self.add_weight(shape=(input_shape[1], 1),
initializer='zeros', trainable=True)
super(attention, self).build(input_shape)
def call(self, x):
# Alignment scores. Pass them through tanh function
e = K.tanh(K.dot(x, self.W) + self.b)
# Remove dimension of size 1
e = K.squeeze(e, axis=-1)
# Compute the weights
alpha = K.softmax(e)
# Reshape to tensorFlow format
alpha = K.expand_dims(alpha, axis=-1)
# Compute the context vector
context = x * alpha
context = K.sum(context, axis=1)
return context
Input_rnn = keras.Input(shape=(None, 1))
LSTM_1 = layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = layers.Dropout(0.2)(LSTM_1)
LSTM_2 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = layers.Dropout(0.2)(LSTM_2)
LSTM_3 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = layers.Dropout(0.2)(LSTM_3)
attention_layer = attention()(Dropout_3)
Dense_1 = layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = layers.Dense(1, activation='sigmoid')(Dense_3)
The error is:
2021-11-13 21:06:12.520715: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:12.520735: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-11-13 21:06:18.627597: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-13 21:06:18.627719: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:18.627731: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-13 21:06:18.627746: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (janus0.ihpc.uts.edu.au): /proc/driver/nvidia/version does not exist
2021-11-13 21:06:18.629462: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
Traceback (most recent call last):
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2874, in zeros
tensor_shape.TensorShape(shape))
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 356, in _tensor_shape_tensor_conversion_function
"Cannot convert a partially known TensorShape to a Tensor: %s" % s)
ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 1)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "code/keras_fun.py", line 127, in <module>
attention_layer = attention()(Dropout_3)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 952, in __call__
input_list)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1091, in _functional_construction_call
inputs, input_masks, args, kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 822, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 862, in _infer_output_signature
self._maybe_build(inputs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 2710, in _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
File "code/keras_fun.py", line 34, in build
initializer='zeros', trainable=True)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 639, in add_weight
caching_device=caching_device)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 810, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 142, in make_variable
shape=variable_shape if variable_shape else None)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 260, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 221, in _variable_v1_call
shape=shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 199, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2618, in default_variable_creator
shape=shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 264, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1585, in __init__
distribute_strategy=distribute_strategy)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1712, in _init_from_args
initial_value = initial_value()
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/initializers/initializers_v2.py", line 139, in __call__
return super(Zeros, self).__call__(shape, dtype=_get_dtype(dtype), **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/init_ops_v2.py", line 154, in __call__
return array_ops.zeros(shape, dtype)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2819, in wrapped
tensor = fun(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2877, in zeros
shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1540, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
allow_broadcast=True)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.
I can't find where is going wrong. Not sure if it is something to do with the input shape difference from the attention layer to the dense layer, or from the dropout layer to attention layer.
Which Tensorflow version are you using? I can see there was some tf.keras and tf.keras.layers discrepency. I was able to run the above code with few changes to avoid the error using Tensorflow==2.3.0.
Please find below modified code:
import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
#from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
#from tensorflow.keras import backend as K
class attention(tf.keras.layers.Layer):
def __init__(self, **kwargs):
super(attention, self).__init__(**kwargs)
def build(self, input_shape):
w_init = tf.random_normal_initializer()
self.W = tf.Variable(initial_value=w_init(shape=(input_shape[-1], 1), dtype="float32"),trainable=True,)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(1,), dtype="float32"), trainable=True)
super(attention, self).build(input_shape)
#self.W = self.add_weight(shape=(input_shape[-1], 1),tf.random_normal_initializer('random_normal', trainable=True)
#self.b = self.add_weight(shape=(input_shape[1], 1),tf.zeros_initializer('zeros', trainable=True)
def call(self, x):
# Alignment scores. Pass them through tanh function
e = tf.tanh(tf.matmul(x, self.W) + self.b)
# Remove dimension of size 1
e = tf.squeeze(e, axis=-1)
# Compute the weights
alpha = tf.keras.activations.softmax(e)
# Reshape to tensorFlow format
alpha = tf.expand_dims(alpha, axis=-1)
# Compute the context vector
context = x * alpha
context = tf.math.reduce_sum(context, axis=1)
return context
Input_rnn = tf.keras.Input(shape=(None, 1))
LSTM_1 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = tf.keras.layers.Dropout(0.2)(LSTM_1)
LSTM_2 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = tf.keras.layers.Dropout(0.2)(LSTM_2)
LSTM_3 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = tf.keras.layers.Dropout(0.2)(LSTM_3)
attention_layer = attention()(Dropout_3)
Dense_1 = tf.keras.layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = tf.keras.layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = tf.keras.layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = tf.keras.layers.Dense(1, activation='sigmoid')(Dense_3)
I want to implement perceptual loss for sequential image data of the shape [batch_size, sequence_length, height, width, channels]
The predictions of my model also have the same shape as the input.
My problem is, that I'm not able to properly feed in my predictions to the VGG16 in order to calculate the loss.
Firstly, I build my vgg as follows:
def build_vgg_model(self, weights="imagenet"):
# Input image to extract features from
img = Input(shape=(self.img_rows, self.img_cols, 3))
# Mean center and rescale by variance as in PyTorch
processed = Lambda(lambda x: (x - self.mean) / self.std)(img)
# If inference only, just return empty model
if self.inference_only:
model = Model(inputs=img, outputs=[img for _ in range(len(self.vgg_layers))])
model.trainable = False
model.compile(loss='mse', optimizer='adam')
return model
# Get the vgg network from Keras applications
if weights in ['imagenet', None]:
vgg = VGG16(weights=weights, include_top=False)
# model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)
else:
vgg = VGG16(weights=None, include_top=False)
vgg.load_weights(weights, by_name=True)
# Output the first three pooling layers
vgg.outputs = [vgg.layers[i].output for i in self.vgg_layers]
# Create model and compile
model = Model(inputs=img, outputs=vgg(processed))
model.trainable = False
return model
Then I define my loss function:
def total_loss(self, mask):
def loss(y_true, y_pred):
# Compute predicted image with non-hole pixels set to ground truth
y_comp = mask * y_true + (1 - mask) * y_pred
# Compute the vgg features.
if self.vgg_device:
with tf.device(self.vgg_device):
vgg_out = self.vgg(y_pred)
vgg_gt = self.vgg(y_true)
vgg_comp = self.vgg(y_comp)
else:
vgg_out = self.vgg(y_pred)
vgg_gt = self.vgg(y_true)
vgg_comp = self.vgg(y_comp)
# Compute loss components
l1 = self.loss_valid(mask, y_true, y_pred)
l2 = self.loss_hole(mask, y_true, y_pred)
l3 = self.loss_perceptual(vgg_out, vgg_gt, vgg_comp)
# Return loss function
return l1 + 6 * l2 + 0.05 * l3
return loss
def loss_perceptual(self, vgg_out, vgg_gt, vgg_comp):
loss = 0
for o, c, g in zip(vgg_out, vgg_comp, vgg_gt):
loss += self.l1(o, g) + self.l1(c, g)
return loss
If I now run my code, I get the following error:
Traceback (most recent call last):
File "convlstm_main.py", line 51, in <module>
model = ConvLSTM(64, 64, 3, 5)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 40, in __init__
self.model = self.compile()
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 116, in compile
self.model.compile(loss=self.loss_total, optimizer='adadelta')
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 229, in compile
self.total_loss = self._prepare_total_loss(masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 692, in _prepare_total_loss
y_true, y_pred, sample_weight=sample_weight)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 71, in __call__
losses = self.call(y_true, y_pred)
File "/usr/local/lib/python3.6/dist-packages/keras/losses.py", line 132, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "/mnt/workspace/lm78463/RecurrentDiFoRem/src/model/conv_lstm.py", line 180, in loss_total
vgg_gts = self.vgg(y_trues)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 489, in __call__
output = self.call(inputs, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 583, in call
output_tensors, _, _ = self.run_internal_graph(inputs, masks)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 740, in run_internal_graph
layer.call(computed_tensor, **kwargs))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/convolutional.py", line 171, in call
dilation_rate=self.dilation_rate)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 3717, in conv2d
**kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 917, in convolution
name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 979, in convolution_internal
strides = _get_sequence(strides, n, channel_index, "strides")
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_ops.py", line 74, in _get_sequence
name, n, n + 2, current_n))
ValueError: strides should be of length 1, 3 or 5 but was 2
I don't know why I am getting this error.
I saw a some posts to change state_is_tuple=False but it was giving me some other error. I think the error is in the way I defined lstm cell but not sure what should I change? I followed this link which has similar code structure.
Here is my code:
Required placeholders
n_hidden = args.rnn_size
n_layers = args.num_layers
max_sequence_length = args.max_sequence_length
encoderEmbeddingsize = args.encoderEmbeddingsize
decoderEmbeddingsize = args.decoderEmbeddingsize
queVocabsize = len(question_vocab_to_int)
ansVocabsize = len(answer_vocab_to_int)
batch_size = args.batch_size
# Input Embedding for Encoder ## CHECK THE VOCAB SIZE!!!
encoder_input = tf.contrib.layers.embed_sequence(input_data, queVocabsize, encoderEmbeddingsize,
initializer=tf.random_uniform_initializer(0, 1))
print('encoder_input', encoder_input)
# Layers for the model
lstm_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout = rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_prob) # dropout layer
# Encoder Model
# Make two layer encoder
encoder_multirnn_cell = rnn.MultiRNNCell([dropout]*n_layers)
# Make it bidirectional
print(sequence_length)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_multirnn_cell,
inputs=encoder_input, dtype=tf.float32) # sequence_length=sequence_length,
print('encoder_output', encoder_output)
print('encoder_state', encoder_state)
# preprocessing encoder input
initial_tensor = tf.strided_slice(target, [0, 0], [batch_size, -1], [1, 1])
decoder_input = tf.concat([tf.fill([batch_size, 1], question_vocab_to_int['<GO>']), initial_tensor], 1)
print('decoder_input', decoder_input)
## Input Embedding for the Decoder
decoder_embedding = tf.Variable(tf.random_uniform([queVocabsize+1, decoderEmbeddingsize], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding, decoder_input)
print('check')
print(decoder_embedded_input)
print(decoder_embedding)
## Decoder Model
#with tf.variable_scope("decoding") as decoding_scope:
lstm_decoder_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout_decoder = rnn.DropoutWrapper(lstm_decoder_cell, input_keep_prob=keep_prob) # droput layer
# decoder
# Make two layer encoder
decoder_multirnn_cell = rnn.MultiRNNCell([dropout_decoder] * n_layers)
# weights = tf.truncated_normal_initializer(stddev=0.1)
# biases = tf.zeros_initializer()
output_layer_function = layers_core.Dense(
ansVocabsize, use_bias=False) #lambda x: tf.contrib.layers.fully_connected(x, queVocabsize, scope=decoding_scope,
# weights_initializer=weights,
# biases_initializer=biases)
#print(decoder_multirnn_cell.output_size)
#decoding_scope.reuse_variables()
print('output_kayer_function', output_layer_function)
# training vs inference!
encoder_output = tf.transpose(encoder_output, [1, 0, 2])
attention_state = tf.zeros([batch_size, 1, decoder_multirnn_cell.output_size * 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
num_units=decoder_multirnn_cell.output_size, memory=encoder_output)
lstm_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(lstm_decoder_cell,
attention_mechanism=attention_mechanism)
attn_zero = lstm_decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
init_state = attn_zero.clone(cell_state=encoder_state)
print(('sequence!!!!!!!!1', sequence_length))
helper = tf.contrib.seq2seq.TrainingHelper(decoder_embedded_input, sequence_length)
# decoder
decoder = tf.contrib.seq2seq.BasicDecoder(lstm_decoder_cell, helper, initial_state=init_state,
output_layer= output_layer_function)
print(decoder)
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
train_pred_drop = tf.nn.dropout(final_outputs, keep_prob)
logits = train_pred_drop.rnn_output
Now, I am getting the error in tf.contrib.seq2seq.dynamic_decode(decoder), as shown below:
Traceback (most recent call last):
File "test_model.py", line 272, in <module>
train_logits, infer_logits = load_model(args, tf.reverse(input_data, [-1]), target, learning_rate, sequence_length, question_vocab_to_int, answer_vocab_to_int, keep_prob ) ## load model here!
File "test_model.py", line 165, in load_model
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
swap_memory=swap_memory)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
decoder_finished) = decoder.step(time, inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 138, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py", line 1295, in call
cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
self._linear = _Linear([inputs, h], 4 * self._num_units, True)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in __init__
shapes = [a.get_shape() for a in args]
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in <listcomp>
shapes = [a.get_shape() for a in args]
AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape'
I construct a LSTM network, and my input's dimension is 100*100*83 ( batch_size=100, steps = 100, char_vector = 83). I build a two LSTM layers which has 512 hidden units.
# coding: utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
class CharRNN:
def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128, num_layers =2,\
learning_rate = 0.001, grad_clip=5, keep_prob=0.001,sampling= False):
# True for SGD
if sampling == True:
self.batch_size, self.num_steps = 1,1
else:
self.batch_size, self.num_steps = batch_size, num_steps
tf.reset_default_graph()
self.inputs, self.targets, self.keep_prob = self.build_inputs(self.batch_size,self.num_steps)
self.keep_prob = keep_prob
self.cell, self.initial_state = self.build_lstm(lstm_size,num_layers,self.batch_size,self.keep_prob)
# print(self.cell.state_size)
x_one_hot = tf.one_hot(self.inputs, num_classes)
print("cell state size: ",self.cell.state_size)
print("cell initial state: ",self.initial_state)
print("this is inputs", self.inputs)
print("x_one_hot: ",x_one_hot)
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
def build_inputs(self, num_seqs, num_steps):
inputs = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name = "inputs")
targets = tf.placeholder(tf.int32, shape= (num_seqs, num_steps), name="targets")
print('inputs shape: ',inputs.shape)
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
return inputs, targets, keep_prob
def build_lstm(self, lstm_size, num_layers, batch_size, keep_prob):
# construct lstm cell
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
# add dropout
drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob= keep_prob)
# stack multiple rnn cells
cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)
return cell, initial_state
if __name__ == '__main__':
len_vocab = 83
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
epochs = 20
save_every_n = 200
print("h1")
model = CharRNN(len_vocab, batch_size = batch_size, num_steps=num_steps, lstm_size = lstm_size,num_layers=num_layers\
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob
I get a dimension not match error at tf.nn.dynamic_rnn.
error message is like this:
inputs shape: (100, 100)
cell state size: (LSTMStateTuple(c=512, h=512), LSTMStateTuple(c=512, h=512))
cell initial state: (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>))
this is inputs Tensor("inputs:0", shape=(100, 100), dtype=int32)
x_one_hot: Tensor("one_hot:0", shape=(100, 100, 83), dtype=float32)
Traceback (most recent call last):
File "./seq2_minimal.py", line 70, in <module>
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob)
File "./seq2_minimal.py", line 32, in __init__
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 762, in _time_step
(output, new_state) = call_cell()
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1066, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 891, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 441, in call
value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1189, in __call__
res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 1024 and 595 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [100,1024], [595,2048].
I search that and find that tensorflow's lstm cell should adjust its input size automatically. But error message said this.
It shows
input size is [100, 1024] and lstm is [595, 2048].
Thanks firstly.
cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
TO
cell = tf.nn.rnn_cell.MultiRNNCell([drop])
because your given input tensor and produces tensor are not the same.
Classify MNIST Digits with Tensorflow by a 2-layer RNN approach. Training works fine, but when evaluating accuracy, incompatible shape of test data is reported.
import tensorflow as tf
import inspect
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
hm_epochs = 1
n_classes = 10
batch_size = 128
chunk_size = 28
n_chunks = 28
rnn_size = 128
x = tf.placeholder('float', [None, n_chunks,chunk_size])
y = tf.placeholder('float')
def lstm_cell():
if 'reuse' in inspect.getargspec(
tf.contrib.rnn.BasicLSTMCell.__init__).args:
return tf.contrib.rnn.BasicLSTMCell(
rnn_size, forget_bias=0.0, state_is_tuple=True,
reuse=tf.get_variable_scope().reuse)
else:
return tf.contrib.rnn.BasicLSTMCell(
rnn_size, forget_bias=0.0, state_is_tuple=True)
def attn_cell():
return tf.contrib.rnn.DropoutWrapper(
lstm_cell())
def recurrent_neural_network(x):
layer = {'weights':tf.Variable(tf.random_normal([rnn_size,n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, chunk_size])
x = tf.split(x, n_chunks, 0)
stacked_lstm = tf.contrib.rnn.MultiRNNCell([attn_cell(),attn_cell()], state_is_tuple=True)
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x,state)
output = tf.matmul(outputs[-1],layer['weights']) + layer['biases']
return output
def train_neural_network(x):
prediction = recurrent_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
epoch_x = epoch_x.reshape((batch_size,n_chunks,chunk_size))
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
testdata= np.reshape( mnist.test.images, (10000, n_chunks, chunk_size))
print("Testdata ",testdata.shape)
print("x ",x)
print('Accuracy:',accuracy.eval({x:testdata, y:mnist.test.labels}))
train_neural_network(x)
However, the shapes of test data and placeholders are printed as follows. Aren't they compatible?
Epoch 0 completed out of 1 loss: 228.159379691
Testdata (10000, 28, 28)
x Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32)
Error:
Caused by op 'rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_ce
ll/concat', defined at:
File "main.py", line 90, in <module>
train_neural_network(x)
File "main.py", line 59, in train_neural_network
prediction = recurrent_neural_network(x)
File "main.py", line 52, in recurrent_neural_network
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x,state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py"
, line 1212, in static_rnn
(output, state) = call_cell()
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py"
, line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base
.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base
.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cel
l_impl.py", line 1021, in _linear
res = math_ops.matmul(array_ops.concat(args, 1), weights)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\array_o
ps.py", line 1048, in concat
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_arr
ay_ops.py", line 495, in _concat_v2
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
p_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
ps.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\o
ps.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs
should match: shape[0] = [10000,28] vs. shape[1] = [128,128]
[[Node: rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm
_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/
replica:0/task:0/cpu:0"](split, MultiRNNCellZeroState/DropoutWrapperZeroState/Ba
sicLSTMCellZeroState/zeros_1, rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_ce
ll/basic_lstm_cell/concat/axis)]]
When I print the shape of training data it is (128,28,28). I am confused that why the test data leads to the error because both training data and test data are formatted in the same way, that is (?,n_chunks,chunk_size). Thanks in advance.
The issue is that you always create the initial state with shape set to the training batch size instead of the eval batch size.
This is the culprit line:
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)