Custom attention layer after LSTM layer gives ValueError in Keras - python

I was trying to use keras to build a customized attention block after LSTM and got an error. Without the attention block the code is ok to run. The input code is as below, I omitted some irrelevant part.
import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import backend as K
class attention(Layer):
def __init__(self, **kwargs):
super(attention, self).__init__(**kwargs)
def build(self, input_shape):
self.W = self.add_weight(shape=(input_shape[-1], 1),
initializer='random_normal', trainable=True)
self.b = self.add_weight(shape=(input_shape[1], 1),
initializer='zeros', trainable=True)
super(attention, self).build(input_shape)
def call(self, x):
# Alignment scores. Pass them through tanh function
e = K.tanh(K.dot(x, self.W) + self.b)
# Remove dimension of size 1
e = K.squeeze(e, axis=-1)
# Compute the weights
alpha = K.softmax(e)
# Reshape to tensorFlow format
alpha = K.expand_dims(alpha, axis=-1)
# Compute the context vector
context = x * alpha
context = K.sum(context, axis=1)
return context
Input_rnn = keras.Input(shape=(None, 1))
LSTM_1 = layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = layers.Dropout(0.2)(LSTM_1)
LSTM_2 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = layers.Dropout(0.2)(LSTM_2)
LSTM_3 = layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = layers.Dropout(0.2)(LSTM_3)
attention_layer = attention()(Dropout_3)
Dense_1 = layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = layers.Dense(1, activation='sigmoid')(Dense_3)
The error is:
2021-11-13 21:06:12.520715: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:12.520735: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2021-11-13 21:06:18.627597: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-13 21:06:18.627719: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/usr/local/cudnn8.0-11.0/lib64:
2021-11-13 21:06:18.627731: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-13 21:06:18.627746: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (janus0.ihpc.uts.edu.au): /proc/driver/nvidia/version does not exist
2021-11-13 21:06:18.629462: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
Traceback (most recent call last):
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2874, in zeros
tensor_shape.TensorShape(shape))
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 356, in _tensor_shape_tensor_conversion_function
"Cannot convert a partially known TensorShape to a Tensor: %s" % s)
ValueError: Cannot convert a partially known TensorShape to a Tensor: (None, 1)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "code/keras_fun.py", line 127, in <module>
attention_layer = attention()(Dropout_3)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 952, in __call__
input_list)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1091, in _functional_construction_call
inputs, input_masks, args, kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 822, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 862, in _infer_output_signature
self._maybe_build(inputs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 2710, in _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
File "code/keras_fun.py", line 34, in build
initializer='zeros', trainable=True)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 639, in add_weight
caching_device=caching_device)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 810, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 142, in make_variable
shape=variable_shape if variable_shape else None)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 260, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 221, in _variable_v1_call
shape=shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 199, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2618, in default_variable_creator
shape=shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 264, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1585, in __init__
distribute_strategy=distribute_strategy)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1712, in _init_from_args
initial_value = initial_value()
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/keras/initializers/initializers_v2.py", line 139, in __call__
return super(Zeros, self).__call__(shape, dtype=_get_dtype(dtype), **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/init_ops_v2.py", line 154, in __call__
return array_ops.zeros(shape, dtype)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2819, in wrapped
tensor = fun(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2877, in zeros
shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1540, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
allow_broadcast=True)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/home/tialan/tf/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.
I can't find where is going wrong. Not sure if it is something to do with the input shape difference from the attention layer to the dense layer, or from the dropout layer to attention layer.

Which Tensorflow version are you using? I can see there was some tf.keras and tf.keras.layers discrepency. I was able to run the above code with few changes to avoid the error using Tensorflow==2.3.0.
Please find below modified code:
import tensorflow as tf
import pandas as pd
import os
from tensorflow import keras
#from tensorflow.keras import layers
from tensorflow.keras.layers import Layer
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import random
import time
from tensorflow.keras.callbacks import TensorBoard
#from tensorflow.keras import backend as K
class attention(tf.keras.layers.Layer):
def __init__(self, **kwargs):
super(attention, self).__init__(**kwargs)
def build(self, input_shape):
w_init = tf.random_normal_initializer()
self.W = tf.Variable(initial_value=w_init(shape=(input_shape[-1], 1), dtype="float32"),trainable=True,)
b_init = tf.zeros_initializer()
self.b = tf.Variable(initial_value=b_init(shape=(1,), dtype="float32"), trainable=True)
super(attention, self).build(input_shape)
#self.W = self.add_weight(shape=(input_shape[-1], 1),tf.random_normal_initializer('random_normal', trainable=True)
#self.b = self.add_weight(shape=(input_shape[1], 1),tf.zeros_initializer('zeros', trainable=True)
def call(self, x):
# Alignment scores. Pass them through tanh function
e = tf.tanh(tf.matmul(x, self.W) + self.b)
# Remove dimension of size 1
e = tf.squeeze(e, axis=-1)
# Compute the weights
alpha = tf.keras.activations.softmax(e)
# Reshape to tensorFlow format
alpha = tf.expand_dims(alpha, axis=-1)
# Compute the context vector
context = x * alpha
context = tf.math.reduce_sum(context, axis=1)
return context
Input_rnn = tf.keras.Input(shape=(None, 1))
LSTM_1 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Input_rnn)
Dropout_1 = tf.keras.layers.Dropout(0.2)(LSTM_1)
LSTM_2 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_1)
Dropout_2 = tf.keras.layers.Dropout(0.2)(LSTM_2)
LSTM_3 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True)(Dropout_2)
Dropout_3 = tf.keras.layers.Dropout(0.2)(LSTM_3)
attention_layer = attention()(Dropout_3)
Dense_1 = tf.keras.layers.Dense(64, activation='relu')(attention_layer)
Dense_2 = tf.keras.layers.Dense(16, activation='relu')(Dense_1)
Dense_3 = tf.keras.layers.Dense(8, activation='relu')(Dense_2)
Dense_4 = tf.keras.layers.Dense(1, activation='sigmoid')(Dense_3)

Related

new bug in a variational autoencoder (keras)

I used to use this code to train variational autoencoder (I found the code on a forum and adapted it to my needs) :
import pickle
from pylab import mpl,plt
#lecture des résultats
filename=r'XXX.pic'
data_file=open(filename,'rb')
X_sec = pickle.load(data_file)#[:,3000:]
data_file.close()
size=X_sec.shape[0]
prop=0.75
cut=int(size*prop)
X_train=X_sec[:cut]
X_test=X_sec[cut:]
std=X_train.std()
X_train /= std
X_test /= std
import keras
from keras import layers
from keras import backend as K
from keras.models import Model
import numpy as np
#encoding_dim = 12
sig_shape = (3600,)
batch_size = 128
latent_dim = 12
input_sig = keras.Input(shape=sig_shape)
x = layers.Dense(128, activation='relu')(input_sig)
x = layers.Dense(64, activation='relu')(x)
shape_before_flattening = K.int_shape(x)
x = layers.Dense(32, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)
encoder=Model(input_sig,[z_mean,z_log_var])
def sampling(args):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
mean=0., stddev=1.)
return z_mean + K.exp(z_log_var) * epsilon
z = layers.Lambda(sampling)([z_mean, z_log_var])
decoder_input = layers.Input(K.int_shape(z)[1:])
x = layers.Dense(np.prod(shape_before_flattening[1:]),activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(3600, activation='linear')(x)
decoder = Model(decoder_input, x)
z_decoded = decoder(z)
class CustomVariationalLayer(keras.layers.Layer):
def vae_loss(self, x, z_decoded):
x = K.flatten(x)
z_decoded = K.flatten(z_decoded)
xent_loss = keras.metrics.mae(x, z_decoded)
kl_loss = -5e-4 * K.mean(
1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
return K.mean(xent_loss + kl_loss)
def call(self, inputs):
x = inputs[0]
z_decoded = inputs[1]
loss = self.vae_loss(x, z_decoded)
self.add_loss(loss, inputs=inputs)
return x
y = CustomVariationalLayer()([input_sig, z_decoded])
vae = Model(input_sig, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()
vae.fit(x=X_train, y=None,shuffle=True,epochs=100,batch_size=batch_size,validation_data=(X_test, None))
it used to work smoothly but I have updated my librairies and now I get this error :
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1619, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
InvalidArgumentError: Duplicate node name in graph:
'lambda_1/random_normal/shape'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File
"I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py",
line 74, in
z = layers.Lambda(sampling)([z_mean, z_log_var])
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py",
line 75, in symbolic_fn_wrapper
return func(*args, **kwargs)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\engine\base_layer.py",
line 506, in call
output_shape = self.compute_output_shape(input_shape)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py",
line 674, in compute_output_shape
x = self.call(xs)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py",
line 716, in call
return self.function(inputs, **arguments)
File
"I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py",
line 71, in sampling
mean=0., stddev=1.)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py",
line 4329, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\keras\backend.py",
line 5602, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\random_ops.py",
line 69, in random_normal
shape_tensor = tensor_util.shape_tensor(shape)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\tensor_util.py",
line 994, in shape_tensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1314, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py",
line 1368, in _autopacking_conversion_function
return _autopacking_helper(v, dtype, name or "packed")
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py",
line 1304, in _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py",
line 5704, in pack
"Pack", values=values, axis=axis, name=name)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\op_def_library.py",
line 742, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\func_graph.py",
line 595, in _create_op_internal
compute_device)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 3322, in _create_op_internal
op_def=op_def)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1786, in init
control_input_ops)
File
"C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py",
line 1622, in _create_c_op
raise ValueError(str(e))
ValueError: Duplicate node name in graph:
'lambda_1/random_normal/shape'
I do not know this error : "Duplicate node name in graph". Does anyone has a clue ? Thanks.
If you're using tf 2.x, then import your keras modules as follows.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.kerasimport backend as K
from tensorflow.keras.models import Model
More related on this, #36509, #130

ValueError: logits and labels must have the same shape ((None, 23, 23, 1) vs (None, 1))

am new to ML so i don't really know what am doing i don't know what logits means in the code i haven't even written logits i just followed a YouTube tutorial to get my self familiar with the environment.. this is the entire code thanks for your help.. i am aware that there is already this kind of post on stackoverflow but i don't think it applies to my situation maybe it does i don't know but i still don't know how to implement it even if it does so please help me out here am struggling :) tnx
the code:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import RMSprop
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import os
import numpy as np
img = cv2.imread("/content/drive/MyDrive/data/train/ha/2.jpg").shape
print(img)
imgg = image.load_img("/content/drive/MyDrive/data/train/ha/2.jpg")
plt.imshow(imgg)
train = ImageDataGenerator(rescale=1/255)
validation = ImageDataGenerator(rescale=1/255)
train_dataset = train.flow_from_directory("/content/drive/MyDrive/data/train/", target_size = (100,100),
batch_size = 3,
class_mode ="binary")
print(train_dataset.class_indices)
validation_dataset = train.flow_from_directory("/content/drive/MyDrive/data/validate/",
target_size = (100,100),
batch_size = 3,
class_mode ="binary")
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16,(3,3),activation = 'relu',input_shape =(200,200,3)),
tf.keras.layers.MaxPool2D(2,2),
#
tf.keras.layers.Conv2D(32,(3,3),activation = 'relu'),
tf.keras.layers.MaxPool2D(2,2),
#
tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
tf.keras.layers.MaxPool2D(2,2),
##
tf.keras.layers.Dense(134,activation = 'relu'),
##
tf.keras.layers.Dense(1,activation = 'sigmoid')
])
model.compile(loss = 'binary_crossentropy',
optimizer = RMSprop(lr=0.001),
metrics =['accuracy'])
model_fit = model.fit(train_dataset,
steps_per_epoch = 3,
epochs = 1,
validation_data = validation_dataset)
the error:
2021-01-01 13:39:18.588397: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
(51, 51, 3)
Found 9 images belonging to 2 classes.
{'ha': 0, 'hu': 1}
Found 4 images belonging to 2 classes.
2021-01-01 13:39:22.999078: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-01-01 13:39:23.026197: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-01-01 13:39:23.092853: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-01-01 13:39:23.092917: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (6e4fde799083): /proc/driver/nvidia/version does not exist
2021-01-01 13:39:23.093374: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-01-01 13:39:23.846859: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-01-01 13:39:23.850373: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2300000000 Hz
Traceback (most recent call last):
File "/content/drive/MyDrive/main.py", line 48, in <module>
validation_data = validation_dataset)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 726, in _initialize
*args, **kwds))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3206, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1608 binary_crossentropy
K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4979 binary_crossentropy
return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
(logits.get_shape(), labels.get_shape()))
ValueError: logits and labels must have the same shape ((None, 23, 23, 1) vs (None, 1))
Your problem is that the input to the dense layer has to be a vector. To achieve that
you can
replace tf.keras.layers.MaxPool2D(2,2)
with tf.keras.layers.GlobalMaxPooling2D()
or just add
tf.keras.layers.GlobalMaxPooling2D() after tf.keras.layers.MaxPool2D(2,2)

Keras load_model is causing 'TypeError: Keyword argument not understood:' when using custom layer in model

I am building a model with a custom attention layer as implemented in Tensorflow's nmt tutorial. I used the same layer code with a few changes which I found as suggestions in order to solve my problem.
The problem is that I cannot load the model from file after I save it when I have this custom layer. This is the layer class:
class BahdanauAttention(layers.Layer):
def __init__(self, output_dim=30, **kwargs):
super(BahdanauAttention, self).__init__(**kwargs)
self.W1 = tf.keras.layers.Dense(output_dim)
self.W2 = tf.keras.layers.Dense(output_dim)
self.V = tf.keras.layers.Dense(1)
def call(self, inputs, **kwargs):
query = inputs[0]
values = inputs[1]
query_with_time_axis = tf.expand_dims(query, 1)
score = self.V(tf.nn.tanh(
self.W1(query_with_time_axis) + self.W2(values)))
attention_weights = tf.nn.softmax(score, axis=1)
context_vector = attention_weights * values
context_vector = tf.reduce_sum(context_vector, axis=1)
return context_vector, attention_weights
def get_config(self):
config = super(BahdanauAttention, self).get_config()
config.update({
'W1': self.W1,
'W2': self.W2,
'V': self.V,
})
return config
I am saving the model with keras' ModelCheckpoint callback:
path = os.path.join(self.dir, 'model_{}'.format(self.timestamp))
callbacks.append(ModelCheckpoint(path, save_best_only=True, monitor='val_loss', mode='min'))
Later, I am loading the model like so:
self.model = load_model(path, custom_objects={'BahdanauAttention': BahdanauAttention, 'custom_loss': self.custom_loss})
This is the error message I am getting:
raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'W1')
and full traceback:
Traceback (most recent call last):
File "models/lstm.py", line 49, in load_model
'dollar_mape_loss': self.dollar_mape_loss})
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/save.py", line 187, in load_model
return saved_model_load.load(filepath, compile, options)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 121, in load
path, options=options, loader_cls=KerasObjectLoader)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/saved_model/load.py", line 633, in load_internal
ckpt_options)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 194, in __init__
super(KerasObjectLoader, self).__init__(*args, **kwargs)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/saved_model/load.py", line 130, in __init__
self._load_all()
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 215, in _load_all
self._layer_nodes = self._load_layers()
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 315, in _load_layers
layers[node_id] = self._load_layer(proto.user_object, node_id)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 341, in _load_layer
obj, setter = self._revive_from_config(proto.identifier, metadata, node_id)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 359, in _revive_from_config
self._revive_layer_from_config(metadata, node_id))
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/saving/saved_model/load.py", line 417, in _revive_layer_from_config
generic_utils.serialize_keras_class_and_config(class_name, config))
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/layers/serialization.py", line 175, in deserialize
printable_module_name='layer')
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 360, in deserialize_keras_object
return cls.from_config(cls_config)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 697, in from_config
return cls(**config)
File "models/lstm.py", line 310, in __init__
super(BahdanauAttention, self).__init__(**kwargs)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 318, in __init__
generic_utils.validate_kwargs(kwargs, allowed_kwargs)
File "venv/m/lib/python3.7/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 778, in validate_kwargs
raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'W1')
Similar questions suggest that the code is using different versions of Keras and TensorFlow. I am only using TensorFlow's Keras. These are the imports
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, ModelCheckpoint
from tensorflow.keras import layers
Following keras' documentation on custom layers, They recommend that any weights should not be initialized in __init__() but in build(). This way the weights do not need to be added to the config and the error will be resolved.
This is the updated custom layer class:
class BahdanauAttention(tf.keras.layers.Layer):
def __init__(self, units=30, **kwargs):
super(BahdanauAttention, self).__init__(**kwargs)
self.units = units
def build(self, input_shape):
self.W1 = tf.keras.layers.Dense(self.units)
self.W2 = tf.keras.layers.Dense(self.units)
self.V = tf.keras.layers.Dense(1)
def call(self, inputs, **kwargs):
query = inputs[0]
values = inputs[1]
query_with_time_axis = tf.expand_dims(query, 1)
score = self.V(tf.nn.tanh(
self.W1(query_with_time_axis) + self.W2(values)))
attention_weights = tf.nn.softmax(score, axis=1)
context_vector = attention_weights * values
context_vector = tf.reduce_sum(context_vector, axis=1)
return context_vector, attention_weights
def get_config(self):
config = super(BahdanauAttention, self).get_config()
config.update({
'units': self.units,
})
return config
I also have this problem.
I've tried a lot of methods and found that this method can be used.
first,build model
model = TextAttBiRNN(maxlen, max_features, embedding_dims).get_model()
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy'])
second, load weights:
I solved the problem with this:
model_file = "/content/drive/My Drive/dga/output_data/model_lstm_att_test_v6.h5"
model.load_weights(model_file)
then,we will find the modle can be use.
in this way,I avoided the previous questions.

How to freeze a keras model with custom keras layers(.h5) to tensorflow graph(.pb)?

I am trying to implement a Faster-RCNN model for object detection written by Yinghan Xu. After I have trained and saved the model with model_all.save('filename.h5'), I am trying to freeze the Keras model as TensorFlow graph (as .pb) for inference using keras_to_tensorflow.py written by Amir Abdi. But when I try to convert it, I get a ValueError: Unknown layer: roipoolingconv due to a custom RoiPoolingConv layer:
class RoiPoolingConv(Layer):
'''ROI pooling layer for 2D inputs.
See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
K. He, X. Zhang, S. Ren, J. Sun
# Arguments
pool_size: int
Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
num_rois: number of regions of interest to be used
# Input shape
list of two 4D tensors [X_img,X_roi] with shape:
X_img:
`(1, rows, cols, channels)`
X_roi:
`(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
# Output shape
3D tensor with shape:
`(1, num_rois, channels, pool_size, pool_size)`
'''
def __init__(self, pool_size, num_rois, **kwargs):
self.dim_ordering = K.image_dim_ordering()
self.pool_size = pool_size
self.num_rois = num_rois
super(RoiPoolingConv, self).__init__(**kwargs)
def build(self, input_shape):
self.nb_channels = input_shape[0][3]
def compute_output_shape(self, input_shape):
return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels
def call(self, x, mask=None):
assert(len(x) == 2)
# x[0] is image with shape (rows, cols, channels)
img = x[0]
# x[1] is roi with shape (num_rois,4) with ordering (x,y,w,h)
rois = x[1]
input_shape = K.shape(img)
outputs = []
for roi_idx in range(self.num_rois):
x = rois[0, roi_idx, 0]
y = rois[0, roi_idx, 1]
w = rois[0, roi_idx, 2]
h = rois[0, roi_idx, 3]
x = K.cast(x, 'int32')
y = K.cast(y, 'int32')
w = K.cast(w, 'int32')
h = K.cast(h, 'int32')
# Resized roi of the image to pooling size (7x7)
rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
outputs.append(rs)
final_output = K.concatenate(outputs, axis=0)
# Reshape to (1, num_rois, pool_size, pool_size, nb_channels)
# Might be (1, 4, 7, 7, 3)
final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
# permute_dimensions is similar to transpose
final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
return final_output
def get_config(self):
config = {'pool_size': self.pool_size,
'num_rois': self.num_rois}
base_config = super(RoiPoolingConv, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
I have looked at most of the resources out there and almost all of them suggest to comment out this layer. But since this layer is important for object detection, I was wondering if a workaround is possible or not.
The complete traceback of error (note: I've saved filename as freezekeras.py, contents are same as keras_to_tensorflow.py):
Using TensorFlow backend.
Traceback (most recent call last):
File "freezekeras.py", line 181, in <module>
app.run(main)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 300, in run
_run_main(main, args)
File "/usr/local/lib/python3.5/dist-packages/absl/app.py", line 251, in _run_main
sys.exit(main(argv))
File "freezekeras.py", line 127, in main
model = load_model(FLAGS.input_model, FLAGS.input_model_json, FLAGS.input_model_yaml)
File "freezekeras.py", line 105, in load_model
raise wrong_file_err
File "freezekeras.py", line 62, in load_model
model = keras.models.load_model(input_model_path)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/usr/local/lib/python3.5/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python3.5/dist-packages/keras/utils/generic_utils.py", line 138, in deserialize_keras_object
': ' + class_name)
ValueError: Unknown layer: RoiPoolingConv
Try to specify the custom layer explicitly:
model = load_model('my_model.h5', custom_objects={'RoiPoolingConv': RoiPoolingConv})
Obviously, you have to re-write the keras_to_tensorflow.py script. See Handling custom layers (or other custom objects) in saved models section under Keras FAQ.
Solution
specify custom layer while loading model in keras_to_tensorflow.py
model = keras.models.load_model(input_model_path, custom_objects={'RoiPoolingConv':RoiPoolingConv})
import RoiPoolingConv.py to keras_to_tensorflow project
specify default pool_size, num_rois for RoiPoolingConv
def __init__(self, pool_size = 7, num_rois = 32, **kwargs):

lstm dimension not match by tensorflow

I construct a LSTM network, and my input's dimension is 100*100*83 ( batch_size=100, steps = 100, char_vector = 83). I build a two LSTM layers which has 512 hidden units.
# coding: utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
class CharRNN:
def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128, num_layers =2,\
learning_rate = 0.001, grad_clip=5, keep_prob=0.001,sampling= False):
# True for SGD
if sampling == True:
self.batch_size, self.num_steps = 1,1
else:
self.batch_size, self.num_steps = batch_size, num_steps
tf.reset_default_graph()
self.inputs, self.targets, self.keep_prob = self.build_inputs(self.batch_size,self.num_steps)
self.keep_prob = keep_prob
self.cell, self.initial_state = self.build_lstm(lstm_size,num_layers,self.batch_size,self.keep_prob)
# print(self.cell.state_size)
x_one_hot = tf.one_hot(self.inputs, num_classes)
print("cell state size: ",self.cell.state_size)
print("cell initial state: ",self.initial_state)
print("this is inputs", self.inputs)
print("x_one_hot: ",x_one_hot)
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
def build_inputs(self, num_seqs, num_steps):
inputs = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name = "inputs")
targets = tf.placeholder(tf.int32, shape= (num_seqs, num_steps), name="targets")
print('inputs shape: ',inputs.shape)
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
return inputs, targets, keep_prob
def build_lstm(self, lstm_size, num_layers, batch_size, keep_prob):
# construct lstm cell
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
# add dropout
drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob= keep_prob)
# stack multiple rnn cells
cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)
return cell, initial_state
if __name__ == '__main__':
len_vocab = 83
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
epochs = 20
save_every_n = 200
print("h1")
model = CharRNN(len_vocab, batch_size = batch_size, num_steps=num_steps, lstm_size = lstm_size,num_layers=num_layers\
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob
I get a dimension not match error at tf.nn.dynamic_rnn.
error message is like this:
inputs shape: (100, 100)
cell state size: (LSTMStateTuple(c=512, h=512), LSTMStateTuple(c=512, h=512))
cell initial state: (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>))
this is inputs Tensor("inputs:0", shape=(100, 100), dtype=int32)
x_one_hot: Tensor("one_hot:0", shape=(100, 100, 83), dtype=float32)
Traceback (most recent call last):
File "./seq2_minimal.py", line 70, in <module>
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob)
File "./seq2_minimal.py", line 32, in __init__
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 762, in _time_step
(output, new_state) = call_cell()
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1066, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 891, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 441, in call
value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1189, in __call__
res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 1024 and 595 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [100,1024], [595,2048].
I search that and find that tensorflow's lstm cell should adjust its input size automatically. But error message said this.
It shows
input size is [100, 1024] and lstm is [595, 2048].
Thanks firstly.
cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
TO
cell = tf.nn.rnn_cell.MultiRNNCell([drop])
because your given input tensor and produces tensor are not the same.

Categories