Tensorflow saving subclass model which has multiple arguments to call() method - python
I am following the tensorflow neural machine translation tutorial:
https://www.tensorflow.org/tutorials/text/nmt_with_attention
I am trying to save the Encoder and Decoder models, which are subclasses of tf.keras.Model. They work properly during training and inference, but when I try to save them I get the following error:
TypeError: call() missing 1 required positional argument: 'initial_state'
Here is the code:
class Encoder(tf.keras.Model):
    """Embedding + LSTM encoder whose ``call`` takes two separate arguments."""

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        """Build the embedding and LSTM layers.

        Args:
            vocab_size: Number of rows of the embedding table.
            embedding_matrix: Initial embedding weights; its second axis
                gives the embedding width.
            n_units: Number of LSTM units.
            batch_size: Batch size used by ``create_initial_state``.
        """
        super().__init__()
        self.n_units = n_units
        self.batch_size = batch_size
        embedding_dim = embedding_matrix.shape[1]
        self.embedding = Embedding(
            vocab_size,
            embedding_dim,
            weights=[embedding_matrix],
            trainable=True,
            mask_zero=True,
        )
        self.lstm = LSTM(
            n_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform",
        )

    def call(self, input_utterence, initial_state):
        """Embed the input and run the LSTM from ``initial_state``.

        Returns the three values produced by the LSTM, which is configured
        with ``return_sequences=True`` and ``return_state=True``.
        """
        embedded = self.embedding(input_utterence)
        outputs, state_h, state_c = self.lstm(embedded, initial_state=initial_state)
        return outputs, state_h, state_c

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        state_shape = (self.batch_size, self.n_units)
        return tf.zeros(state_shape)
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
# do some training...
# Export the encoder created above. The original call passed `decoder`, a name
# this snippet never defines, while writing to the "encoder_model" directory.
tf.saved_model.save(encoder, "encoder_model")
I also tried making the call method take a single list argument and unpacking the variables I need inside the method, but then I get the following error when trying to save:
File "C:\Users\Fady\Documents\Machine Learning\chatbot\models\seq2seq_model.py", line 32, in call
input_utterence, initial_state = inputs
ValueError: too many values to unpack (expected 2)
You can export the model successfully if you package your inputs in a list. You also need to specify the input signatures when exporting the model. Here is your code, with slight modifications, which works:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM
import numpy as np
# Report the TensorFlow version the script is running under.
print('TensorFlow: ', tf.__version__)
# Sizes used further down to build the example encoder and its dummy inputs.
vocab_size = 10000
LSTM_DIM = 256
BATCH_SIZE = 16
# Random stand-in for an embedding table: one 300-wide row per vocabulary entry.
embedding_matrix = np.random.randn(vocab_size, 300)
class Encoder(tf.keras.Model):
    """Embedding + LSTM encoder that receives all inputs as one nested list.

    ``call`` takes a single ``inputs`` argument of the form
    ``[input_utterence, initial_state]`` so that the model can be exported
    with an explicit input signature.
    """

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        """Build the embedding and LSTM layers.

        Args:
            vocab_size: Number of rows of the embedding table.
            embedding_matrix: Initial embedding weights; its second axis
                gives the embedding width.
            n_units: Number of LSTM units.
            batch_size: Batch size used by ``create_initial_state``.
        """
        super(Encoder, self).__init__()
        self.n_units = n_units
        self.batch_size = batch_size
        self.embedding = Embedding(
            vocab_size,
            embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,
            mask_zero=True,
        )
        self.lstm = LSTM(
            n_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform",
        )

    # `call` has to be a tf.function: the export step in this script obtains
    # the serving signature via `encoder.call.get_concrete_function(...)`,
    # which is only available on tf.function-wrapped callables. The decorator
    # had been reduced to the comment `#tf.function`.
    @tf.function
    def call(self, inputs):
        """Embed the utterance and run the LSTM from the given initial state.

        Args:
            inputs: Two-element sequence ``[input_utterence, initial_state]``.

        Returns:
            The three values produced by the LSTM, which is configured with
            ``return_sequences=True`` and ``return_state=True``.
        """
        input_utterence, initial_state = inputs
        input_embed = self.embedding(input_utterence)
        encoder_states, h1, c1 = self.lstm(input_embed, initial_state=initial_state)
        return encoder_states, h1, c1

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        return tf.zeros((self.batch_size, self.n_units))
# Dummy batch of token ids, used to build the model and to query the reloaded copy.
random_input = tf.random.uniform(shape=[BATCH_SIZE, 3], maxval=vocab_size, dtype=tf.int32)
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
# Two zero tensors, passed to the LSTM as its initial state.
initial_state = [encoder.create_initial_state() for _ in range(2)]
_ = encoder([random_input, initial_state]) # required so that encoder.build is triggered

# Input signature mirroring the nested structure that `call` unpacks.
utterance_spec = tf.TensorSpec(shape=[None, None], dtype=tf.int32, name='input_utterence')
state_specs = [
    tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_h'),
    tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_c'),
]
serving_fn = encoder.call.get_concrete_function([utterance_spec, state_specs])
tf.saved_model.save(encoder, "encoder_model", signatures=serving_fn)

# Reload the export and call it with the same nested input structure.
loaded_model = tf.saved_model.load('encoder_model')
loaded_model([random_input, initial_state])
output:
TensorFlow: 2.2.0-rc1
WARNING:tensorflow:From /home/dl_user/tf_stable/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: encoder_model/assets
(<tf.Tensor: shape=(16, 3, 256), dtype=float32, numpy=
array([[[-0.06000457, 0.02422162, -0.05310762, ..., -0.01340707,
0.12212028, -0.02747637],
[ 0.13303193, 0.3119418 , -0.17995344, ..., -0.10185111,
0.09568192, 0.06919193],
[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464]],
[[-0.03792192, -0.08431012, 0.03687581, ..., -0.1768839 ,
-0.10469476, 0.08730042],
[-0.02956271, 0.43850696, -0.07400024, ..., 0.04097629,
0.209705 , 0.27194855],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378]],
[[ 0.04196627, 0.08302739, 0.02218204, ..., 0.07388053,
-0.05696848, -0.31895265],
[-0.00536443, 0.1566213 , -0.22412768, ..., 0.10560389,
0.20187919, -0.1896591 ],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215]],
...,
[[ 0.10999472, 0.07398727, 0.23443945, ..., -0.1912791 ,
-0.0195728 , 0.11717851],
[ 0.03978832, 0.07587367, 0.16567066, ..., -0.29463592,
0.05950819, 0.0242265 ],
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641]],
[[-0.20408148, 0.04629526, 0.00601436, ..., 0.21321473,
0.04952445, -0.0129672 ],
[-0.14671509, 0.2911171 , 0.13047697, ..., -0.03531414,
-0.16794083, 0.01575338],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705]],
[[-0.2747393 , 0.24351111, -0.05829309, ..., -0.00448833,
0.07568972, 0.03978251],
[-0.16282909, -0.04586324, -0.0054924 , ..., 0.11050001,
0.1312355 , 0.16555254],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215],
...,
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.32829475, -0.18770668, -0.2956414 , ..., -0.2427501 ,
0.03146099, 0.16033864],
[ 0.05112522, 0.6664379 , -0.19836858, ..., 0.10015503,
0.511694 , 0.51550364],
[ 0.3379809 , 0.7145362 , 0.22311993, ..., 0.372106 ,
0.25914627, -0.81374717],
...,
[ 0.36742535, 0.29009506, 0.13245934, ..., -0.4318537 ,
0.26666188, -0.20086129],
[-0.17384854, 0.22998339, 0.27335796, ..., 0.09973672,
0.10726923, 0.47339764],
[ 0.22148325, -0.11998752, -0.16339599, ..., 0.31903535,
0.20365229, 0.28087002]], dtype=float32)>)
Related
Custom activation function in Tensorflow with trainable params
I am trying to implement a custom version of the PElu activation function in tensorflow. The custom thing about this activation is the knee of the relu is smoothed. I got the equation from this paper. Here is the code: from keras import backend as K import tensorflow as tf def SMU_LeakyPRElu(x, alpha=2.5,u=1.0): return ((1+alpha)*x)+((1-alpha)*x)*(tf.math.erf(u*(1-alpha)*x)) from keras.layers import Layer class SMU_LeakyPRElu(Layer): def __init__(self, alpha=2.5, u=1.0, trainable=False, **kwargs): super(SMU_LeakyPRElu, self).__init__(**kwargs) self.supports_masking = True self.alpha = alpha self.u = u self.trainable = trainable def build(self, input_shape): self.alpha_factor = K.variable(self.alpha, dtype=K.floatx(), name='alpha_factor') self.u_factor = K.variable(self.u, dtype=K.floatx(), name='u_factor') if self.trainable: self._trainable_weights.append(self.alpha_factor) self._trainable_weights.append(self.u_factor) super(SMU_LeakyPRElu, self).build(input_shape) def call(self, inputs, mask=None): return SMU_LeakyPRElu(inputs, self.alpha_factor,self.u_factor) def get_config(self): config = {'alpha': self.get_weights()[0] if self.trainable else self.alpha, 'u' : self.get_weights()[1] if self.trainable else self.u, 'trainable': self.trainable} base_config = super(SMU_LeakyPRElu, self).get_config() return dict(list(base_config.items()) + list(config.items())) def compute_output_shape(self, input_shape): return input_shape x = tf.random.normal((1,10,4)) print(x) input_shape = (1,10,4) input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer") layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer) layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1) model = tf.keras.models.Model(input_layer, layer_2, name="model") model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss="categorical_crossentropy", run_eagerly=True) print(model.summary()) result = model.predict(x) 
print(result) print(result.shape) I implemented this code using a example from this post at Data Science SE. Error: tf.Tensor( [[[ 1.0467066 -1.1833347 1.5384735 2.078511 ] [-1.6025988 -0.30846047 0.8019808 0.3113866 ] [ 0.58313304 -0.90643036 -0.3926888 -0.6210553 ] [ 0.16505387 -0.5930619 0.6983522 -0.12211661] [ 0.06077941 -0.11117186 -1.2540722 -0.32234746] [ 0.41838828 0.7090619 0.30999053 0.10459523] [ 0.35603598 -0.2695868 -0.17901018 -0.09100233] [ 1.2746769 0.8311447 0.02825974 -0.48021472] [-1.536545 -0.24765234 -0.36437735 -1.1891246 ] [ 0.7531206 -0.56109476 -0.65761757 0.19102335]]], shape=(1, 10, 4), dtype=float32) --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-50-c9d490dfd533> in <module> 5 input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer") 6 layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer) ----> 7 layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1) 8 9 model = tf.keras.models.Model(input_layer, layer_2, name="model") 1 frames /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/type_spec.py in type_spec_from_value(value) 888 3, "Failed to convert %r to tensor: %s" % (type(value).__name__, e)) 889 --> 890 raise TypeError(f"Could not build a TypeSpec for {value} of " 891 f"unsupported type {type(value)}.") 892 TypeError: Could not build a TypeSpec for <__main__.SMU_LeakyPRElu object at 0x7fde698f7850> of unsupported type <class '__main__.SMU_LeakyPRElu'>. I don't understand this error. How should I implement this function as custom activation function with trainable parameters alpha and u.?
# The problem is that you have named your activation function and the custom
# layer you created the same thing. I refactored your code for you.
#
# Code:
from typing import Optional

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Layer
from tensorflow.keras.optimizers import Adam


class SMULeakyPReLU(Layer):
    """``SMULeakyPReLU``.

    Computes ``(1 + alpha) * x + (1 - alpha) * x * erf(u * (1 - alpha) * x)``,
    where ``alpha`` and ``u`` are held in variables created in ``build``.
    """

    def __init__(self,
                 alpha: float = 2.5,
                 u: float = 1.,
                 trainable: bool = False,
                 **kwargs):
        """Store the initial ``alpha``/``u`` values and the trainable flag."""
        super().__init__(**kwargs)
        self.alpha = alpha
        self.u = u
        self.trainable = trainable

    def build(self, input_shape: tf.TensorShape):
        """Create the ``alpha_factor`` and ``u_factor`` variables."""
        super().build(input_shape)
        self.alpha_factor = tf.Variable(
            self.alpha,
            dtype=tf.float32,
            trainable=self.trainable,
            name="alpha_factor")
        # Pass the same trainable flag as alpha_factor so both parameters
        # follow the layer's `trainable` setting.
        self.u_factor = tf.Variable(
            self.u,
            dtype=tf.float32,
            trainable=self.trainable,
            name="u_factor")

    def call(self,
             inputs: tf.Tensor,
             mask: Optional[tf.Tensor] = None
             ) -> tf.Tensor:
        """Apply the activation element-wise; ``mask`` is accepted but unused."""
        fst = (1. + self.alpha_factor) * inputs
        snd = (1. - self.alpha_factor) * inputs
        trd = tf.math.erf(self.u_factor * (1. - self.alpha_factor) * inputs)
        # The linear term is added to the erf-weighted term. The previous
        # `fst * snd * trd` multiplied all three and did not match the
        # formula being implemented.
        return fst + snd * trd

    def get_config(self):
        """Return the layer config, using current weight values when trainable."""
        config = {
            "alpha": self.get_weights()[0] if self.trainable else self.alpha,
            "u": self.get_weights()[1] if self.trainable else self.u,
            "trainable": self.trainable
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))


# Test
# fake data
x = tf.random.normal((1, 10, 4))

# create network
input_layer = Input(shape=x.shape[1:], name="input_layer")
layer_1 = Conv1D(2, 1, padding="valid")(input_layer)
layer_2 = SMULeakyPReLU(alpha=2.5, u=1.0, trainable=True)(layer_1)

# create model
model = Model(input_layer, layer_2, name="model")

# compile model and summary
model.compile(
    optimizer=Adam(learning_rate=5e-4),
    loss="categorical_crossentropy",
    run_eagerly=True)
print(model.summary())

# forward pass
result = model.predict(x)
print(result)
print(result.shape)

# NOTE(review): the output below is kept as originally recorded; it comes from
# random inputs/weights and presumably predates the formula fix in `call`, so
# a fresh run will print different numbers.
# Model: "model"
# _________________________________________________________________
#  Layer (type)                Output Shape              Param #
# =================================================================
#  input_layer (InputLayer)    [(None, 10, 4)]           0
#
#  conv1d_1 (Conv1D)           (None, 10, 2)             10
#
#  smu_leaky_p_re_lu_1 (SMULea  (None, 10, 2)            2
#  kyPReLU)
#
# =================================================================
# Total params: 12
# Trainable params: 12
# Non-trainable params: 0
# _________________________________________________________________
# None
# 1/1 [==============================] - 0s 13ms/step
# [[[-1.6503611e+01 -3.5051659e+01]
#   [ 4.0098205e-02  1.5923592e+00]
#   [-1.4898951e+00  7.5487376e-05]
#   [ 3.1900513e+01  2.8786476e+01]
#   [ 1.9207695e+01  3.6511238e+01]
#   [-6.8302655e-01 -4.7705490e-02]
#   [ 9.6008554e-03  7.5611029e+00]
#   [ 4.7136435e-01  2.5528276e+00]
#   [ 2.6859209e-01  3.3496175e+00]
#   [ 1.4372441e+01  3.4978668e+01]]]
# (1, 10, 2)
tf.GradientTape giving None gradient
I'm trying to write a custom training loop. After creating the model, I have added some extra trainable parameter to some layers of my model. I have used these extra parameters to update my original parameter on every forward pass. But when I'm calculating the gradient, it's giving None for the extra parameter that i have added last. Code is given below: model = Sequential() model.add(tf.keras.layers.Flatten(input_shape=(1,1))) model.add(Dense(1, activation='relu')) model.add(Dense(2, activation='softmax')) model.layers[1].add_weight(name="x1", shape=(1,), initializer=tf.keras.initializers.Constant(value=1.0),trainable=True) dataset = tf.data.Dataset.from_tensor_slices((feature, labels)) for i, (x_batch_train, y_batch_train) in enumerate(dataset): with tf.GradientTape() as tape: for par in model.layers[1].trainable_weights: if "x1" in par.name: bits = tf.convert_to_tensor(par) for par in model.layers[1].trainable_weights: if "kernel" in par.name: par = bits + 1.0 x = model(x_batch_train, training = True) loss = tf.keras.losses.SparseCategoricalCrossentropy(y_batch_train, x) val = tape.gradient(loss, model.trainable_weights) for v in val: print(v) Here, I have added one extra parameter called x1 and it's updating the kernel of Dense layer. But I'm getting None gradient for x1 parameter. The output is: tf.Tensor([[0.]], shape=(1, 1), dtype=float32) tf.Tensor([-0.], shape=(1,), dtype=float32) None tf.Tensor([[0. 0.]], shape=(1, 2), dtype=float32) tf.Tensor([-0.5 0.5], shape=(2,), dtype=float32) Why it's happening?
# The problem is that the changes you are making to the layer's weights have
# no direct connection to the output of the model in the context of
# tf.GradientTape and are therefore not tracked. You could solve this with a
# simple custom layer:
import tensorflow as tf


class DenseLayer(tf.keras.layers.Layer):
    """Dense layer with ReLU whose effective kernel is ``w + bits + 1.0``."""

    def __init__(self, units=1):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the kernel, the bias and the extra ``x1`` weight."""
        kernel_shape = [int(input_shape[-1]), self.units]
        self.w = self.add_weight("kernel", shape=kernel_shape, trainable=True)
        self.b = self.add_weight(shape=(self.units,), initializer="zeros", trainable=True)
        self.bits = self.add_weight(
            name="x1",
            shape=kernel_shape,
            initializer=tf.keras.initializers.ones(),
            trainable=True,
        )

    def call(self, inputs):
        # The extra weight enters the forward computation directly, so it is
        # part of what the gradient tape records.
        effective_kernel = self.w + self.bits + 1.0
        pre_activation = tf.matmul(inputs, effective_kernel) + self.b
        return tf.nn.relu(pre_activation)


dense_layer = DenseLayer(1)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
print(model.summary())

# Random features and integer labels, batched two samples at a time.
features = tf.random.normal((50, 1, 1))
labels = tf.random.uniform((50, ), maxval=2, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(2)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

for step, (x_batch_train, y_batch_train) in enumerate(dataset):
    with tf.GradientTape() as tape:
        y = model(x_batch_train, training = True)
        loss = loss_fn(y_batch_train, y)
    val = tape.gradient(loss, model.trainable_weights)
    for grad in val:
        print(grad)
    optimizer.apply_gradients(zip(val, model.trainable_variables))
# Your idea is good I didn't extend from the last answer but this question is
# asked once about the custom layer and that you can do it for lstm by
# training as model.fit( ... ) It is not about the Gradient Tape.
#
# [ Sample - Dense ]:

# ---------------------------------------------------------
# : Class / Function
# ---------------------------------------------------------
class MyDenseLayer(tf.keras.layers.Layer):
    """Adds a constant to the inputs, then multiplies by a kernel."""

    def __init__(self, num_outputs, num_add):
        super().__init__()
        self.num_outputs = num_outputs
        self.num_add = num_add

    def build(self, input_shape):
        """Create the kernel of shape ``[input_shape[-1], num_outputs]``."""
        kernel_shape = [int(input_shape[-1]), self.num_outputs]
        self.kernel = self.add_weight("kernel", shape=kernel_shape)

    def call(self, inputs):
        shifted = tf.add( inputs, self.num_add )
        return tf.matmul(shifted, self.kernel)


# ---------------------------------------------------------
# : Model Initialize
# ---------------------------------------------------------
base_layers = [
    tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
    tf.keras.layers.Normalization(mean=3., variance=2.),
    tf.keras.layers.Normalization(mean=4., variance=6.),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Reshape((128, 225)),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)
    ),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
]
model = tf.keras.models.Sequential(base_layers)

# Append the custom layer, then the classification head.
layer = MyDenseLayer(10, 5)
model.add(layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(192, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()

# [ Output ]:
Multi lstm layers and multi lstm in pytorch
I am using two ways to create a two-layer lstm as shown in the following two codes. Can anyone tell me why the outputs are not the same? and If you have the experience, can you tell me which one is better ? Thanks so much ! (Thanks for the suggestion of initializing them to have the same weights and bias. I add this suggestion in the original code. Despite the same initial parameters, their outputs are still not the same...) The first way using num_layers: import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim torch.manual_seed(1) lstm = nn.LSTM(3, 3,2) # Input dim is 3, output dim is 3 inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5 weight_ih_0=None weight_hh_0=None # bias_ih_0=None # bias_hh_0=None weight_ih_1=None weight_hh_1=None # bias_ih_1=None # bias_hh_1=None for name, param in lstm.named_parameters(): if 'bias' in name: # print(f'bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'bias {name} after init: {param}') elif 'weight' in name: # print(f'weight {name} before init: {param}') nn.init.xavier_normal_(param) print(f'weight {name} after init: {param}') for name, param in lstm.named_parameters(): if 'weight_ih_l0' in name: weight_ih_0=param if 'weight_hh_l0' in name: weight_hh_0=param if 'weight_ih_l1' in name: weight_ih_1=param if 'weight_hh_l1' in name: weight_hh_1=param print(f'inputs: {inputs}') # initialize the hidden state. hidden = (torch.zeros(2, 1, 3), torch.zeros(2, 1, 3)) idx=0 for i in inputs: print(f'idx: {idx}') # print(f'i: {i}') idx+=1 # Step through the sequence one element at a time. 
# after each step, hidden contains the hidden out, hidden = lstm(i.view(1, 1, -1), hidden) print(out) print("==========") # print(hidden) The outputs is: weight weight_ih_l0 after init: Parameter containing: tensor([[ 0.6025, -0.1577, -0.0990], [-0.5255, 0.4554, 0.4651], [ 0.1428, 0.1414, -0.0291], [ 0.1248, 0.3465, -0.5053], [ 0.6295, -0.8635, -0.3394], [ 0.1072, 0.0786, 0.3427], [ 0.5352, -0.2032, 0.8816], [ 0.3727, -0.1608, -0.6332], [-0.3745, 0.1903, -0.1654], [-0.0460, -0.2148, 0.7737], [-0.1980, -0.8980, -0.3470], [-0.1130, 0.6074, 0.1844]], requires_grad=True) weight weight_hh_l0 after init: Parameter containing: tensor([[-0.0719, -0.0122, 0.2626], [ 0.3887, -0.3044, -0.4356], [-0.8422, 0.2204, 0.1151], [ 0.4171, 0.1116, -0.2114], [ 0.2061, -0.3204, -0.0983], [ 0.4791, -0.5683, -0.3928], [-0.3196, -0.1726, -0.0732], [-0.3058, -0.5667, -0.0211], [-0.0832, -0.3168, 0.1241], [-0.4197, 0.0525, 0.0741], [ 0.3849, 0.0481, -0.3130], [ 0.5788, 0.6312, -0.3627]], requires_grad=True) weight weight_ih_l1 after init: Parameter containing: tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01], [-5.2666e-01, 2.7323e-02, 1.2894e-01], [ 3.7136e-01, 3.3969e-01, 1.9601e-01], [ 3.5802e-01, -4.3600e-01, -1.7962e-01], [ 8.3209e-01, 1.7189e-01, 2.2195e-01], [-2.1302e-02, -1.6867e-01, -1.3460e-01], [ 1.3446e-01, 1.7708e-01, -5.6676e-01], [-2.3697e-01, -2.8254e-02, -2.2063e-01], [-2.0928e-01, 3.4973e-01, 3.5858e-04], [-5.0565e-01, -6.8619e-02, 3.7702e-01], [-9.0796e-02, -1.7238e-01, 4.7868e-01], [-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True) weight weight_hh_l1 after init: Parameter containing: tensor([[-0.3017, -0.0811, -0.6554], [ 0.2665, -0.2052, -0.0577], [ 0.5493, -0.5094, 0.2167], [ 0.1210, -0.3868, -0.2293], [-0.0991, 0.6744, -0.0114], [-0.0343, -0.6136, 0.4856], [ 0.0505, 0.3920, -0.1662], [ 0.1163, -0.1296, 0.2505], [-0.1373, -0.8803, -0.4666], [-0.0230, -0.0346, -0.8451], [ 0.2032, 0.1847, -0.0758], [ 0.2533, 0.1532, 0.8224]], requires_grad=True) inputs: 
[tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])] idx: 0 tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>) ========== idx: 1 tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>) ========== idx: 2 tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>) ========== idx: 3 tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>) ========== idx: 4 tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>) ========== The second way creating two individual lstm: import copy torch.manual_seed(1) lstm = nn.LSTMCell(3, 3) # Input dim is 3, output dim is 3 lstm2 = nn.LSTMCell(3, 3) # Input dim is 3, output dim is 3 inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5 for name, param in lstm.named_parameters(): if 'bias' in name: # print(f'lstm bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'lstm bias {name} after init: {param}') elif 'weight' in name: # print(f'lstm weight {name} before init: {param}') if 'weight_ih' in name: param=copy.deepcopy(weight_ih_0) print(f'lstm {name} after init: {param}') if 'weight_hh' in name: param=copy.deepcopy(weight_hh_0) print(f'lstm {name} after init: {param}') for name, param in lstm2.named_parameters(): if 'bias' in name: # print(f'lstm2 bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'lstm2 bias {name} after init: {param}') elif 'weight' in name: # print(f'lstm2 weight {name} before init: {param}') if 'weight_ih' in name: param=copy.deepcopy(weight_ih_1) print(f'lstm2 {name} after init: {param}') if 'weight_hh' in name: param=copy.deepcopy(weight_hh_1) print(f'lstm2 {name} after init: {param}') print(f'inputs: {inputs}') # initialize the hidden state. 
hidden = torch.zeros(1, 3) cell= torch.zeros(1, 3) idx=0 for i in inputs: print(f'idx: {idx}') idx+=1 # Step through the sequence one element at a time. # after each step, hidden contains the hidden hidden, cell = lstm(i.view(1, -1), (hidden,cell)) # print(hidden.shape) hidden, cell = lstm2(hidden, (hidden,cell)) print(hidden) print("==========") And the output is: lstm weight_ih after init: Parameter containing: tensor([[ 0.6025, -0.1577, -0.0990], [-0.5255, 0.4554, 0.4651], [ 0.1428, 0.1414, -0.0291], [ 0.1248, 0.3465, -0.5053], [ 0.6295, -0.8635, -0.3394], [ 0.1072, 0.0786, 0.3427], [ 0.5352, -0.2032, 0.8816], [ 0.3727, -0.1608, -0.6332], [-0.3745, 0.1903, -0.1654], [-0.0460, -0.2148, 0.7737], [-0.1980, -0.8980, -0.3470], [-0.1130, 0.6074, 0.1844]], requires_grad=True) lstm weight_hh after init: Parameter containing: tensor([[-0.0719, -0.0122, 0.2626], [ 0.3887, -0.3044, -0.4356], [-0.8422, 0.2204, 0.1151], [ 0.4171, 0.1116, -0.2114], [ 0.2061, -0.3204, -0.0983], [ 0.4791, -0.5683, -0.3928], [-0.3196, -0.1726, -0.0732], [-0.3058, -0.5667, -0.0211], [-0.0832, -0.3168, 0.1241], [-0.4197, 0.0525, 0.0741], [ 0.3849, 0.0481, -0.3130], [ 0.5788, 0.6312, -0.3627]], requires_grad=True) lstm2 weight_ih after init: Parameter containing: tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01], [-5.2666e-01, 2.7323e-02, 1.2894e-01], [ 3.7136e-01, 3.3969e-01, 1.9601e-01], [ 3.5802e-01, -4.3600e-01, -1.7962e-01], [ 8.3209e-01, 1.7189e-01, 2.2195e-01], [-2.1302e-02, -1.6867e-01, -1.3460e-01], [ 1.3446e-01, 1.7708e-01, -5.6676e-01], [-2.3697e-01, -2.8254e-02, -2.2063e-01], [-2.0928e-01, 3.4973e-01, 3.5858e-04], [-5.0565e-01, -6.8619e-02, 3.7702e-01], [-9.0796e-02, -1.7238e-01, 4.7868e-01], [-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True) lstm2 weight_hh after init: Parameter containing: tensor([[-0.3017, -0.0811, -0.6554], [ 0.2665, -0.2052, -0.0577], [ 0.5493, -0.5094, 0.2167], [ 0.1210, -0.3868, -0.2293], [-0.0991, 0.6744, -0.0114], [-0.0343, -0.6136, 0.4856], [ 0.0505, 
0.3920, -0.1662], [ 0.1163, -0.1296, 0.2505], [-0.1373, -0.8803, -0.4666], [-0.0230, -0.0346, -0.8451], [ 0.2032, 0.1847, -0.0758], [ 0.2533, 0.1532, 0.8224]], requires_grad=True) inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])] idx: 0 tensor([[-0.0152, -0.0344, 0.0368]], grad_fn=<MulBackward0>) ========== idx: 1 tensor([[-0.0265, -0.0143, 0.0730]], grad_fn=<MulBackward0>) ========== idx: 2 tensor([[-0.0210, -0.0033, 0.0529]], grad_fn=<MulBackward0>) ========== idx: 3 tensor([[-0.0580, -0.0201, 0.1194]], grad_fn=<MulBackward0>) ========== idx: 4 tensor([[-0.0672, -0.0801, 0.1165]], grad_fn=<MulBackward0>) ==========
Although you initialized both LSTMs, the initial weights of the two are evidently different: assigning `param = copy.deepcopy(...)` inside the loop only rebinds the loop variable `param` and does not change the module's own parameters. You can verify this with the following code: for p in lstm.parameters(): print(p) I would prefer the first method, because it does not require us to manually link multiple layers together.
I have the answer now. At the very beginning, I was confused with the hidden state and input state of the second lstm layer. Thus, for stacked lstm with num_layers=2, we initialize the hidden states with the number of 2, since each lstm layer needs the initial hidden state, while the second lstm layer takes the output hidden state of the first lstm layer as its input. And for the model containing individual lstm, since, for the above-stacked lstm model, each lstm layer has the initial hidden states being 0, thus, we should initialize the two individual lstms to both have zero hidden states. In addition, I made a mistake to initialize the weight and bias values. As a result, to make the above two methods have the same outputs, I use the following codes: the first method: import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim torch.manual_seed(1) lstm = nn.LSTM(3, 3,2) # Input dim is 3, output dim is 3 inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5 weight_ih_0=None weight_hh_0=None # bias_ih_0=None # bias_hh_0=None weight_ih_1=None weight_hh_1=None # bias_ih_1=None # bias_hh_1=None for name, param in lstm.named_parameters(): if 'bias' in name: # print(f'bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'bias {name} after init: {param}') elif 'weight' in name: # print(f'weight {name} before init: {param}') nn.init.xavier_normal_(param) print(f'weight {name} after init: {param}') for name, param in lstm.named_parameters(): if 'weight_ih_l0' in name: weight_ih_0=param if 'weight_hh_l0' in name: weight_hh_0=param if 'weight_ih_l1' in name: weight_ih_1=param if 'weight_hh_l1' in name: weight_hh_1=param print(f'inputs: {inputs}') # initialize the hidden state. hidden = (torch.zeros(2, 1, 3), torch.zeros(2, 1, 3)) idx=0 for i in inputs: print(f'idx: {idx}') # print(f'i: {i}') idx+=1 # Step through the sequence one element at a time. 
# after each step, hidden contains the hidden out, hidden = lstm(i.view(1, 1, -1), hidden) print(out) # print(hidden) print("==========") And the output is: weight weight_ih_l0 after init: Parameter containing: tensor([[ 0.6025, -0.1577, -0.0990], [-0.5255, 0.4554, 0.4651], [ 0.1428, 0.1414, -0.0291], [ 0.1248, 0.3465, -0.5053], [ 0.6295, -0.8635, -0.3394], [ 0.1072, 0.0786, 0.3427], [ 0.5352, -0.2032, 0.8816], [ 0.3727, -0.1608, -0.6332], [-0.3745, 0.1903, -0.1654], [-0.0460, -0.2148, 0.7737], [-0.1980, -0.8980, -0.3470], [-0.1130, 0.6074, 0.1844]], requires_grad=True) weight weight_hh_l0 after init: Parameter containing: tensor([[-0.0719, -0.0122, 0.2626], [ 0.3887, -0.3044, -0.4356], [-0.8422, 0.2204, 0.1151], [ 0.4171, 0.1116, -0.2114], [ 0.2061, -0.3204, -0.0983], [ 0.4791, -0.5683, -0.3928], [-0.3196, -0.1726, -0.0732], [-0.3058, -0.5667, -0.0211], [-0.0832, -0.3168, 0.1241], [-0.4197, 0.0525, 0.0741], [ 0.3849, 0.0481, -0.3130], [ 0.5788, 0.6312, -0.3627]], requires_grad=True) weight weight_ih_l1 after init: Parameter containing: tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01], [-5.2666e-01, 2.7323e-02, 1.2894e-01], [ 3.7136e-01, 3.3969e-01, 1.9601e-01], [ 3.5802e-01, -4.3600e-01, -1.7962e-01], [ 8.3209e-01, 1.7189e-01, 2.2195e-01], [-2.1302e-02, -1.6867e-01, -1.3460e-01], [ 1.3446e-01, 1.7708e-01, -5.6676e-01], [-2.3697e-01, -2.8254e-02, -2.2063e-01], [-2.0928e-01, 3.4973e-01, 3.5858e-04], [-5.0565e-01, -6.8619e-02, 3.7702e-01], [-9.0796e-02, -1.7238e-01, 4.7868e-01], [-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True) weight weight_hh_l1 after init: Parameter containing: tensor([[-0.3017, -0.0811, -0.6554], [ 0.2665, -0.2052, -0.0577], [ 0.5493, -0.5094, 0.2167], [ 0.1210, -0.3868, -0.2293], [-0.0991, 0.6744, -0.0114], [-0.0343, -0.6136, 0.4856], [ 0.0505, 0.3920, -0.1662], [ 0.1163, -0.1296, 0.2505], [-0.1373, -0.8803, -0.4666], [-0.0230, -0.0346, -0.8451], [ 0.2032, 0.1847, -0.0758], [ 0.2533, 0.1532, 0.8224]], requires_grad=True) inputs: 
[tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])] idx: 0 tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>) ========== idx: 1 tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>) ========== idx: 2 tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>) ========== idx: 3 tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>) ========== idx: 4 tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>) ========== The second method: torch.manual_seed(1) lstm = nn.LSTM(3, 3,1) # Input dim is 3, output dim is 3 lstm2 = nn.LSTM(3, 3,1) # Input dim is 3, output dim is 3 inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5 print(f'inputs: {inputs}') # initialize the hidden state. hidden1 = (torch.zeros(1, 1, 3), torch.zeros(1, 1, 3)) hidden2 = (torch.zeros(1, 1, 3), torch.zeros(1, 1, 3)) for name, param in lstm.named_parameters(): if 'bias' in name: # print(f'lstm bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'lstm bias {name} after init: {param}') elif 'weight' in name: # print(f'lstm weight {name} before init: {param}') if 'weight_ih' in name: lstm.weight_ih_l0.data=weight_ih_0 print(f'lstm {name} after init: {param}') if 'weight_hh' in name: lstm.weight_hh_l0.data=weight_hh_0 print(f'lstm {name} after init: {param}') for name, param in lstm2.named_parameters(): if 'bias' in name: # print(f'lstm2 bias {name} before init: {param}') nn.init.constant_(param, 0.0) # print(f'lstm2 bias {name} after init: {param}') elif 'weight' in name: # print(f'lstm2 weight {name} before init: {param}') if 'weight_ih' in name: lstm2.weight_ih_l0.data=weight_ih_1 print(f'lstm2 {name} after init: {param}') if 'weight_hh' in name: lstm2.weight_hh_l0.data=weight_hh_1 print(f'lstm2 {name} after init: {param}') for name, param in lstm2.named_parameters(): if 
'weight' in name: # print(f'lstm2 weight {name} before init: {param}') print(f'lstm2 {name} after init: {param}') idx=0 for i in inputs: print(f'idx: {idx}') idx+=1 # Step through the sequence one element at a time. # after each step, hidden contains the hidden out, hidden1 = lstm(i.view(1, 1, -1), hidden1) out, hidden2 = lstm2(out.view(1, 1, -1), hidden2) print(out) print("==========") And the output is: inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])] lstm weight_ih_l0 after init: Parameter containing: tensor([[ 0.6025, -0.1577, -0.0990], [-0.5255, 0.4554, 0.4651], [ 0.1428, 0.1414, -0.0291], [ 0.1248, 0.3465, -0.5053], [ 0.6295, -0.8635, -0.3394], [ 0.1072, 0.0786, 0.3427], [ 0.5352, -0.2032, 0.8816], [ 0.3727, -0.1608, -0.6332], [-0.3745, 0.1903, -0.1654], [-0.0460, -0.2148, 0.7737], [-0.1980, -0.8980, -0.3470], [-0.1130, 0.6074, 0.1844]], requires_grad=True) lstm weight_hh_l0 after init: Parameter containing: tensor([[-0.0719, -0.0122, 0.2626], [ 0.3887, -0.3044, -0.4356], [-0.8422, 0.2204, 0.1151], [ 0.4171, 0.1116, -0.2114], [ 0.2061, -0.3204, -0.0983], [ 0.4791, -0.5683, -0.3928], [-0.3196, -0.1726, -0.0732], [-0.3058, -0.5667, -0.0211], [-0.0832, -0.3168, 0.1241], [-0.4197, 0.0525, 0.0741], [ 0.3849, 0.0481, -0.3130], [ 0.5788, 0.6312, -0.3627]], requires_grad=True) lstm2 weight_ih_l0 after init: Parameter containing: tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01], [-5.2666e-01, 2.7323e-02, 1.2894e-01], [ 3.7136e-01, 3.3969e-01, 1.9601e-01], [ 3.5802e-01, -4.3600e-01, -1.7962e-01], [ 8.3209e-01, 1.7189e-01, 2.2195e-01], [-2.1302e-02, -1.6867e-01, -1.3460e-01], [ 1.3446e-01, 1.7708e-01, -5.6676e-01], [-2.3697e-01, -2.8254e-02, -2.2063e-01], [-2.0928e-01, 3.4973e-01, 3.5858e-04], [-5.0565e-01, -6.8619e-02, 3.7702e-01], [-9.0796e-02, -1.7238e-01, 4.7868e-01], [-1.1565e-01, -6.7956e-02, -2.1049e-01]], 
requires_grad=True) lstm2 weight_hh_l0 after init: Parameter containing: tensor([[-0.3017, -0.0811, -0.6554], [ 0.2665, -0.2052, -0.0577], [ 0.5493, -0.5094, 0.2167], [ 0.1210, -0.3868, -0.2293], [-0.0991, 0.6744, -0.0114], [-0.0343, -0.6136, 0.4856], [ 0.0505, 0.3920, -0.1662], [ 0.1163, -0.1296, 0.2505], [-0.1373, -0.8803, -0.4666], [-0.0230, -0.0346, -0.8451], [ 0.2032, 0.1847, -0.0758], [ 0.2533, 0.1532, 0.8224]], requires_grad=True) lstm2 weight_ih_l0 after init: Parameter containing: tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01], [-5.2666e-01, 2.7323e-02, 1.2894e-01], [ 3.7136e-01, 3.3969e-01, 1.9601e-01], [ 3.5802e-01, -4.3600e-01, -1.7962e-01], [ 8.3209e-01, 1.7189e-01, 2.2195e-01], [-2.1302e-02, -1.6867e-01, -1.3460e-01], [ 1.3446e-01, 1.7708e-01, -5.6676e-01], [-2.3697e-01, -2.8254e-02, -2.2063e-01], [-2.0928e-01, 3.4973e-01, 3.5858e-04], [-5.0565e-01, -6.8619e-02, 3.7702e-01], [-9.0796e-02, -1.7238e-01, 4.7868e-01], [-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True) lstm2 weight_hh_l0 after init: Parameter containing: tensor([[-0.3017, -0.0811, -0.6554], [ 0.2665, -0.2052, -0.0577], [ 0.5493, -0.5094, 0.2167], [ 0.1210, -0.3868, -0.2293], [-0.0991, 0.6744, -0.0114], [-0.0343, -0.6136, 0.4856], [ 0.0505, 0.3920, -0.1662], [ 0.1163, -0.1296, 0.2505], [-0.1373, -0.8803, -0.4666], [-0.0230, -0.0346, -0.8451], [ 0.2032, 0.1847, -0.0758], [ 0.2533, 0.1532, 0.8224]], requires_grad=True) idx: 0 tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>) ========== idx: 1 tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>) ========== idx: 2 tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>) ========== idx: 3 tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>) ========== idx: 4 tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>) ==========
ValueError: Graph disconnected: cannot obtain value for tensor Tensor(...) in Keras. How can I fix it?
I am trying to build a model that looks like an encoder-decoder model; the difference is that I use the same LSTM layer in both for loops. To merge the hidden states from t-1 and t-2, I built an intermediate layer called MyLayer. But the following error is always raised: ValueError: Graph disconnected: cannot obtain value for tensor Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1'] I spent a whole afternoon debugging, but couldn't figure it out. Most of the answers I found are about avoiding Input layer names for intermediate parameters, but this method does not work for me. I printed out "lstm_layer_18/Identity_1:0" from the error; it is the state_h in the second for loop. But I cannot fix it. Can anyone help me find the error? My code is shown below. class MyLayer(Layer): def __init__(self, output_dim, **kwargs): self.output_dim = output_dim super(MyLayer, self).__init__(**kwargs) def build(self, input_shape): assert isinstance(input_shape, list) # 为该层创建一个可训练的权重 self.kernel_1 = self.add_weight(name='kernel_1', shape=input_shape[0], initializer='uniform', trainable=True) self.kernel_2 = self.add_weight(name='kernel_2', shape=input_shape[1], initializer='uniform', trainable=True) super(MyLayer, self).build(input_shape) # 一定要在最后调用它 def call(self, v): assert isinstance(v, list) a, b = v return Add()([Multiply()([a, self.kernel_1]), Multiply()([b, self.kernel_2])]) def compute_output_shape(self, input_shape): assert isinstance(input_shape, list) shape_a, shape_b = input_shape assert shape_a == shape_b return shape_a def cf_lstm_model(input_step, output_step, features=2, embed_dim=64, lstm_dim=128, batch_size=8): # input_shape:(batch_size, input_step, features) x = Input(batch_shape=(batch_size, input_step, features)) h_1 = Input(batch_shape=(batch_size, lstm_dim)) h_2 = Input(batch_shape=(batch_size, lstm_dim)) c = 
Input(batch_shape=(batch_size, lstm_dim)) state_c = c embedding = Dense(embed_dim, activation='relu', name='embedding') inverse_embedding = Dense(features, activation='relu', name='inverse_embedding') lstm_layer = LSTM(lstm_dim, activation='relu', return_state=True, name='lstm_layer') cascade = MyLayer(lstm_dim) # decoder = LSTM(lstm_dim, activation='relu', return_state=True, name='decoder') # embeding ---> (batch_size, input_step, embed_dim) emb_x = embedding(x) # cascade_input cascade_input = [h_2, h_1] # Observe for i in range(input_step - 1): h_star = cascade(cascade_input) m = Lambda(lambda xx: xx[:, i, :], name='lambda1')(emb_x) m1 = Reshape((1, embed_dim), name='reshape1')(m) lstm1, state_h, state_c = lstm_layer(m1, [h_star, state_c]) cascade_input.pop() cascade_input.append(state_h) out = Lambda(lambda xx: xx[:, input_step - 1, :], name='lambda2')(emb_x) out = Reshape((1, embed_dim), name='reshape2')(out) # Predict for j in range(output_step): h_star = cascade(cascade_input) out, state_h, state_c = lstm_layer(out, [h_star, state_c]) # print('state_h', state_h) print('state_c', state_c) out = inverse_embedding(out) if j == 0: all_outputs = Reshape((1, features), name='reshape3')(out) # print('output shape:', out.shape, K.int_shape(out)) else: # 注意axis的纬度 all_outputs = K.concatenate([all_outputs, Reshape((1, features))(out)], axis=1) # print('output shape:', K.int_shape(outputs)) cascade_input.pop() cascade_input.append(state_h) out = embedding(out) out = Reshape((1, embed_dim), name='reshape4')(out) # print('out', out.shape) return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs) cf_lstm = cf_lstm_model(input_step=8, output_step=12, embed_dim=64, lstm_dim=128) Error Feedback state_c Tensor("lstm_layer_7/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_8/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_9/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_10/Identity_2:0", shape=(8, 128), 
dtype=float32) state_c Tensor("lstm_layer_11/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_12/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_13/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_14/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_15/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_16/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_17/Identity_2:0", shape=(8, 128), dtype=float32) state_c Tensor("lstm_layer_18/Identity_2:0", shape=(8, 128), dtype=float32) Traceback (most recent call last): File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 105, in <module> cf_lstm = cf_lstm_model(input_step=8, File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 102, in cf_lstm_model return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs) File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 167, in __init__ super(Model, self).__init__(*args, **kwargs) File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 173, in __init__ self._init_graph_network(*args, **kwargs) File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper result = method(self, *args, **kwargs) File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 306, in _init_graph_network nodes, nodes_by_depth, layers, _ = _map_graph_network( File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 1787, in _map_graph_network raise ValueError('Graph disconnected: ' ValueError: Graph disconnected: cannot obtain value for tensor 
Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1']
ValueError: Python inputs incompatible with input_signature:
System information: OS Platform and Distribution: CentOS Linux release 7.7.1908; TensorFlow version: 2.3.0. I am following this example: https://www.tensorflow.org/tutorials/text/image_captioning?hl=en It is working as it should and saving checkpoints, and I now want to convert it to a TF Lite model. Here is the link to the full conversion code: https://colab.research.google.com/drive/1GJkGcwWvDAWMooTsECzuSRUSPbirADhb?usp=sharing Here is the link to the full training code: https://colab.research.google.com/drive/1X2d9WW1EMEzN8Rgva3rtjevP0T_jFccj?usp=sharing I am also following issue #32999. Here is what I am running to save and then convert the inference graph: #tf.function def evaluate(image): hidden = decoder.reset_states(batch_size=1) temp_input = tf.expand_dims(load_image(image)[0], 0) img_tensor_val = image_features_extract_model(temp_input) img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3])) features = encoder(img_tensor_val) dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0) result = [] for i in range(max_length): predictions, hidden, attention_weights = decoder(dec_input, features, hidden) predicted_id = tf.random.categorical(predictions, 1)[0][0] # print(tokenizer.index_word) print(predicted_id,predicted_id.dtype) # for key,value in tokenizer.index_word.items(): # key = tf.convert_to_tensor(key) # tf.dtypes.cast(key,tf.int64) # print(key) # print(tokenizer.index_word) result.append(predicted_id) # if tokenizer.index_word[predicted_id] == '<end>': # return result dec_input = tf.expand_dims([predicted_id], 0) return result export_dir = "./" tflite_enc_input = '' ckpt.f = evaluate to_save = evaluate.get_concrete_function('') converter = tf.lite.TFLiteConverter.from_concrete_functions([to_save]) tflite_model = converter.convert() But I get this error: ValueError: in user code: convert2savedmodel.py:310 evaluate * predictions, hidden, attention_weights = decoder(dec_input, features, hidden) 
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__ ** outputs = call_fn(inputs, *args, **kwargs) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:780 __call__ result = self._call(*args, **kwds) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:840 _call return self._stateless_fn(*args, **kwds) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2828 __call__ graph_function, args, kwargs = self._maybe_define_function(args, kwargs) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3171 _maybe_define_function *args, **kwargs) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2622 canonicalize_function_inputs self._flat_input_signature) /share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2713 _convert_inputs_to_signature format_error_message(inputs, input_signature)) ValueError: Python inputs incompatible with input_signature: inputs: ( Tensor("ExpandDims_1:0", shape=(1, 1), dtype=int32), Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32), Tensor("zeros:0", shape=(1, 512), dtype=float32)) input_signature: ( TensorSpec(shape=(1, 1), dtype=tf.int64, name=None), TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None), TensorSpec(shape=(1, 512), dtype=tf.float32, name=None)) Encoder Model: class CNN_Encoder(tf.keras.Model): def __init__(self, embedding): super(CNN_Encoder, self).__init__() # shape after fc == (batch_size, 64, embedding_dim) self.fc = tf.keras.layers.Dense(embedding_dim) #tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape),dtype=tf.dtypes.float32)]) 
def call(self, x): x = self.fc(x) x = tf.nn.relu(x) return x Decoder model: class RNN_Decoder(tf.keras.Model): def __init__(self, embedding_dim, units, vocab_size): super(RNN_Decoder, self).__init__() self.units = units self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) self.gru = tf.keras.layers.GRU(self.units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform', unroll = True) self.fc1 = tf.keras.layers.Dense(self.units) self.fc2 = tf.keras.layers.Dense(vocab_size) self.attention = BahdanauAttention(self.units) #tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int64), tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32), tf.TensorSpec(shape=[1, 512], dtype=tf.float32)]) def call(self, x , features, hidden): context_vector, attention_weights = self.attention(features, hidden) #x shape after passing through embedding == (batch_size, 1, embedding_dim) x = self.embedding(x) #x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size) x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) output, state = self.gru(x) #shape == (batch_size, max_length, hidden_size) x = self.fc1(output) #x shape == (batch_size, max_length, hidden_size) x = tf.reshape(x, (-1, x.shape[2])) # output shape == (batch_size * max_length, vocab) x = self.fc2(x) return x, state, attention_weights def reset_states(self, batch_size): return tf.zeros((batch_size, self.units)) I just change the tf.function to int32 as below: #tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32), tf.TensorSpec(shape=[1, 64,256], dtype=tf.float32), tf.TensorSpec(shape=[1, 512], dtype=tf.float32)]) but another error came: ValueError: Python inputs incompatible with input_signature: Tensor("ExpandDims_2:0", shape=(1, 1), dtype=int64), Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32), Tensor("rnn__decoder/StatefulPartitionedCall:1", shape=(1, 512), dtype=float32)) input_signature: 
( TensorSpec(shape=(1, 1), dtype=tf.int32, name=None), TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None), TensorSpec(shape=(1, 512), dtype=tf.float32, name=None)) Why does the dtype of the inputs change between int32 and int64?