Tensorflow saving subclass model which has multiple arguments to call() method

Tensorflow saving subclass model which has multiple arguments to call() method - python

I am following the tensorflow neural machine translation tutorial:
https://www.tensorflow.org/tutorials/text/nmt_with_attention
I am trying to save the Encoder and Decoder models, which are subclasses of tf.keras.Model. They work properly during training and inference, but when I try to save them I get the following error:
TypeError: call() missing 1 required positional argument: 'initial_state'
Here is the code:
class Encoder(tf.keras.Model):
    """Encoder made of an embedding layer followed by a single LSTM.

    The embedding is initialised from ``embedding_matrix`` and treats
    index 0 as padding (``mask_zero=True``).
    """

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        super().__init__()
        self.n_units = n_units
        self.batch_size = batch_size

        # The embedding width is taken from the second axis of the matrix.
        embedding_dim = embedding_matrix.shape[1]
        self.embedding = Embedding(
            vocab_size,
            embedding_dim,
            weights=[embedding_matrix],
            trainable=True,
            mask_zero=True,
        )
        self.lstm = LSTM(
            n_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform",
        )

    def call(self, input_utterence, initial_state):
        """Embed the token ids and run them through the LSTM.

        Returns the per-step outputs plus the final hidden and cell states.
        """
        embedded = self.embedding(input_utterence)
        sequence_out, final_h, final_c = self.lstm(
            embedded, initial_state=initial_state
        )
        return sequence_out, final_h, final_c

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        state_shape = (self.batch_size, self.n_units)
        return tf.zeros(state_shape)
# Build the encoder from values defined elsewhere in the asker's script.
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
# do some training...
# Save the model that was built above. The snippet previously passed
# `decoder`, which is never defined here, while the reported error concerns
# the encoder's `initial_state` argument.
tf.saved_model.save(encoder, "encoder_model")
I also tried making the call method take a single list argument and unpacking the variables I need inside the method, but then I get the following error when trying to save:
File "C:\Users\Fady\Documents\Machine Learning\chatbot\models\seq2seq_model.py", line 32, in call
input_utterence, initial_state = inputs
ValueError: too many values to unpack (expected 2)

You can export the model successfully if you package your inputs in a list. You also need to specify the input signatures when exporting the model. Here is your code, with slight modifications, which works:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM
import numpy as np
print('TensorFlow: ', tf.__version__)
vocab_size = 10000
LSTM_DIM = 256
BATCH_SIZE = 16
embedding_matrix = np.random.randn(vocab_size, 300)
class Encoder(tf.keras.Model):
    """Embedding + LSTM encoder whose ``call`` takes one packed input list.

    ``call`` expects ``[input_utterence, initial_state]`` so that the model
    has a single positional input and can be exported as a SavedModel.
    """

    def __init__(self, vocab_size, embedding_matrix, n_units, batch_size):
        super(Encoder, self).__init__()
        self.n_units = n_units
        self.batch_size = batch_size
        # Embedding weights start from `embedding_matrix`; index 0 is masked.
        self.embedding = Embedding(
            vocab_size,
            embedding_matrix.shape[1],
            weights=[embedding_matrix],
            trainable=True,
            mask_zero=True,
        )
        self.lstm = LSTM(
            n_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer="glorot_uniform",
        )

    # `call` must be a tf.function: the export code obtains the serving
    # signature through `encoder.call.get_concrete_function(...)`, which only
    # exists on tf.function-wrapped callables. (The decorator had been
    # mangled into the comment `#tf.function`.)
    @tf.function
    def call(self, inputs):
        """Run the encoder on ``inputs = [input_utterence, initial_state]``.

        Returns the per-step LSTM outputs and the final hidden/cell states.
        """
        input_utterence, initial_state = inputs
        input_embed = self.embedding(input_utterence)
        encoder_states, h1, c1 = self.lstm(
            input_embed, initial_state=initial_state
        )
        return encoder_states, h1, c1

    def create_initial_state(self):
        """Return a zero tensor of shape ``(batch_size, n_units)``."""
        return tf.zeros((self.batch_size, self.n_units))
random_input = tf.random.uniform(shape=[BATCH_SIZE, 3], maxval=vocab_size, dtype=tf.int32)
encoder = Encoder(vocab_size, embedding_matrix, LSTM_DIM, BATCH_SIZE)
initial_state = [encoder.create_initial_state(), encoder.create_initial_state()]
_ = encoder([random_input, initial_state]) # required so that encoder.build is triggered
tf.saved_model.save(encoder, "encoder_model", signatures=encoder.call.get_concrete_function(
[
tf.TensorSpec(shape=[None, None], dtype=tf.int32, name='input_utterence'),
[
tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_h'),
tf.TensorSpec(shape=[None, LSTM_DIM], dtype=tf.float32, name='initial_c')
]
]))
loaded_model = tf.saved_model.load('encoder_model')
loaded_model([random_input, initial_state])
output:
TensorFlow: 2.2.0-rc1
WARNING:tensorflow:From /home/dl_user/tf_stable/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: encoder_model/assets
(<tf.Tensor: shape=(16, 3, 256), dtype=float32, numpy=
array([[[-0.06000457, 0.02422162, -0.05310762, ..., -0.01340707,
0.12212028, -0.02747637],
[ 0.13303193, 0.3119418 , -0.17995344, ..., -0.10185111,
0.09568192, 0.06919193],
[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464]],
[[-0.03792192, -0.08431012, 0.03687581, ..., -0.1768839 ,
-0.10469476, 0.08730042],
[-0.02956271, 0.43850696, -0.07400024, ..., 0.04097629,
0.209705 , 0.27194855],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378]],
[[ 0.04196627, 0.08302739, 0.02218204, ..., 0.07388053,
-0.05696848, -0.31895265],
[-0.00536443, 0.1566213 , -0.22412768, ..., 0.10560389,
0.20187919, -0.1896591 ],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215]],
...,
[[ 0.10999472, 0.07398727, 0.23443945, ..., -0.1912791 ,
-0.0195728 , 0.11717851],
[ 0.03978832, 0.07587367, 0.16567066, ..., -0.29463592,
0.05950819, 0.0242265 ],
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641]],
[[-0.20408148, 0.04629526, 0.00601436, ..., 0.21321473,
0.04952445, -0.0129672 ],
[-0.14671509, 0.2911171 , 0.13047697, ..., -0.03531414,
-0.16794083, 0.01575338],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705]],
[[-0.2747393 , 0.24351111, -0.05829309, ..., -0.00448833,
0.07568972, 0.03978251],
[-0.16282909, -0.04586324, -0.0054924 , ..., 0.11050001,
0.1312355 , 0.16555254],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.08075664, -0.11490613, -0.20294832, ..., -0.14999194,
0.02177649, 0.05538464],
[ 0.02529916, 0.18367583, -0.11409087, ..., 0.0458075 ,
0.2065246 , 0.22976378],
[ 0.26364946, 0.13163888, 0.14586888, ..., 0.19517538,
0.17677066, -0.40476215],
...,
[ 0.2505787 , 0.15849623, 0.06635283, ..., -0.17969091,
0.12549783, -0.11459641],
[-0.08337164, 0.08723269, 0.16235027, ..., 0.07919721,
0.05701642, 0.15379705],
[ 0.07759799, -0.07308074, -0.10038756, ..., 0.18139914,
0.07769153, 0.1375772 ]], dtype=float32)>,
<tf.Tensor: shape=(16, 256), dtype=float32, numpy=
array([[-0.32829475, -0.18770668, -0.2956414 , ..., -0.2427501 ,
0.03146099, 0.16033864],
[ 0.05112522, 0.6664379 , -0.19836858, ..., 0.10015503,
0.511694 , 0.51550364],
[ 0.3379809 , 0.7145362 , 0.22311993, ..., 0.372106 ,
0.25914627, -0.81374717],
...,
[ 0.36742535, 0.29009506, 0.13245934, ..., -0.4318537 ,
0.26666188, -0.20086129],
[-0.17384854, 0.22998339, 0.27335796, ..., 0.09973672,
0.10726923, 0.47339764],
[ 0.22148325, -0.11998752, -0.16339599, ..., 0.31903535,
0.20365229, 0.28087002]], dtype=float32)>)

Related

Custom activation function in Tensorflow with trainable params

I am trying to implement a custom version of the PElu activation function in TensorFlow. What is custom about this activation is that the knee of the ReLU is smoothed. I got the equation from this paper.
Here is the code:
from keras import backend as K
import tensorflow as tf
def SMU_LeakyPRElu(x, alpha=2.5, u=1.0):
    """Evaluate ``(1+alpha)*x + (1-alpha)*x * erf(u*(1-alpha)*x)``."""
    linear_part = (1 + alpha) * x
    damped_part = (1 - alpha) * x
    # The erf argument keeps the original left-to-right multiplication order.
    smoothing = tf.math.erf(u * (1 - alpha) * x)
    return linear_part + damped_part * smoothing
from keras.layers import Layer
class SMU_LeakyPRElu(Layer):
    """Keras layer wrapper holding the ``alpha`` and ``u`` activation factors."""

    def __init__(self, alpha=2.5, u=1.0, trainable=False, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.alpha = alpha
        self.u = u
        self.trainable = trainable

    def build(self, input_shape):
        """Create the two backend variables and register them if trainable."""
        float_dtype = K.floatx()
        self.alpha_factor = K.variable(
            self.alpha, dtype=float_dtype, name='alpha_factor'
        )
        self.u_factor = K.variable(
            self.u, dtype=float_dtype, name='u_factor'
        )
        if self.trainable:
            for factor in (self.alpha_factor, self.u_factor):
                self._trainable_weights.append(factor)
        super().build(input_shape)

    def call(self, inputs, mask=None):
        # NOTE(review): at call time this name resolves to the module-level
        # binding of `SMU_LeakyPRElu`; a function and this class share that
        # name in the file, so confirm which object is actually invoked.
        return SMU_LeakyPRElu(inputs, self.alpha_factor, self.u_factor)

    def get_config(self):
        """Return the layer config, using current weights when trainable."""
        if self.trainable:
            current = self.get_weights()
            alpha_value, u_value = current[0], current[1]
        else:
            alpha_value, u_value = self.alpha, self.u
        own_config = {
            'alpha': alpha_value,
            'u': u_value,
            'trainable': self.trainable,
        }
        parent_config = super().get_config()
        return {**parent_config, **own_config}

    def compute_output_shape(self, input_shape):
        """The activation is element-wise, so the shape is unchanged."""
        return input_shape
x = tf.random.normal((1,10,4))
print(x)
input_shape = (1,10,4)
input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer")
layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer)
layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1)
model = tf.keras.models.Model(input_layer, layer_2, name="model")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss="categorical_crossentropy", run_eagerly=True)
print(model.summary())
result = model.predict(x)
print(result)
print(result.shape)
I implemented this code using an example from this post at Data Science SE.
Error:
tf.Tensor(
[[[ 1.0467066 -1.1833347 1.5384735 2.078511 ]
[-1.6025988 -0.30846047 0.8019808 0.3113866 ]
[ 0.58313304 -0.90643036 -0.3926888 -0.6210553 ]
[ 0.16505387 -0.5930619 0.6983522 -0.12211661]
[ 0.06077941 -0.11117186 -1.2540722 -0.32234746]
[ 0.41838828 0.7090619 0.30999053 0.10459523]
[ 0.35603598 -0.2695868 -0.17901018 -0.09100233]
[ 1.2746769 0.8311447 0.02825974 -0.48021472]
[-1.536545 -0.24765234 -0.36437735 -1.1891246 ]
[ 0.7531206 -0.56109476 -0.65761757 0.19102335]]], shape=(1, 10, 4), dtype=float32)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-50-c9d490dfd533> in <module>
5 input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer")
6 layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer)
----> 7 layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1)
8
9 model = tf.keras.models.Model(input_layer, layer_2, name="model")
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/type_spec.py in type_spec_from_value(value)
888 3, "Failed to convert %r to tensor: %s" % (type(value).__name__, e))
889
--> 890 raise TypeError(f"Could not build a TypeSpec for {value} of "
891 f"unsupported type {type(value)}.")
892
TypeError: Could not build a TypeSpec for <__main__.SMU_LeakyPRElu object at 0x7fde698f7850> of unsupported type <class '__main__.SMU_LeakyPRElu'>.
I don't understand this error. How should I implement this function as a custom activation function with trainable parameters alpha and u?

The problem is that you have named your activation function and the custom layer you created the same thing. I refactored your code for you.
Code:
import tensorflow as tf
from typing import Optional
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Layer
from tensorflow.keras.optimizers import Adam
class SMULeakyPReLU(Layer):
    """Smoothed leaky-PReLU activation layer.

    Element-wise, the layer computes::

        (1 + alpha) * x + (1 - alpha) * x * erf(u * (1 - alpha) * x)

    which is the expression used by the function this layer refactors.
    ``alpha`` and ``u`` are stored as variables and are trained only when
    ``trainable`` is true.

    NOTE(review): the referenced paper may define the activation with an
    additional factor of 1/2 -- confirm against the paper before relying on
    the absolute scale.
    """

    def __init__(self,
                 alpha: float = 2.5,
                 u: float = 1.,
                 trainable: bool = False,
                 **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha
        self.u = u
        self.trainable = trainable

    def build(self, input_shape: tf.TensorShape):
        """Create the ``alpha_factor`` and ``u_factor`` variables."""
        super().build(input_shape)
        self.alpha_factor = tf.Variable(
            self.alpha,
            dtype=tf.float32,
            trainable=self.trainable,
            name="alpha_factor")
        # Honour the `trainable` flag here as well; previously `u_factor`
        # fell back to tf.Variable's default and was always trainable.
        self.u_factor = tf.Variable(
            self.u,
            dtype=tf.float32,
            trainable=self.trainable,
            name="u_factor")

    def call(self,
             inputs: tf.Tensor,
             mask: Optional[tf.Tensor] = None
             ) -> tf.Tensor:
        """Apply the activation element-wise to ``inputs``."""
        fst = (1. + self.alpha_factor) * inputs
        snd = (1. - self.alpha_factor) * inputs
        trd = tf.math.erf(self.u_factor * (1. - self.alpha_factor) * inputs)
        # The linear term is added to the erf-weighted term; multiplying all
        # three factors together does not match the intended formula.
        return fst + snd * trd

    def get_config(self):
        """Return a serialisable config for re-creating the layer.

        When the layer is trainable and already built, the current variable
        values are reported as plain Python floats; otherwise the
        constructor arguments are used.
        """
        alpha, u = self.alpha, self.u
        if self.trainable and self.built:
            # Variables are created in the order alpha_factor, u_factor.
            alpha, u = (float(w) for w in self.get_weights()[:2])
        config = super().get_config()
        config.update({
            "alpha": alpha,
            "u": u,
            "trainable": self.trainable,
        })
        return config
Test
# fake data
x = tf.random.normal((1, 10, 4))
# create network
input_layer = Input(shape=x.shape[1:], name="input_layer")
layer_1 = Conv1D(2, 1, padding="valid")(input_layer)
layer_2 = SMULeakyPReLU(alpha=2.5, u=1.0, trainable=True)(layer_1)
# create model
model = Model(input_layer, layer_2, name="model")
# compile model and summary
model.compile(
optimizer=Adam(learning_rate=5e-4),
loss="categorical_crossentropy",
run_eagerly=True)
print(model.summary())
# forward pass
result = model.predict(x)
print(result)
print(result.shape)
# Model: "model"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input_layer (InputLayer) [(None, 10, 4)] 0
#
# conv1d_1 (Conv1D) (None, 10, 2) 10
#
# smu_leaky_p_re_lu_1 (SMULea (None, 10, 2) 2
# kyPReLU)
#
# =================================================================
# Total params: 12
# Trainable params: 12
# Non-trainable params: 0
# _________________________________________________________________
# None
# 1/1 [==============================] - 0s 13ms/step
# [[[-1.6503611e+01 -3.5051659e+01]
# [ 4.0098205e-02 1.5923592e+00]
# [-1.4898951e+00 7.5487376e-05]
# [ 3.1900513e+01 2.8786476e+01]
# [ 1.9207695e+01 3.6511238e+01]
# [-6.8302655e-01 -4.7705490e-02]
# [ 9.6008554e-03 7.5611029e+00]
# [ 4.7136435e-01 2.5528276e+00]
# [ 2.6859209e-01 3.3496175e+00]
# [ 1.4372441e+01 3.4978668e+01]]]
# (1, 10, 2)

tf.GradientTape giving None gradient

I'm trying to write a custom training loop. After creating the model, I added some extra trainable parameters to some layers of my model. I use these extra parameters to update my original parameters on every forward pass. But when I calculate the gradients, I get None for the extra parameter that I added last. The code is given below:
model = Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(Dense(1, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.layers[1].add_weight(name="x1", shape=(1,), initializer=tf.keras.initializers.Constant(value=1.0),trainable=True)
dataset = tf.data.Dataset.from_tensor_slices((feature, labels))
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
for par in model.layers[1].trainable_weights:
if "x1" in par.name:
bits = tf.convert_to_tensor(par)
for par in model.layers[1].trainable_weights:
if "kernel" in par.name:
par = bits + 1.0
x = model(x_batch_train, training = True)
loss = tf.keras.losses.SparseCategoricalCrossentropy(y_batch_train, x)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
Here, I have added one extra parameter called x1, and it updates the kernel of the Dense layer. But I'm getting a None gradient for the x1 parameter. The output is:
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
tf.Tensor([-0.], shape=(1,), dtype=float32)
None
tf.Tensor([[0. 0.]], shape=(1, 2), dtype=float32)
tf.Tensor([-0.5 0.5], shape=(2,), dtype=float32)
Why is this happening?

The problem is that the changes you are making to the layer's weights have no direct connection to the output of the model in the context of tf.GradientTape and are therefore not tracked. You could solve this with a simple custom layer:
import tensorflow as tf
class DenseLayer(tf.keras.layers.Layer):
    """Dense layer with an extra trainable offset ``x1`` added to the kernel.

    The forward pass computes ``relu(inputs @ (kernel + x1 + 1.0) + bias)``,
    so ``x1`` takes part in the computation that ``tf.GradientTape`` records.
    """

    def __init__(self, units=1):
        super(DenseLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        """Create kernel, bias and the extra ``x1`` weight."""
        kernel_shape = [int(input_shape[-1]), self.units]
        # Pass the name by keyword: on Keras versions where `shape` is the
        # first positional parameter of `add_weight`, a positional name
        # collides with the `shape=` keyword.
        self.w = self.add_weight(name="kernel",
                                 shape=kernel_shape,
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer="zeros",
                                 trainable=True)
        # `x1` has the same shape as the kernel and starts at ones.
        self.bits = self.add_weight(name="x1",
                                    shape=kernel_shape,
                                    initializer=tf.keras.initializers.ones(),
                                    trainable=True)

    def call(self, inputs):
        """Apply the affine map with the shifted kernel, then ReLU."""
        return tf.nn.relu(tf.matmul(inputs, (self.w + self.bits + 1.0)) + self.b)
dense_layer = DenseLayer(1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
print(model.summary())
dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1, 1)), tf.random.uniform((50, ), maxval=2, dtype=tf.int32))).batch(2)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
y = model(x_batch_train, training = True)
loss = loss_fn(y_batch_train, y)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
optimizer.apply_gradients(zip(val, model.trainable_variables))

Your idea is good. I am not extending the previous answer, but a question about custom layers like this has been asked before; you can do the same for an LSTM and train it with model.fit( ... ).
It is not about the GradientTape.
[ Sample - Dense ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Function
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyDenseLayer(tf.keras.layers.Layer):
    """Bias-free dense layer that adds a constant to its inputs first.

    The forward pass computes ``(inputs + num_add) @ kernel``.
    """

    def __init__(self, num_outputs, num_add):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.num_add = num_add

    def build(self, input_shape):
        """Create the kernel of shape ``(input_shape[-1], num_outputs)``."""
        # Pass the name by keyword: on Keras versions where `shape` is the
        # first positional parameter of `add_weight`, a positional name
        # collides with the `shape=` keyword.
        self.kernel = self.add_weight(
            name="kernel",
            shape=[int(input_shape[-1]), self.num_outputs])

    def call(self, inputs):
        """Shift the inputs by ``num_add`` and project them with the kernel."""
        temp = tf.add(inputs, self.num_add)
        temp = tf.matmul(temp, self.kernel)
        return temp
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
])
layer = MyDenseLayer(10, 5)
model.add(layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(192, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()
[ Output ]:

Multi lstm layers and multi lstm in pytorch

I am using two ways to create a two-layer LSTM, as shown in the two code listings below. Can anyone tell me why the outputs are not the same? And if you have experience with this, can you tell me which one is better? Thanks so much! (Thanks for the suggestion to initialize them with the same weights and biases; I have added this to the original code. Despite the same initial parameters, their outputs are still not the same...)
The first way using num_layers:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)
lstm = nn.LSTM(3, 3,2) # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5
weight_ih_0=None
weight_hh_0=None
# bias_ih_0=None
# bias_hh_0=None
weight_ih_1=None
weight_hh_1=None
# bias_ih_1=None
# bias_hh_1=None
for name, param in lstm.named_parameters():
if 'bias' in name:
# print(f'bias {name} before init: {param}')
nn.init.constant_(param, 0.0)
# print(f'bias {name} after init: {param}')
elif 'weight' in name:
# print(f'weight {name} before init: {param}')
nn.init.xavier_normal_(param)
print(f'weight {name} after init: {param}')
for name, param in lstm.named_parameters():
if 'weight_ih_l0' in name:
weight_ih_0=param
if 'weight_hh_l0' in name:
weight_hh_0=param
if 'weight_ih_l1' in name:
weight_ih_1=param
if 'weight_hh_l1' in name:
weight_hh_1=param
print(f'inputs: {inputs}')
# initialize the hidden state.
hidden = (torch.zeros(2, 1, 3),
torch.zeros(2, 1, 3))
idx=0
for i in inputs:
print(f'idx: {idx}')
# print(f'i: {i}')
idx+=1
# Step through the sequence one element at a time.
# after each step, hidden contains the hidden
out, hidden = lstm(i.view(1, 1, -1), hidden)
print(out)
print("==========")
# print(hidden)
The output is:
weight weight_ih_l0 after init: Parameter containing:
tensor([[ 0.6025, -0.1577, -0.0990],
[-0.5255, 0.4554, 0.4651],
[ 0.1428, 0.1414, -0.0291],
[ 0.1248, 0.3465, -0.5053],
[ 0.6295, -0.8635, -0.3394],
[ 0.1072, 0.0786, 0.3427],
[ 0.5352, -0.2032, 0.8816],
[ 0.3727, -0.1608, -0.6332],
[-0.3745, 0.1903, -0.1654],
[-0.0460, -0.2148, 0.7737],
[-0.1980, -0.8980, -0.3470],
[-0.1130, 0.6074, 0.1844]], requires_grad=True)
weight weight_hh_l0 after init: Parameter containing:
tensor([[-0.0719, -0.0122, 0.2626],
[ 0.3887, -0.3044, -0.4356],
[-0.8422, 0.2204, 0.1151],
[ 0.4171, 0.1116, -0.2114],
[ 0.2061, -0.3204, -0.0983],
[ 0.4791, -0.5683, -0.3928],
[-0.3196, -0.1726, -0.0732],
[-0.3058, -0.5667, -0.0211],
[-0.0832, -0.3168, 0.1241],
[-0.4197, 0.0525, 0.0741],
[ 0.3849, 0.0481, -0.3130],
[ 0.5788, 0.6312, -0.3627]], requires_grad=True)
weight weight_ih_l1 after init: Parameter containing:
tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01],
[-5.2666e-01, 2.7323e-02, 1.2894e-01],
[ 3.7136e-01, 3.3969e-01, 1.9601e-01],
[ 3.5802e-01, -4.3600e-01, -1.7962e-01],
[ 8.3209e-01, 1.7189e-01, 2.2195e-01],
[-2.1302e-02, -1.6867e-01, -1.3460e-01],
[ 1.3446e-01, 1.7708e-01, -5.6676e-01],
[-2.3697e-01, -2.8254e-02, -2.2063e-01],
[-2.0928e-01, 3.4973e-01, 3.5858e-04],
[-5.0565e-01, -6.8619e-02, 3.7702e-01],
[-9.0796e-02, -1.7238e-01, 4.7868e-01],
[-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True)
weight weight_hh_l1 after init: Parameter containing:
tensor([[-0.3017, -0.0811, -0.6554],
[ 0.2665, -0.2052, -0.0577],
[ 0.5493, -0.5094, 0.2167],
[ 0.1210, -0.3868, -0.2293],
[-0.0991, 0.6744, -0.0114],
[-0.0343, -0.6136, 0.4856],
[ 0.0505, 0.3920, -0.1662],
[ 0.1163, -0.1296, 0.2505],
[-0.1373, -0.8803, -0.4666],
[-0.0230, -0.0346, -0.8451],
[ 0.2032, 0.1847, -0.0758],
[ 0.2533, 0.1532, 0.8224]], requires_grad=True)
inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])]
idx: 0
tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>)
==========
idx: 1
tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>)
==========
idx: 2
tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>)
==========
idx: 3
tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>)
==========
idx: 4
tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>)
==========
The second way creating two individual lstm:
import copy
torch.manual_seed(1)
lstm = nn.LSTMCell(3, 3) # Input dim is 3, output dim is 3
lstm2 = nn.LSTMCell(3, 3) # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)] # make a sequence of length 5
for name, param in lstm.named_parameters():
if 'bias' in name:
# print(f'lstm bias {name} before init: {param}')
nn.init.constant_(param, 0.0)
# print(f'lstm bias {name} after init: {param}')
elif 'weight' in name:
# print(f'lstm weight {name} before init: {param}')
if 'weight_ih' in name:
param=copy.deepcopy(weight_ih_0)
print(f'lstm {name} after init: {param}')
if 'weight_hh' in name:
param=copy.deepcopy(weight_hh_0)
print(f'lstm {name} after init: {param}')
for name, param in lstm2.named_parameters():
if 'bias' in name:
# print(f'lstm2 bias {name} before init: {param}')
nn.init.constant_(param, 0.0)
# print(f'lstm2 bias {name} after init: {param}')
elif 'weight' in name:
# print(f'lstm2 weight {name} before init: {param}')
if 'weight_ih' in name:
param=copy.deepcopy(weight_ih_1)
print(f'lstm2 {name} after init: {param}')
if 'weight_hh' in name:
param=copy.deepcopy(weight_hh_1)
print(f'lstm2 {name} after init: {param}')
print(f'inputs: {inputs}')
# initialize the hidden state.
hidden = torch.zeros(1, 3)
cell= torch.zeros(1, 3)
idx=0
for i in inputs:
print(f'idx: {idx}')
idx+=1
# Step through the sequence one element at a time.
# after each step, hidden contains the hidden
hidden, cell = lstm(i.view(1, -1), (hidden,cell))
# print(hidden.shape)
hidden, cell = lstm2(hidden, (hidden,cell))
print(hidden)
print("==========")
And the output is:
lstm weight_ih after init: Parameter containing:
tensor([[ 0.6025, -0.1577, -0.0990],
[-0.5255, 0.4554, 0.4651],
[ 0.1428, 0.1414, -0.0291],
[ 0.1248, 0.3465, -0.5053],
[ 0.6295, -0.8635, -0.3394],
[ 0.1072, 0.0786, 0.3427],
[ 0.5352, -0.2032, 0.8816],
[ 0.3727, -0.1608, -0.6332],
[-0.3745, 0.1903, -0.1654],
[-0.0460, -0.2148, 0.7737],
[-0.1980, -0.8980, -0.3470],
[-0.1130, 0.6074, 0.1844]], requires_grad=True)
lstm weight_hh after init: Parameter containing:
tensor([[-0.0719, -0.0122, 0.2626],
[ 0.3887, -0.3044, -0.4356],
[-0.8422, 0.2204, 0.1151],
[ 0.4171, 0.1116, -0.2114],
[ 0.2061, -0.3204, -0.0983],
[ 0.4791, -0.5683, -0.3928],
[-0.3196, -0.1726, -0.0732],
[-0.3058, -0.5667, -0.0211],
[-0.0832, -0.3168, 0.1241],
[-0.4197, 0.0525, 0.0741],
[ 0.3849, 0.0481, -0.3130],
[ 0.5788, 0.6312, -0.3627]], requires_grad=True)
lstm2 weight_ih after init: Parameter containing:
tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01],
[-5.2666e-01, 2.7323e-02, 1.2894e-01],
[ 3.7136e-01, 3.3969e-01, 1.9601e-01],
[ 3.5802e-01, -4.3600e-01, -1.7962e-01],
[ 8.3209e-01, 1.7189e-01, 2.2195e-01],
[-2.1302e-02, -1.6867e-01, -1.3460e-01],
[ 1.3446e-01, 1.7708e-01, -5.6676e-01],
[-2.3697e-01, -2.8254e-02, -2.2063e-01],
[-2.0928e-01, 3.4973e-01, 3.5858e-04],
[-5.0565e-01, -6.8619e-02, 3.7702e-01],
[-9.0796e-02, -1.7238e-01, 4.7868e-01],
[-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True)
lstm2 weight_hh after init: Parameter containing:
tensor([[-0.3017, -0.0811, -0.6554],
[ 0.2665, -0.2052, -0.0577],
[ 0.5493, -0.5094, 0.2167],
[ 0.1210, -0.3868, -0.2293],
[-0.0991, 0.6744, -0.0114],
[-0.0343, -0.6136, 0.4856],
[ 0.0505, 0.3920, -0.1662],
[ 0.1163, -0.1296, 0.2505],
[-0.1373, -0.8803, -0.4666],
[-0.0230, -0.0346, -0.8451],
[ 0.2032, 0.1847, -0.0758],
[ 0.2533, 0.1532, 0.8224]], requires_grad=True)
inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])]
idx: 0
tensor([[-0.0152, -0.0344, 0.0368]], grad_fn=<MulBackward0>)
==========
idx: 1
tensor([[-0.0265, -0.0143, 0.0730]], grad_fn=<MulBackward0>)
==========
idx: 2
tensor([[-0.0210, -0.0033, 0.0529]], grad_fn=<MulBackward0>)
==========
idx: 3
tensor([[-0.0580, -0.0201, 0.1194]], grad_fn=<MulBackward0>)
==========
idx: 4
tensor([[-0.0672, -0.0801, 0.1165]], grad_fn=<MulBackward0>)
==========

Although you initialized two LSTMs, obviously the initial weights of the two are different. You can verify this with the following code:
for p in lstm.parameters():
print(p)
I may prefer the first method, because this method does not require us to manually link between multiple layers.

I have the answer now. At the very beginning, I was confused about the hidden state and the input of the second LSTM layer.
For a stacked LSTM with num_layers=2, the initial hidden states have a leading dimension of 2, because each LSTM layer needs its own initial hidden state, while the second LSTM layer takes the output hidden state of the first LSTM layer as its input.
In the stacked model above, each LSTM layer starts from zero hidden states, so in the model built from two individual LSTMs, each of them should likewise be initialized with its own zero hidden states.
In addition, I made a mistake when initializing the weight and bias values.
To make the two methods above produce the same outputs, I use the following code:
the first method:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

# One stacked LSTM: input dim 3, hidden dim 3, two layers.
lstm = nn.LSTM(3, 3, 2)
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# Zero every bias and Xavier-initialise every weight matrix. The order of
# these calls matters: the LSTM is constructed and the inputs are sampled
# before the weights are re-drawn, so the random stream is consumed in a
# fixed, reproducible order.
for name, param in lstm.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)
        print(f'weight {name} after init: {param}')

# Keep handles to the per-layer weight matrices so the same values can be
# reused when the two layers are rebuilt as separate single-layer LSTMs.
weight_ih_0 = lstm.weight_ih_l0
weight_hh_0 = lstm.weight_hh_l0
weight_ih_1 = lstm.weight_ih_l1
weight_hh_1 = lstm.weight_hh_l1

print(f'inputs: {inputs}')

# Initial (h_0, c_0): the first dimension is 2 because each of the two
# stacked layers needs its own initial state.
hidden = (torch.zeros(2, 1, 3), torch.zeros(2, 1, 3))

# Step through the sequence one element at a time; after each step,
# `hidden` holds the updated (h, c) pair that is fed into the next step.
for idx, step_input in enumerate(inputs):
    print(f'idx: {idx}')
    out, hidden = lstm(step_input.view(1, 1, -1), hidden)
    print(out)
    print("==========")
And the output is:
weight weight_ih_l0 after init: Parameter containing:
tensor([[ 0.6025, -0.1577, -0.0990],
[-0.5255, 0.4554, 0.4651],
[ 0.1428, 0.1414, -0.0291],
[ 0.1248, 0.3465, -0.5053],
[ 0.6295, -0.8635, -0.3394],
[ 0.1072, 0.0786, 0.3427],
[ 0.5352, -0.2032, 0.8816],
[ 0.3727, -0.1608, -0.6332],
[-0.3745, 0.1903, -0.1654],
[-0.0460, -0.2148, 0.7737],
[-0.1980, -0.8980, -0.3470],
[-0.1130, 0.6074, 0.1844]], requires_grad=True)
weight weight_hh_l0 after init: Parameter containing:
tensor([[-0.0719, -0.0122, 0.2626],
[ 0.3887, -0.3044, -0.4356],
[-0.8422, 0.2204, 0.1151],
[ 0.4171, 0.1116, -0.2114],
[ 0.2061, -0.3204, -0.0983],
[ 0.4791, -0.5683, -0.3928],
[-0.3196, -0.1726, -0.0732],
[-0.3058, -0.5667, -0.0211],
[-0.0832, -0.3168, 0.1241],
[-0.4197, 0.0525, 0.0741],
[ 0.3849, 0.0481, -0.3130],
[ 0.5788, 0.6312, -0.3627]], requires_grad=True)
weight weight_ih_l1 after init: Parameter containing:
tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01],
[-5.2666e-01, 2.7323e-02, 1.2894e-01],
[ 3.7136e-01, 3.3969e-01, 1.9601e-01],
[ 3.5802e-01, -4.3600e-01, -1.7962e-01],
[ 8.3209e-01, 1.7189e-01, 2.2195e-01],
[-2.1302e-02, -1.6867e-01, -1.3460e-01],
[ 1.3446e-01, 1.7708e-01, -5.6676e-01],
[-2.3697e-01, -2.8254e-02, -2.2063e-01],
[-2.0928e-01, 3.4973e-01, 3.5858e-04],
[-5.0565e-01, -6.8619e-02, 3.7702e-01],
[-9.0796e-02, -1.7238e-01, 4.7868e-01],
[-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True)
weight weight_hh_l1 after init: Parameter containing:
tensor([[-0.3017, -0.0811, -0.6554],
[ 0.2665, -0.2052, -0.0577],
[ 0.5493, -0.5094, 0.2167],
[ 0.1210, -0.3868, -0.2293],
[-0.0991, 0.6744, -0.0114],
[-0.0343, -0.6136, 0.4856],
[ 0.0505, 0.3920, -0.1662],
[ 0.1163, -0.1296, 0.2505],
[-0.1373, -0.8803, -0.4666],
[-0.0230, -0.0346, -0.8451],
[ 0.2032, 0.1847, -0.0758],
[ 0.2533, 0.1532, 0.8224]], requires_grad=True)
inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])]
idx: 0
tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>)
==========
idx: 1
tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>)
==========
idx: 2
tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>)
==========
idx: 3
tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>)
==========
idx: 4
tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>)
==========
The second method:
torch.manual_seed(1)

# Two independent single-layer LSTMs standing in for the two stacked layers.
lstm = nn.LSTM(3, 3, 1)  # Input dim is 3, output dim is 3
lstm2 = nn.LSTM(3, 3, 1)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
print(f'inputs: {inputs}')

# Each LSTM gets its own zero-valued (h_0, c_0) pair.
hidden1 = (torch.zeros(1, 1, 3), torch.zeros(1, 1, 3))
hidden2 = (torch.zeros(1, 1, 3), torch.zeros(1, 1, 3))


def _load_layer_weights(module, weight_ih, weight_hh, label):
    """Zero the biases of ``module`` and copy the given weights into it.

    The values are copied under ``torch.no_grad()`` instead of rebinding
    ``.data``, so ``module`` owns its own storage rather than aliasing the
    tensors it was initialised from. Each weight matrix is printed
    afterwards, prefixed with ``label``.
    """
    with torch.no_grad():
        module.bias_ih_l0.zero_()
        module.bias_hh_l0.zero_()
        module.weight_ih_l0.copy_(weight_ih)
        module.weight_hh_l0.copy_(weight_hh)
    for name, param in module.named_parameters():
        if 'weight' in name:
            print(f'{label} {name} after init: {param}')


# Layer 0 of the stacked model -> lstm, layer 1 -> lstm2.
_load_layer_weights(lstm, weight_ih_0, weight_hh_0, 'lstm')
_load_layer_weights(lstm2, weight_ih_1, weight_hh_1, 'lstm2')

# Print lstm2's weights once more as a final check of what it holds.
for name, param in lstm2.named_parameters():
    if 'weight' in name:
        print(f'lstm2 {name} after init: {param}')

# Step through the sequence one element at a time, manually feeding the
# output of the first LSTM into the second one.
for idx, step_input in enumerate(inputs):
    print(f'idx: {idx}')
    out, hidden1 = lstm(step_input.view(1, 1, -1), hidden1)
    out, hidden2 = lstm2(out.view(1, 1, -1), hidden2)
    print(out)
    print("==========")
And the output is:
inputs: [tensor([[1.5381, 1.4673, 1.5951]]), tensor([[-1.5279, 1.0156, -0.2020]]), tensor([[-1.2865, 0.8231, -0.6101]]), tensor([[-1.2960, -0.9434, 0.6684]]), tensor([[ 1.1628, -0.3229, 1.8782]])]
lstm weight_ih_l0 after init: Parameter containing:
tensor([[ 0.6025, -0.1577, -0.0990],
[-0.5255, 0.4554, 0.4651],
[ 0.1428, 0.1414, -0.0291],
[ 0.1248, 0.3465, -0.5053],
[ 0.6295, -0.8635, -0.3394],
[ 0.1072, 0.0786, 0.3427],
[ 0.5352, -0.2032, 0.8816],
[ 0.3727, -0.1608, -0.6332],
[-0.3745, 0.1903, -0.1654],
[-0.0460, -0.2148, 0.7737],
[-0.1980, -0.8980, -0.3470],
[-0.1130, 0.6074, 0.1844]], requires_grad=True)
lstm weight_hh_l0 after init: Parameter containing:
tensor([[-0.0719, -0.0122, 0.2626],
[ 0.3887, -0.3044, -0.4356],
[-0.8422, 0.2204, 0.1151],
[ 0.4171, 0.1116, -0.2114],
[ 0.2061, -0.3204, -0.0983],
[ 0.4791, -0.5683, -0.3928],
[-0.3196, -0.1726, -0.0732],
[-0.3058, -0.5667, -0.0211],
[-0.0832, -0.3168, 0.1241],
[-0.4197, 0.0525, 0.0741],
[ 0.3849, 0.0481, -0.3130],
[ 0.5788, 0.6312, -0.3627]], requires_grad=True)
lstm2 weight_ih_l0 after init: Parameter containing:
tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01],
[-5.2666e-01, 2.7323e-02, 1.2894e-01],
[ 3.7136e-01, 3.3969e-01, 1.9601e-01],
[ 3.5802e-01, -4.3600e-01, -1.7962e-01],
[ 8.3209e-01, 1.7189e-01, 2.2195e-01],
[-2.1302e-02, -1.6867e-01, -1.3460e-01],
[ 1.3446e-01, 1.7708e-01, -5.6676e-01],
[-2.3697e-01, -2.8254e-02, -2.2063e-01],
[-2.0928e-01, 3.4973e-01, 3.5858e-04],
[-5.0565e-01, -6.8619e-02, 3.7702e-01],
[-9.0796e-02, -1.7238e-01, 4.7868e-01],
[-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True)
lstm2 weight_hh_l0 after init: Parameter containing:
tensor([[-0.3017, -0.0811, -0.6554],
[ 0.2665, -0.2052, -0.0577],
[ 0.5493, -0.5094, 0.2167],
[ 0.1210, -0.3868, -0.2293],
[-0.0991, 0.6744, -0.0114],
[-0.0343, -0.6136, 0.4856],
[ 0.0505, 0.3920, -0.1662],
[ 0.1163, -0.1296, 0.2505],
[-0.1373, -0.8803, -0.4666],
[-0.0230, -0.0346, -0.8451],
[ 0.2032, 0.1847, -0.0758],
[ 0.2533, 0.1532, 0.8224]], requires_grad=True)
lstm2 weight_ih_l0 after init: Parameter containing:
tensor([[ 3.6955e-02, 7.1276e-02, -4.3073e-01],
[-5.2666e-01, 2.7323e-02, 1.2894e-01],
[ 3.7136e-01, 3.3969e-01, 1.9601e-01],
[ 3.5802e-01, -4.3600e-01, -1.7962e-01],
[ 8.3209e-01, 1.7189e-01, 2.2195e-01],
[-2.1302e-02, -1.6867e-01, -1.3460e-01],
[ 1.3446e-01, 1.7708e-01, -5.6676e-01],
[-2.3697e-01, -2.8254e-02, -2.2063e-01],
[-2.0928e-01, 3.4973e-01, 3.5858e-04],
[-5.0565e-01, -6.8619e-02, 3.7702e-01],
[-9.0796e-02, -1.7238e-01, 4.7868e-01],
[-1.1565e-01, -6.7956e-02, -2.1049e-01]], requires_grad=True)
lstm2 weight_hh_l0 after init: Parameter containing:
tensor([[-0.3017, -0.0811, -0.6554],
[ 0.2665, -0.2052, -0.0577],
[ 0.5493, -0.5094, 0.2167],
[ 0.1210, -0.3868, -0.2293],
[-0.0991, 0.6744, -0.0114],
[-0.0343, -0.6136, 0.4856],
[ 0.0505, 0.3920, -0.1662],
[ 0.1163, -0.1296, 0.2505],
[-0.1373, -0.8803, -0.4666],
[-0.0230, -0.0346, -0.8451],
[ 0.2032, 0.1847, -0.0758],
[ 0.2533, 0.1532, 0.8224]], requires_grad=True)
idx: 0
tensor([[[ 0.0374, -0.0085, -0.0240]]], grad_fn=<StackBackward>)
==========
idx: 1
tensor([[[ 0.0073, -0.0110, -0.0296]]], grad_fn=<StackBackward>)
==========
idx: 2
tensor([[[-0.0314, -0.0147, -0.0136]]], grad_fn=<StackBackward>)
==========
idx: 3
tensor([[[-0.0458, -0.0118, -0.0254]]], grad_fn=<StackBackward>)
==========
idx: 4
tensor([[[-0.0096, -0.0281, -0.0440]]], grad_fn=<StackBackward>)
==========

ValueError: Graph disconnected: cannot obtain value for tensor Tensor in keras, how can I fix it?

I am trying to build a model that resembles an encoder-decoder model; the difference is that I use the same LSTM layer in both for loops.
To merge the hidden states from t-1 and t-2, I built an intermediate layer called MyLayer, but the following error is always raised:
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1']
I spent a whole afternoon debugging, but couldn't figure it out.
Most of the answers I found are about avoiding Input layer names for intermediate parameters, but this approach does not work for me.
I printed out "lstm_layer_18/Identity_1:0" from the error; it is the state_h in the second for loop, but I still cannot fix it. Can anyone help me find the error? My code is shown below.
class MyLayer(Layer):
    """Blend two same-shaped tensors with learned element-wise weights.

    Called with a list ``[a, b]``, the layer returns
    ``a * kernel_1 + b * kernel_2``. Each kernel is a trainable weight whose
    shape is the full shape reported for the corresponding input, so the
    layer must always be called with inputs of that same shape.
    """

    def __init__(self, output_dim, **kwargs):
        # output_dim is stored for reference; the kernel shapes are taken
        # from the input shapes in build(), not from this value.
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one trainable kernel per input."""
        assert isinstance(input_shape, list)
        # Create the trainable weights for this layer.
        self.kernel_1 = self.add_weight(name='kernel_1',
                                        shape=input_shape[0],
                                        initializer='uniform',
                                        trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=input_shape[1],
                                        initializer='uniform',
                                        trainable=True)
        super(MyLayer, self).build(input_shape)  # Must be called at the end.

    def call(self, v):
        """Return the weighted element-wise sum of the two inputs."""
        assert isinstance(v, list)
        a, b = v
        # Plain tensor arithmetic: no new Add()/Multiply() layer objects are
        # instantiated on every call.
        return a * self.kernel_1 + b * self.kernel_2

    def compute_output_shape(self, input_shape):
        """The output has the same shape as either (identical) input."""
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        assert shape_a == shape_b
        return shape_a
def cf_lstm_model(input_step, output_step, features=2, embed_dim=64, lstm_dim=128, batch_size=8):
    """Build the cascaded-state LSTM model.

    One shared LSTM layer is unrolled by hand: first over the observed
    steps, then over the predicted steps. Before every LSTM call the two
    most recent hidden states are blended by ``MyLayer`` and used as the
    LSTM's initial hidden state.

    Args:
        input_step: number of observed time steps (>= 1).
        output_step: number of time steps to predict (>= 1).
        features: size of each input/output vector.
        embed_dim: width of the dense embedding fed to the LSTM.
        lstm_dim: number of LSTM units.
        batch_size: fixed batch size baked into every Input.

    Returns:
        A Model taking ``[x, h_1, h_2, c]`` and producing the predictions
        concatenated along axis 1.

    Raises:
        ValueError: if ``input_step`` or ``output_step`` is smaller than 1.
    """
    if input_step < 1 or output_step < 1:
        raise ValueError('input_step and output_step must both be >= 1')

    # input_shape: (batch_size, input_step, features)
    x = Input(batch_shape=(batch_size, input_step, features))
    h_1 = Input(batch_shape=(batch_size, lstm_dim))
    h_2 = Input(batch_shape=(batch_size, lstm_dim))
    c = Input(batch_shape=(batch_size, lstm_dim))
    state_c = c

    # Every layer is created exactly once and reused, so no layer name is
    # registered twice in the resulting model.
    embedding = Dense(embed_dim, activation='relu', name='embedding')
    inverse_embedding = Dense(features, activation='relu', name='inverse_embedding')
    lstm_layer = LSTM(lstm_dim, activation='relu', return_state=True, name='lstm_layer')
    cascade = MyLayer(lstm_dim)
    to_output_step = Reshape((1, features), name='reshape3')
    to_lstm_step = Reshape((1, embed_dim), name='reshape4')

    # embedding ---> (batch_size, input_step, embed_dim)
    emb_x = embedding(x)

    def take_step(t):
        # t is bound as a default argument so the lambda keeps its own step
        # index instead of reading the loop variable later; the t:t + 1
        # slice keeps the time axis, giving (batch_size, 1, embed_dim).
        return Lambda(lambda xx, t=t: xx[:, t:t + 1, :],
                      name=f'embedded_step_{t}')(emb_x)

    # The two most recent hidden states, oldest first.
    cascade_input = [h_2, h_1]

    # Observe
    for i in range(input_step - 1):
        h_star = cascade(cascade_input)
        _, state_h, state_c = lstm_layer(take_step(i), initial_state=[h_star, state_c])
        # Build a NEW list rather than mutating the one already handed to
        # `cascade`: the functional graph keeps a reference to that list,
        # and changing it in place is what disconnects the graph. The
        # oldest state is dropped so the window slides forward.
        cascade_input = [cascade_input[1], state_h]

    out = take_step(input_step - 1)

    # Predict
    step_outputs = []
    for j in range(output_step):
        h_star = cascade(cascade_input)
        out, state_h, state_c = lstm_layer(out, initial_state=[h_star, state_c])
        print('state_c', state_c)
        out = inverse_embedding(out)
        step_outputs.append(to_output_step(out))
        cascade_input = [cascade_input[1], state_h]
        # Feed the prediction back in as the next LSTM input.
        out = to_lstm_step(embedding(out))

    if len(step_outputs) == 1:
        all_outputs = step_outputs[0]
    else:
        # Mind the axis: predictions are joined along the time dimension.
        # The concatenation is wrapped in a Lambda so it is an ordinary
        # layer of the functional graph.
        all_outputs = Lambda(lambda ts: K.concatenate(ts, axis=1),
                             name='all_outputs')(step_outputs)

    return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs)


cf_lstm = cf_lstm_model(input_step=8,
                        output_step=12,
                        embed_dim=64,
                        lstm_dim=128)
Error Feedback
state_c Tensor("lstm_layer_7/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_8/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_9/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_10/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_11/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_12/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_13/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_14/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_15/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_16/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_17/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_18/Identity_2:0", shape=(8, 128), dtype=float32)
Traceback (most recent call last):
File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 105, in <module>
cf_lstm = cf_lstm_model(input_step=8,
File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 102, in cf_lstm_model
return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 167, in __init__
super(Model, self).__init__(*args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 173, in __init__
self._init_graph_network(*args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 306, in _init_graph_network
nodes, nodes_by_depth, layers, _ = _map_graph_network(
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 1787, in _map_graph_network
raise ValueError('Graph disconnected: '
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1']

ValueError: Python inputs incompatible with input_signature:

System information
OS Platform and Distribution :CentOS Linux release 7.7.1908
-TensorFlow version:2.3.0
I am following this example:https://www.tensorflow.org/tutorials/text/image_captioning?hl=en
It is working as it should be and saving checkpoints and I want to now convert this to a TF Lite model.
Here is the Link of full convert code:https://colab.research.google.com/drive/1GJkGcwWvDAWMooTsECzuSRUSPbirADhb?usp=sharing
Here is the Link of full train code:
https://colab.research.google.com/drive/1X2d9WW1EMEzN8Rgva3rtjevP0T_jFccj?usp=sharing
I am also following issue #32999.
Here is what I am running to save and then convert the inference graph:
@tf.function
def evaluate(image):
    """Sample a caption for ``image`` and return the predicted token ids.

    The image is loaded, passed through the feature extractor and the
    encoder, and the decoder is then run for ``max_length`` steps, sampling
    one token id per step from the decoder's output.

    Args:
        image: value forwarded to ``load_image``.

    Returns:
        A list with one sampled token-id tensor per decoding step.
    """
    hidden = decoder.reset_states(batch_size=1)
    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(
        img_tensor_val,
        (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    features = encoder(img_tensor_val)

    # tf.random.categorical returns int64 ids by default, so every decoder
    # input after the first step is int64. A bare Python int would become
    # int32 and make the first call disagree with the later ones; create the
    # start token as int64 so all calls satisfy one input_signature.
    start_id = tf.constant([tokenizer.word_index['<start>']], dtype=tf.int64)
    dec_input = tf.expand_dims(start_id, 0)

    result = []
    for _ in range(max_length):
        predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
        predicted_id = tf.random.categorical(predictions, 1)[0][0]
        print(predicted_id, predicted_id.dtype)
        result.append(predicted_id)
        # Feed the sampled id back in as the next decoder input.
        dec_input = tf.expand_dims([predicted_id], 0)
    return result
# Trace `evaluate` into a concrete function and convert it to TF Lite.
# export_dir and tflite_enc_input are assigned but not used again in this
# snippet.
export_dir = "./"
tflite_enc_input = ''
# Attach the function to the checkpoint object
# (presumably a tf.train.Checkpoint; confirm against the training script).
ckpt.f = evaluate
# An empty string is passed as the `image` argument while tracing.
to_save = evaluate.get_concrete_function('')
converter = tf.lite.TFLiteConverter.from_concrete_functions([to_save])
tflite_model = converter.convert()
but I get this error
ValueError: in user code:
convert2savedmodel.py:310 evaluate *
predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__ **
outputs = call_fn(inputs, *args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:780 __call__
result = self._call(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:840 _call
return self._stateless_fn(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2828 __call__
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3171 _maybe_define_function
*args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2622 canonicalize_function_inputs
self._flat_input_signature)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2713 _convert_inputs_to_signature
format_error_message(inputs, input_signature))
ValueError: Python inputs incompatible with input_signature:
inputs: (
Tensor("ExpandDims_1:0", shape=(1, 1), dtype=int32),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("zeros:0", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int64, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Encoder Model:
class CNN_Encoder(tf.keras.Model):
    """Project pre-extracted image features through one dense layer + ReLU."""

    def __init__(self, embedding):
        """Create the encoder.

        Args:
            embedding: output width of the dense projection.
        """
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        # Use the constructor argument rather than a module-level global so
        # the layer width is what the caller asked for.
        self.fc = tf.keras.layers.Dense(embedding)

    @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.dtypes.float32)])
    def call(self, x):
        """Apply the dense projection followed by ReLU."""
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x
Decoder model:
class RNN_Decoder(tf.keras.Model):
    """Attention-based GRU decoder producing vocabulary logits per step."""

    def __init__(self, embedding_dim, units, vocab_size):
        """Create the decoder.

        Args:
            embedding_dim: width of the token embedding.
            units: number of GRU units; also the width of fc1 and the size
                passed to the attention module.
            vocab_size: number of tokens; width of the output logits.
        """
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    # The token-id input is declared as int64, so callers must pass int64
    # ids on every call, including the very first (start-token) one.
    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int64),
                                  tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32),
                                  tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])
    def call(self, x, features, hidden):
        """Run one decoding step.

        Args:
            x: token ids for the current step.
            features: encoder output attended over.
            hidden: previous decoder state; used only by the attention
                module (the GRU is called without an initial state).

        Returns:
            A tuple ``(logits, state, attention_weights)``.
        """
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size * max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        """Return an all-zero initial hidden state of shape (batch_size, units)."""
        return tf.zeros((batch_size, self.units))
I then changed the tf.function signature to int32, as below:
@tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32), tf.TensorSpec(shape=[1, 64,256], dtype=tf.float32), tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])
but another error came:
ValueError: Python inputs incompatible with input_signature:
Tensor("ExpandDims_2:0", shape=(1, 1), dtype=int64),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("rnn__decoder/StatefulPartitionedCall:1", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Why do the dtypes of the inputs change from int64 to int32?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Tensorflow saving subclass model which has multiple arguments to call() method - python

Related

Custom activation function in Tensorflow with trainable params

tf.GradientTape giving None gradient

Multi lstm layers and multi lstm in pytorch

ValueError: Graph disconnected: cannot obtain value for tensor Tensor in keras, how can I fix it?

ValueError: Python inputs incompatible with input_signature:

Categories

Resources