Conv1DTranspose creates the wrong dimensions - Python

I'm trying to build an undercomplete autoencoder for music dimensionality reduction. My Autoencoder class is modular: I can pass in a list of convolutional layer sizes and it builds the model for me automatically. The problem is that when I try to create a model with more than 2 convolutional layers, the decoder returns the wrong dimensions. Here is my AE model:
from typing import List, Tuple

from tensorflow import keras as K
from tensorflow.keras import layers


class Autoencoder(K.Model):
    """
    Modular Autoencoder class
    """
    def __init__(self,
                 in_shape: Tuple[int, int],
                 num_hidden_layers: int,
                 activation_func: str = "relu",
                 hidden_activation: str = "sigmoid",
                 conv_layers_sizes: List[int] = [128, 64]):
        super(Autoencoder, self).__init__()
        # Structure of the model
        self.in_shape = in_shape
        self.latent_dim = num_hidden_layers
        self.conv_layers_sizes = conv_layers_sizes
        self.hidden_activation = hidden_activation
        # hyperparameters of the model
        self.activation_func = activation_func
        # build encoder and decoder
        self.encoder = self._build_encoder()
        self.decoder = self._build_decoder()
        # self.last_cLayer_size = None

    def summary(self):
        """
        Show the summary of the Autoencoder.
        """
        self.encoder.summary()
        self.decoder.summary()

    def _create_conv_layer(self, n_filters: int, enc_model) -> None:
        """
        _create_conv_layer adds a strided Conv1D block to a given model.
        """
        enc_model.add(layers.Conv1D(n_filters,
                                    kernel_size=3,
                                    activation=self.activation_func,
                                    padding='same',
                                    strides=2,
                                    name=f"Conv{n_filters}"))
        enc_model.add(layers.BatchNormalization())
        # try to not use max pooling
        # enc_model.add(layers.MaxPool1D(2, strides=1,
        #                                padding="same",
        #                                name=f"BtchNorm_{n_filters}"))

    def _create_deconv_layer(self, n_filters: int, decon_model) -> None:
        """
        _create_deconv_layer adds a Conv1DTranspose layer to a given model.
        """
        decon_model.add(layers.Conv1DTranspose(
            n_filters,
            kernel_size=3,
            strides=2,
            activation=self.activation_func,
            padding='same',
            name=f"ConvTransp{n_filters}"))

    def _build_encoder(self) -> K.Sequential:
        """
        _build_encoder creates the encoder.
        """
        model_encoder = K.Sequential(name="encoder")
        model_encoder.add(layers.InputLayer(input_shape=self.in_shape))
        # create encoder
        for layer_size in self.conv_layers_sizes:
            self._create_conv_layer(n_filters=layer_size,
                                    enc_model=model_encoder)
        # get the dimensions of the last conv layer so the decoder can
        # mirror them
        last_layer = model_encoder.layers[-2]
        _, *self.last_cLayer_size = last_layer.output_shape
        # model_encoder.add(layers.GlobalAveragePooling2D(name="Flatten"))
        model_encoder.add(layers.Flatten(name="Flatten"))
        # hidden layer (the latent code)
        model_encoder.add(layers.Dense(self.latent_dim,
                                       activation=self.hidden_activation,
                                       name=f"hidden_unit_{self.latent_dim}"))
        return model_encoder

    def _build_decoder(self) -> K.Sequential:
        """
        _build_decoder creates the decoder.
        """
        model_decoder = K.Sequential(name="decoder")
        model_decoder.add(layers.InputLayer(input_shape=(self.latent_dim,),
                                            name="decoder_input"))
        # calculate the size of the dense layer that follows the hidden
        # representation
        dense_to_reshape = 1
        for dim in self.last_cLayer_size:
            dense_to_reshape *= dim
        # a trainable dense layer whose size matches the flattened shape of
        # the last conv layer of the encoder
        model_decoder.add(layers.Dense(dense_to_reshape,
                                       activation=self.activation_func))
        model_decoder.add(layers.Dropout(0.8))
        model_decoder.add(layers.Reshape(self.last_cLayer_size))
        # create deconvolutions
        for layer_size in reversed(self.conv_layers_sizes):
            self._create_deconv_layer(layer_size, model_decoder)
        model_decoder.add(layers.Conv1D(self.in_shape[-1],
                                        kernel_size=3,
                                        activation='sigmoid',
                                        padding='same',
                                        name="last_layer"))
        return model_decoder

    def call(self, x):
        """
        Adapts the call method to the Autoencoder: applies the encoder and
        the decoder to the inputs, building a new computational graph from
        the provided inputs.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
Here are two examples of the output:
AE_model = Autoencoder(in_shape=(20, 862),
                       num_hidden_layers=2,
                       conv_layers_sizes=[128, 64],
                       activation_func="relu",
                       hidden_activation="sigmoid")
AE_model.summary()
OUT:
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Conv128 (Conv1D) (None, 10, 128) 331136
_________________________________________________________________
batch_normalization_10 (Batc (None, 10, 128) 512
_________________________________________________________________
Conv64 (Conv1D) (None, 5, 64) 24640
_________________________________________________________________
batch_normalization_11 (Batc (None, 5, 64) 256
_________________________________________________________________
Flatten (Flatten) (None, 320) 0
_________________________________________________________________
hidden_unit_2 (Dense) (None, 2) 642
=================================================================
Total params: 357,186
Trainable params: 356,802
Non-trainable params: 384
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_5 (Dense) (None, 320) 960
_________________________________________________________________
dropout_5 (Dropout) (None, 320) 0
_________________________________________________________________
reshape_5 (Reshape) (None, 5, 64) 0
_________________________________________________________________
ConvTransp64 (Conv1DTranspos (None, 10, 64) 12352
_________________________________________________________________
ConvTransp128 (Conv1DTranspo (None, 20, 128) 24704
_________________________________________________________________
last_layer (Conv1D) (None, 20, 862) 331870
=================================================================
AE_model = Autoencoder(in_shape=(20, 862),
                       num_hidden_layers=2,
                       conv_layers_sizes=[128, 64, 32],
                       activation_func="relu",
                       hidden_activation="sigmoid")
AE_model.summary()
OUT:
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
Conv128 (Conv1D) (None, 10, 128) 331136
_________________________________________________________________
batch_normalization_12 (Batc (None, 10, 128) 512
_________________________________________________________________
Conv64 (Conv1D) (None, 5, 64) 24640
_________________________________________________________________
batch_normalization_13 (Batc (None, 5, 64) 256
_________________________________________________________________
Conv32 (Conv1D) (None, 3, 32) 6176
_________________________________________________________________
batch_normalization_14 (Batc (None, 3, 32) 128
_________________________________________________________________
Flatten (Flatten) (None, 96) 0
_________________________________________________________________
hidden_unit_2 (Dense) (None, 2) 194
=================================================================
Total params: 363,042
Trainable params: 362,594
Non-trainable params: 448
_________________________________________________________________
Model: "decoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_6 (Dense) (None, 96) 288
_________________________________________________________________
dropout_6 (Dropout) (None, 96) 0
_________________________________________________________________
reshape_6 (Reshape) (None, 3, 32) 0
_________________________________________________________________
ConvTransp32 (Conv1DTranspos (None, 6, 32) 3104
_________________________________________________________________
ConvTransp64 (Conv1DTranspos (None, 12, 64) 6208
_________________________________________________________________
ConvTransp128 (Conv1DTranspo (None, 24, 128) 24704
_________________________________________________________________
last_layer (Conv1D) (None, 24, 862) 331870
=================================================================
Why does having more than 2 layers mess up my final dimensions?

I ended up finding a solution on my own. The problem is the stride when it is greater than 1: with strides=2 and padding='same', each Conv1D halves the length with rounding up (20 → 10 → 5 → 3), while each Conv1DTranspose exactly doubles it (3 → 6 → 12 → 24), so as soon as an intermediate length is odd the decoder can no longer get back to the original size. Setting the stride to 1 everywhere makes everything work. An explanation can be found here
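For reference (this is not part of the original post), here is a minimal sketch of the length arithmetic that causes the mismatch, plus one possible workaround that keeps strides=2 and crops the decoder output back to the input length; the toy tensor shapes are illustrative assumptions only:

import math

import tensorflow as tf
from tensorflow.keras import layers

def encoder_lengths(length, n_layers):
    # Conv1D with padding='same' and strides=2 produces ceil(length / 2) steps
    out = [length]
    for _ in range(n_layers):
        out.append(math.ceil(out[-1] / 2))
    return out

def decoder_lengths(length, n_layers):
    # Conv1DTranspose with padding='same' and strides=2 produces length * 2 steps
    out = [length]
    for _ in range(n_layers):
        out.append(out[-1] * 2)
    return out

print(encoder_lengths(20, 3))  # [20, 10, 5, 3] -- the odd 5 loses the information needed to invert
print(decoder_lengths(3, 3))   # [3, 6, 12, 24] -- doubling never gets back to 20

# One possible workaround (my assumption, not the original poster's fix):
# keep strides=2 in the decoder and crop the surplus timesteps at the end.
x = tf.zeros((1, 12, 64))                                         # toy tensor of length 12
y = layers.Conv1DTranspose(128, 3, strides=2, padding='same')(x)  # length 24
y = layers.Cropping1D((0, y.shape[1] - 20))(y)                    # crop back to length 20
print(y.shape)                                                    # (1, 20, 128)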

Related

AttributeError: 'ResourceVariable' object has no attribute '_distribute_strategy'

I am trying to implement ELMo word embeddings in a Bi-LSTM model, but when I run my code in Google Colab with TensorFlow I run into an AttributeError. I think the architecture of my model is correct, but I don't know where the problem is.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
Model: "model_5"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(200, 200)] 0
__________________________________________________________________________________________________
elmo_embedding_5 (ElmoEmbedding (200, 200, 1024) 4 input_6[0][0]
__________________________________________________________________________________________________
dense_11 (Dense) (200, 200, 1024) 1049600 elmo_embedding_5[0][0]
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (200, 200, 256) 2623488 dense_11[0][0]
__________________________________________________________________________________________________
tf_op_layer_Tanh_6 (TensorFlowO [(200, 200, 256)] 0 bidirectional_3[0][0]
__________________________________________________________________________________________________
dense_12 (Dense) (200, 200, 1) 257 tf_op_layer_Tanh_6[0][0]
__________________________________________________________________________________________________
tf_op_layer_MatMul_3 (TensorFlo [(200, 1, 256)] 0 dense_12[0][0]
bidirectional_3[0][0]
__________________________________________________________________________________________________
tf_op_layer_Tanh_7 (TensorFlowO [(200, 1, 256)] 0 tf_op_layer_MatMul_3[0][0]
__________________________________________________________________________________________________
flatten_5 (Flatten) (200, 256) 0 tf_op_layer_Tanh_7[0][0]
__________________________________________________________________________________________________
dense_13 (Dense) (200, 1) 257 flatten_5[0][0]
==================================================================================================
Total params: 3,673,606
Trainable params: 3,673,606
Non-trainable params: 0
__________________________________________________________________________________________________
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-ec34ae4d734a> in <module>()
56
57 model = ElmoClassification()
---> 58 model.run(data.train_reviews, data.train_labels, batch_size=200, epochs=100)
12 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py in variable_created_in_scope(self, v)
3566
3567 def variable_created_in_scope(self, v):
-> 3568 return v._distribute_strategy is None # pylint: disable=protected-access
3569
3570 def _experimental_distribute_dataset(self, dataset, options):
AttributeError: 'ResourceVariable' object has no attribute '_distribute_strategy'

FailedPreconditionError while using DDPG RL algorithm, in python, with keras, keras-rl2

I am training a DDPG agent on a custom environment that I wrote using OpenAI Gym, and I am getting an error while training the model.
When I searched for a solution on the web, I found that some people who faced a similar issue were able to resolve it by initializing the variables,
for example by using:
tf.global_variables_initializer()
But I am using TensorFlow version 2.5.0, which does not have this method, so there should be some other way to solve this error. I have been unable to find the solution.
Here are the libraries that I used, with their versions:
tensorflow: 2.5.0
gym: 0.18.3
numpy: 1.19.5
keras: 2.4.3
keras-rl2: 1.0.5 (the DDPG agent comes from this library)
Error/Stacktrace:
Training for 1000 steps ...
Interval 1 (0 steps performed)
17/10000 [..............................] - ETA: 1:04 - reward: 256251545.0121
C:\Users\vchou\anaconda3\envs\AdSpendProblem\lib\site-packages\keras\engine\training.py:2401: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
warnings.warn('`Model.state_updates` will be removed in a future version. '
100/10000 [..............................] - ETA: 1:03 - reward: 272267266.5754
C:\Users\vchou\anaconda3\envs\AdSpendProblem\lib\site-packages\tensorflow\python\keras\engine\training.py:2426: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
warnings.warn('`Model.state_updates` will be removed in a future version. '
---------------------------------------------------------------------------
FailedPreconditionError Traceback (most recent call last)
<ipython-input-17-0938aa6056e8> in <module>
1 # Training
----> 2 ddpgAgent.fit(env, 1000, verbose=1, nb_max_episode_steps = 100)
~\anaconda3\envs\AdSpendProblem\lib\site-packages\rl\core.py in fit(self, env, nb_steps, action_repetition, callbacks, verbose, visualize, nb_max_start_steps, start_step_policy, log_interval, nb_max_episode_steps)
191 # Force a terminal state.
192 done = True
--> 193 metrics = self.backward(reward, terminal=done)
194 episode_reward += reward
195
~\anaconda3\envs\AdSpendProblem\lib\site-packages\rl\agents\ddpg.py in backward(self, reward, terminal)
279 state0_batch_with_action = [state0_batch]
280 state0_batch_with_action.insert(self.critic_action_input_idx, action_batch)
--> 281 metrics = self.critic.train_on_batch(state0_batch_with_action, targets)
282 if self.processor is not None:
283 metrics += self.processor.metrics
~\anaconda3\envs\AdSpendProblem\lib\site-packages\keras\engine\training_v1.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
1075 self._update_sample_weight_modes(sample_weights=sample_weights)
1076 self._make_train_function()
-> 1077 outputs = self.train_function(ins) # pylint: disable=not-callable
1078
1079 if reset_metrics:
~\anaconda3\envs\AdSpendProblem\lib\site-packages\keras\backend.py in __call__(self, inputs)
4017 self._make_callable(feed_arrays, feed_symbols, symbol_vals, session)
4018
-> 4019 fetched = self._callable_fn(*array_vals,
4020 run_metadata=self.run_metadata)
4021 self._call_fetch_callbacks(fetched[-len(self._fetches):])
~\anaconda3\envs\AdSpendProblem\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
1478 try:
1479 run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None
-> 1480 ret = tf_session.TF_SessionRunCallable(self._session._session,
1481 self._handle, args,
1482 run_metadata_ptr)
FailedPreconditionError: Could not find variable dense_5_1/kernel. This could mean that the variable has been deleted. In TF1, it can also mean the variable is uninitialized. Debug info: container=localhost, status=Not found: Resource localhost/dense_5_1/kernel/class tensorflow::Var does not exist.
[[{{node ReadVariableOp_21}}]]
The actor and critic networks are as follows:
ACTOR NETWORK
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 10) 0
_________________________________________________________________
dense (Dense) (None, 32) 352
_________________________________________________________________
activation (Activation) (None, 32) 0
_________________________________________________________________
dense_1 (Dense) (None, 32) 1056
_________________________________________________________________
activation_1 (Activation) (None, 32) 0
_________________________________________________________________
dense_2 (Dense) (None, 32) 1056
_________________________________________________________________
activation_2 (Activation) (None, 32) 0
_________________________________________________________________
dense_3 (Dense) (None, 10) 330
_________________________________________________________________
activation_3 (Activation) (None, 10) 0
=================================================================
Total params: 2,794
Trainable params: 2,794
Non-trainable params: 0
_________________________________________________________________
None
CRITIC NETWORK
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
observation_input (InputLayer) [(None, 1, 10)] 0
__________________________________________________________________________________________________
action_input (InputLayer) [(None, 10)] 0
__________________________________________________________________________________________________
flatten_1 (Flatten) (None, 10) 0 observation_input[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 20) 0 action_input[0][0]
flatten_1[0][0]
__________________________________________________________________________________________________
dense_4 (Dense) (None, 32) 672 concatenate[0][0]
__________________________________________________________________________________________________
activation_4 (Activation) (None, 32) 0 dense_4[0][0]
__________________________________________________________________________________________________
dense_5 (Dense) (None, 32) 1056 activation_4[0][0]
__________________________________________________________________________________________________
activation_5 (Activation) (None, 32) 0 dense_5[0][0]
__________________________________________________________________________________________________
dense_6 (Dense) (None, 32) 1056 activation_5[0][0]
__________________________________________________________________________________________________
activation_6 (Activation) (None, 32) 0 dense_6[0][0]
__________________________________________________________________________________________________
dense_7 (Dense) (None, 1) 33 activation_6[0][0]
__________________________________________________________________________________________________
activation_7 (Activation) (None, 1) 0 dense_7[0][0]
==================================================================================================
Total params: 2,817
Trainable params: 2,817
Non-trainable params: 0
__________________________________________________________________________________________________
None
Here is the code for the DDPG agent:
# Create DDPG agent
ddpgAgent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=100,
    nb_steps_warmup_actor=100,
    random_process=random_process,
    gamma=0.99,
    target_model_update=1e-3
)
ddpgAgent.compile(Adam(learning_rate=0.001, clipnorm=1.0), metrics=['mae'])
For now I was able to solve this error by replacing the imports from keras with imports from tensorflow.keras, although I don't know why plain keras itself doesn't work.
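For illustration only (the original imports are not shown in the question, so the exact list below is an assumption), the fix amounts to swapping standalone keras imports for their tensorflow.keras counterparts:

# Before: standalone Keras imports (assumed, shown only for illustration)
# from keras.models import Sequential, Model
# from keras.layers import Dense, Activation, Flatten, Input, Concatenate
# from keras.optimizers import Adam

# After: the tf.keras equivalents, which resolved the FailedPreconditionError here
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Flatten, Input, Concatenate
from tensorflow.keras.optimizers import Adam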

AssertionError: Could not compute output Tensor("dense_17/Sigmoid:0", shape=(None, 1), dtype=float32)

I'm trying to train a DC-CNN model for text classification on a given dataset.
What am I doing wrong here?
Code for Model:
from numpy import array
from tensorflow.keras.layers import (Input, Embedding, Conv1D, Dropout,
                                     MaxPooling1D, Flatten, Dense, concatenate)
from tensorflow.keras.models import Model

def define_model(length, vocab_size):
    # channel 1
    inputs1 = Input(shape=(length,))
    embedding1 = Embedding(vocab_size, 100)(inputs1)
    conv1 = Conv1D(filters=32, kernel_size=4, activation='relu')(embedding1)
    drop1 = Dropout(0.5)(conv1)
    pool1 = MaxPooling1D(pool_size=1)(drop1)
    flat1 = Flatten()(pool1)
    # channel 2
    inputs2 = Input(shape=(length,))
    embedding2 = Embedding(vocab_size, 100)(inputs2)
    conv2 = Conv1D(filters=32, kernel_size=6, activation='relu')(embedding2)
    drop2 = Dropout(0.5)(conv2)
    pool2 = MaxPooling1D(pool_size=1)(drop2)
    flat2 = Flatten()(pool2)
    merged = concatenate([flat1, flat2])
    # interpretation
    dense1 = Dense(10, activation='relu')(merged)
    outputs = Dense(1, activation='sigmoid')(dense1)
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    # compile
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summarize
    print(model.summary())
    return model

model = define_model(length, vocab_size)
model.fit([trainX], array(trainLabels), epochs=10, batch_size=16)
I am getting this error:
AssertionError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:747 train_step
y_pred = self(x, training=True)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py:386 call
inputs, training=training, mask=mask)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py:517 _run_internal_graph
assert x_id in tensor_dict, 'Could not compute output ' + str(x)
AssertionError: Could not compute output Tensor("dense_17/Sigmoid:0", shape=(None, 1), dtype=float32)
I have tried to reshape the inputs trainX and trainLabels with this code, but I got the same error:
trainX=np.reshape(trainX,(40, 50))
trainLabels=np.reshape(trainLabels,(40, 1))
This is the summary of the model:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_17 (InputLayer) [(None, 20)] 0
__________________________________________________________________________________________________
input_18 (InputLayer) [(None, 20)] 0
__________________________________________________________________________________________________
embedding_16 (Embedding) (None, 20, 100) 541100 input_17[0][0]
__________________________________________________________________________________________________
embedding_17 (Embedding) (None, 20, 100) 541100 input_18[0][0]
__________________________________________________________________________________________________
conv1d_16 (Conv1D) (None, 17, 32) 12832 embedding_16[0][0]
__________________________________________________________________________________________________
conv1d_17 (Conv1D) (None, 15, 32) 19232 embedding_17[0][0]
__________________________________________________________________________________________________
dropout_16 (Dropout) (None, 17, 32) 0 conv1d_16[0][0]
__________________________________________________________________________________________________
dropout_17 (Dropout) (None, 15, 32) 0 conv1d_17[0][0]
__________________________________________________________________________________________________
max_pooling1d_16 (MaxPooling1D) (None, 17, 32) 0 dropout_16[0][0]
__________________________________________________________________________________________________
max_pooling1d_17 (MaxPooling1D) (None, 15, 32) 0 dropout_17[0][0]
__________________________________________________________________________________________________
flatten_16 (Flatten) (None, 544) 0 max_pooling1d_16[0][0]
__________________________________________________________________________________________________
flatten_17 (Flatten) (None, 480) 0 max_pooling1d_17[0][0]
__________________________________________________________________________________________________
concatenate_8 (Concatenate) (None, 1024) 0 flatten_16[0][0]
flatten_17[0][0]
__________________________________________________________________________________________________
dense_16 (Dense) (None, 10) 10250 concatenate_8[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1) 11 dense_16[0][0]
==================================================================================================
Total params: 1,124,525
Trainable params: 1,124,525
Non-trainable params: 0
How can I fix this error, please?
Since your Keras model has two inputs, you have to split your trainX into two different arrays (or a list/tuple of two arrays); you cannot pass a single array as input.
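A minimal sketch of the corrected call, reusing the question's trainX and trainLabels and assuming (as in the usual two-channel text CNN) that both input branches receive the same padded sequences:

import numpy as np

# The model has two Input layers, so fit() needs a list of two arrays;
# here the same sequences are fed to both channels.
model.fit([trainX, trainX], np.array(trainLabels), epochs=10, batch_size=16)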

How can I use tf.keras.Model.summary to see the layers of a child model which in a father model?

I have a subclassed Model of tf.keras.Model; the code is as follows:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def call(self, inputs):
        x = self.backbone(inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
The result is:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
But I want to see all the layers of MobileNet, so I tried to extract all the layers of MobileNet and put them in the model:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def my_process_layers(self, inputs):
        layers = self.backbone.layers
        tmp_x = inputs
        for i in range(1, len(layers)):
            tmp_x = layers[i](tmp_x)
        return tmp_x

    def call(self, inputs):
        x = self.my_process_layers(inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
The result did not change:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
Then I tried to extract one layer and insert it into the model:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def call(self, inputs):
        x = self.backbone.layers[1](inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
It did not change either. I am so confused.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 244
=================================================================
Total params: 4,254,108
Trainable params: 4,232,220
Non-trainable params: 21,888
_________________________________________________________________
But I see that the parameter count of the dense layer changed; I don't know what happened.
Ioannis's answer is perfectly fine, but unfortunately it drops the Keras 'Model Subclassing' structure that is present in the question. If, just like me, you want to keep this model subclassing and still show all layers in the summary, you can branch down into all the individual layers of the more complex model using a for loop:
class MyMobileNet(tf.keras.Sequential):
    def __init__(self, input_shape=(224, 224, 3), classes=61):
        super(MyMobileNet, self).__init__()
        self.backbone_model = [layer for layer in
                               tf.keras.applications.MobileNet(input_shape, include_top=False, pooling='avg').layers]
        self.classificator = tf.keras.layers.Dense(classes, activation='sigmoid', name='classificator')

    def call(self, inputs):
        x = inputs
        for layer in self.backbone_model:
            x = layer(x)
        x = self.classificator(x)
        return x

model = MyMobileNet()
After this we can directly build the model and call the summary:
model.build(input_shape=(None, 224, 224, 3))
model.summary()
>
Model: "my_mobile_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1_pad (ZeroPadding2D) (None, 225, 225, 3) 0
_________________________________________________________________
conv1 (Conv2D) (None, 112, 112, 32) 864
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32) 128
_________________________________________________________________
....
....
conv_pw_13 (Conv2D) (None, 7, 7, 1024) 1048576
_________________________________________________________________
conv_pw_13_bn (BatchNormaliz (None, 7, 7, 1024) 4096
_________________________________________________________________
conv_pw_13_relu (ReLU) (None, 7, 7, 1024) 0
_________________________________________________________________
global_average_pooling2d_13 (None, 1024) 0
_________________________________________________________________
classificator (Dense) multiple 62525
=================================================================
Total params: 3,291,389
Trainable params: 3,269,501
Non-trainable params: 21,888
_________________________________________________________________
In order to be able to view the backbone's layers, you'll have to construct your new model using backbone.input and backbone.output:
import tensorflow as tf
from tensorflow.keras.models import Model

def Mymodel(backbone_model, classes):
    backbone = backbone_model
    x = backbone.output
    x = tf.keras.layers.Dense(classes, activation='sigmoid')(x)
    model = Model(inputs=backbone.input, outputs=x)
    return model

input_shape = (224, 224, 3)
model = Mymodel(backbone_model=tf.keras.applications.MobileNet(input_shape=input_shape, include_top=False, pooling='avg'),
                classes=61)
model.summary()
There is an argument expand_nested in the summary method:
model.summary(expand_nested=True)
Alternatively, for nested sub-models you can call summary on each one:
for layer in model.layers:
    layer.summary()

Keras - functional and sequential model give different results

I implemented two models, once using the Sequential API and once with the functional API. The two models give different results, which makes no sense to me.
I cannot figure out what the problem is. Any ideas or solutions?
Here both models:
Sequential Model:
model = Sequential()
embedding_layer = Embedding(VOCAB_SIZE + 1, EMBEDDING_SIZE, mask_zero=True)
model.add(embedding_layer)
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(NUM_LABELS, activation='softmax')))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_sents_padded, train_labels_padded, batch_size=4, epochs=10,
          validation_data=(dev_sents_padded, dev_labels_padded))

score, acc = model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nAccuracy: ", acc)
Functional Model:
inputs = Input(shape=(MAX_LENGTH,))
embedding = Embedding(VOCAB_SIZE + 1, EMBEDDING_SIZE, mask_zero=True)(inputs)
left = LSTM(HIDDEN_SIZE, return_sequences=True)(embedding)
right = LSTM(HIDDEN_SIZE, go_backwards=True, return_sequences=True)(embedding)
left_right = concatenate([left, right])
left_right = TimeDistributed(Dense(NUM_LABELS, activation='softmax'))(left_right)
combined_model = Model(inputs=inputs, outputs=left_right)
combined_model.compile(loss='categorical_crossentropy', optimizer='adam',
                       metrics=['accuracy'])
combined_model.fit(
    train_sents_padded,
    train_labels_padded,
    batch_size=4,
    epochs=10,
    validation_data=(dev_sents_padded, dev_labels_padded)
)
score, acc = combined_model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nBidirectional LSTM Accuracy: ", acc)
+++
Summaries:
Sequential model:
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, None, 50) 26150
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 100) 40400
_________________________________________________________________
time_distributed_1 (TimeDist (None, None, 61) 6161
=================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0
Functional model:
Layer (type) Output Shape Param # Connected to
========================================================================
input_1 (InputLayer) (None, 34) 0
____________________________________________________________________
embedding_2 (Embedding) (None, 34, 50) 26150 input_1[0][0]
______________________________________________________________________
lstm_2 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
____________________________________________________________________
lstm_3 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
_________________________________________________________________
concatenate_1 (Concatenate)(None, 34, 100) 0 lstm_2[0][0]
lstm_3[0][0]
___________________________________________________________
time_distributed_2 (TimeDistrib (None, 34, 61) 6161 concatenate_1[0][0]
=====================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0
+++
If I change VOCAB_SIZE + 1 to VOCAB_SIZE in the Sequential model the accuracy is 59, but only on every third run?
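One plausible source of the difference (my reading, not confirmed in the thread): an LSTM with go_backwards=True returns its output sequence in reversed time order, whereas the Bidirectional wrapper reverses the backward output again before merging, so the hand-built two-LSTM model is not timestep-aligned the same way as the Sequential one. A sketch of the functional model rewritten with Bidirectional, reusing the constants from the question (MAX_LENGTH, VOCAB_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, NUM_LABELS), which should match the Sequential version:

from tensorflow.keras.layers import (Input, Embedding, LSTM, Bidirectional,
                                     TimeDistributed, Dense)
from tensorflow.keras.models import Model

inputs = Input(shape=(MAX_LENGTH,))
embedding = Embedding(VOCAB_SIZE + 1, EMBEDDING_SIZE, mask_zero=True)(inputs)
# Bidirectional reverses the backward pass output before concatenating, so
# each timestep sees both directions aligned, as in the Sequential model.
bilstm = Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True))(embedding)
outputs = TimeDistributed(Dense(NUM_LABELS, activation='softmax'))(bilstm)

combined_model = Model(inputs=inputs, outputs=outputs)
combined_model.compile(loss='categorical_crossentropy', optimizer='adam',
                       metrics=['accuracy'])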
