Related
The code I'm trying to run to create a Neural Network model to predict some damping ratio and added mass coefficient. I have 3 inputs of the model (float numbers) and 4 outputs of the model (float numbers).
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import SGD
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Concatenate
from sklearn.model_selection import train_test_split
from Reasearch_project_Code import *
"""Create model"""
Input1 = Input(shape=(1),name="Input1")
Input2 = Input(shape=(1),name="Input2")
Input3 = Input(shape=(1),name="Input3")
Inputs = Concatenate(axis=-1,name="Inputs")([Input1,Input2,Input3])
hidden1 = Dense(100, activation='relu',name="Hidden1")(Inputs)
hidden2 = Dense(100, activation='relu',name="Hidden2")(hidden1)
# Output layer
output1 = Dense(1, activation='linear',name="Output_1")(hidden2)
output2 = Dense(1, activation='linear',name="Output_2")(hidden2)
output3 = Dense(1, activation='linear',name="Output_3")(hidden2)
output4 = Dense(1, activation='linear',name="Output_4")(hidden2)
# output
model = Model(inputs=[Input1,Input2,Input3], outputs=[output1, output2, output3, output4])
# summarize layers
print(model.summary())
opt = SGD(0.001)
model.compile(loss='mean_squared_error', optimizer=opt)
"""Load Data"""
dpath="C:\\Users\\jules\\OneDrive - University of Southampton\\Documents\\Research Project\\5- Codes"
R=RP(dpath)
data="Data/Test3/Test3.csv"
labelss=np.genfromtxt(data,skip_header=1,usecols=(1,2,3),delimiter=",")
sampless=np.genfromtxt(data,skip_header=1,usecols=(4,5,6,7),delimiter=",")
"""scaled data"""
scaler=MinMaxScaler(feature_range=(0,1))
samples = scaler.fit_transform(sampless)
labels = scaler.fit_transform(labelss)
"""split data"""
labels_train, labels_test, samples_train , samples_test= train_test_split(labels,samples,train_size=0.9,random_state=42)
print(f"labels_train:{labels_train.shape}")
print(f"labels_test:{labels_test.shape}")
print(f"samples_train:{samples_train.shape}")
print(f"samples_test:{samples_test.shape}")
history = model.fit(labels_train, samples_train, validation_data=(labels_test, samples_test), epochs=200,batch_size=20, verbose=1)
shape of the data:
labels_train:(6753, 3)
labels_test:(751, 3)
samples_train:(6753, 4)
samples_test:(751, 4)
model:
Model: "functional_115"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
Input1 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
Input2 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
Input3 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
Inputs (Concatenate) (None, 3) 0 Input1[0][0]
Input2[0][0]
Input3[0][0]
__________________________________________________________________________________________________
Hidden1 (Dense) (None, 100) 400 Inputs[0][0]
__________________________________________________________________________________________________
Hidden2 (Dense) (None, 100) 10100 Hidden1[0][0]
__________________________________________________________________________________________________
Output_1 (Dense) (None, 1) 101 Hidden2[0][0]
__________________________________________________________________________________________________
Output_2 (Dense) (None, 1) 101 Hidden2[0][0]
__________________________________________________________________________________________________
Output_3 (Dense) (None, 1) 101 Hidden2[0][0]
__________________________________________________________________________________________________
Output_4 (Dense) (None, 1) 101 Hidden2[0][0]
==================================================================================================
Total params: 10,904
Trainable params: 10,904
Non-trainable params: 0
__________________________________________________________________________________________________
Assertions error:
AssertionError: in user code:
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step **
outputs = model.train_step(data)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
y_pred = self(x, training=True)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:985 __call__
outputs = call_fn(inputs, *args, **kwargs)
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\functional.py:385 call
return self._run_internal_graph(
C:\Users\jules\anaconda3\lib\site-packages\tensorflow\python\keras\engine\functional.py:517 _run_internal_graph
assert x_id in tensor_dict, 'Could not compute output ' + str(x)
AssertionError: Could not compute output Tensor("Output_1/BiasAdd_57:0", shape=(None, 1), dtype=float32)
I'm a beginner and I don't understand where the error come from because my inputs and outputs seems to hae the correct dimensions.
Previously, instead of using 3 inputs layers, I used one with a shape of 3:
Inputs = Input(shape=(3,),name="Inputs")
But this gave me terrible predictions (negative R2)
I wrote the following model fn:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf
def get_model(num_features, output_size, output_bias=None):
output_bias = tf.keras.initializers.Constant(output_bias)
opt = Adam(learning_rate=0.0008)
inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
layers = LSTM(32, activation='tanh')(
inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(32, activation='relu')(layers)
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(32, activation='relu')(layers)
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(output_size, activation='sigmoid',
bias_initializer=output_bias)(layers)
model = Model(inputs, layers)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt, metrics=[tfa.metrics.F1Score(num_classes=2)])
model.summary()
return model
here is the model summary:
Model: "model_5"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(None, None, 11)] 0
__________________________________________________________________________________________________
input.row_lengths_5 (InstanceMe (None,) 0 input_6[0][0]
__________________________________________________________________________________________________
input.to_tensor_5 (InstanceMeth (None, None, 11) 0 input_6[0][0]
__________________________________________________________________________________________________
tf.sequence_mask_5 (TFOpLambda) (None, None) 0 input.row_lengths_5[0][0]
__________________________________________________________________________________________________
lstm_5 (LSTM) (None, 32) 5632 input.to_tensor_5[0][0]
tf.sequence_mask_5[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 32) 128 lstm_5[0][0]
__________________________________________________________________________________________________
dropout_15 (Dropout) (None, 32) 0 batch_normalization_15[0][0]
__________________________________________________________________________________________________
dense_15 (Dense) (None, 32) 1056 dropout_15[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 32) 128 dense_15[0][0]
__________________________________________________________________________________________________
dropout_16 (Dropout) (None, 32) 0 batch_normalization_16[0][0]
__________________________________________________________________________________________________
dense_16 (Dense) (None, 32) 1056 dropout_16[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 32) 128 dense_16[0][0]
__________________________________________________________________________________________________
dropout_17 (Dropout) (None, 32) 0 batch_normalization_17[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1) 33 dropout_17[0][0]
==================================================================================================
Total params: 8,161
Trainable params: 7,969
Non-trainable params: 192
__________________________________________________________________________________________________
And here are the shapes of my data:
print(train_x.shape,train_y.shape)
print(val_x.shape,val_y.shape)
(52499, None, 11) (52499,)
(17500, None, 11) (17500,)
When trying to fit my model, I get the following error:
model.fit(train_x, train_y, epochs=300, batch_size=500, validation_data=(val_x, val_y))
ValueError: Dimension 0 in both shapes must be equal, but are 2 and 1. Shapes are [2] and [1].
I can't understand what is wrong with the shapes.
Your model seems fine. The problem is that you are running into an open issue with the tfa.metrics.F1Score. For your binary case, you will have to change the parameters of the F1Score to tfa.metrics.F1Score(num_classes=1, threshold=0.5). Here is a complete working example:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf
def get_model(num_features, output_size, output_bias=0.001):
output_bias = tf.keras.initializers.Constant(output_bias)
opt = Adam(learning_rate=0.0008)
inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
layers = LSTM(32, activation='tanh')(
inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(32, activation='relu')(layers)
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(32, activation='relu')(layers)
layers = BatchNormalization()(layers)
layers = Dropout(0.05)(layers)
layers = Dense(output_size, activation='sigmoid',
bias_initializer=output_bias)(layers)
model = Model(inputs, layers)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt, metrics=[tfa.metrics.F1Score(num_classes=1, threshold=0.5)])
model.summary()
return model
model = get_model(11, 1)
rt = tf.RaggedTensor.from_row_splits(values=tf.ones([5, 11], tf.int32),
row_splits=[0, 2, 5])
model.fit(rt, tf.random.uniform((2,1), maxval=2), epochs=300, batch_size=2, verbose=2)
Alternatively, you just define your own F1Score method and set it as metric in your model. See this post for more information.
I'm trying to fit a Keras (TF 2.3.1) model for image classification with multiple binary labels as output. The model consists of an Xception CNN + attention layer + dense classifier, and hitting an error on some TPUs only:
UnimplementedError: {{function_node __inference_train_function_644557}} Compilation failure: Dynamic Spatial Convolution is not supported. This fails on Kaggle TPUs but not on Colab - tested both on TF version 2.3.1.
I was looking here but the suggested solution implies that the image dimensions are not set, which is not the case here. train_df is of type <PrefetchDataset shapes: ((None, 750, 750, 3), (None, 11)), types: (tf.float32, tf.int64)> so each image has size 750x750x3. Each layer has a defined output shape per the below model summary, so the layers that follow them should infer their input shape correctly.
From the error, it seems that the problem is on the layer defined by attn_layer = LocallyConnected2D(.... Passing implementation = 2 is a workaround which lets training complete, but this is not suitable for large models (see LocallyConnected2D documentation)
Modelling code:
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import mean_absolute_error
def create_model():
input_shape = (TARGET_SIZE, TARGET_SIZE, 3)
in_lay = Input(input_shape)
conv_base = Xception(include_top = False, weights = 'imagenet', input_shape = input_shape)
pt_features = conv_base(in_lay)
bn_features = BatchNormalization()(pt_features)
# here we do an attention mechanism to turn pixels in the GAP on an off
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(bn_features)
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = LocallyConnected2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
pt_depth = conv_base.get_output_shape_at(0)[-1]
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.5)(gap)
dr_steps = Dropout(0.25)(Dense(1024, activation = 'elu')(gap_dr))
out_layer = Dense(11, activation = 'sigmoid')(dr_steps)
model = Model(inputs = [in_lay], outputs = [out_layer])
model.compile(optimizer = Adam(lr = 0.002), loss = 'binary_crossentropy', metrics = ["AUC"])
return model
with tpu_strategy.scope():
model = create_model()
model.summary()
history = model.fit(
train_df,
epochs = EPOCHS,
steps_per_epoch = STEPS_PER_EPOCH,
validation_data = valid_df,
validation_steps = VALIDATION_STEPS
)
The resulting model summary:
Model: "model_8"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_19 (InputLayer) [(None, 750, 750, 3) 0
__________________________________________________________________________________________________
xception (Model) (None, 24, 24, 2048) 20861480 input_19[0][0]
__________________________________________________________________________________________________
batch_normalization_49 (BatchNo (None, 24, 24, 2048) 8192 xception[1][0]
__________________________________________________________________________________________________
conv2d_67 (Conv2D) (None, 24, 24, 64) 131136 batch_normalization_49[0][0]
__________________________________________________________________________________________________
conv2d_68 (Conv2D) (None, 24, 24, 16) 1040 conv2d_67[0][0]
__________________________________________________________________________________________________
locally_connected2d_9 (LocallyC (None, 24, 24, 1) 9792 conv2d_68[0][0]
__________________________________________________________________________________________________
conv2d_69 (Conv2D) (None, 24, 24, 2048) 2048 locally_connected2d_9[0][0]
__________________________________________________________________________________________________
multiply_9 (Multiply) (None, 24, 24, 2048) 0 conv2d_69[0][0]
batch_normalization_49[0][0]
__________________________________________________________________________________________________
global_average_pooling2d_23 (Gl (None, 2048) 0 multiply_9[0][0]
__________________________________________________________________________________________________
global_average_pooling2d_24 (Gl (None, 2048) 0 conv2d_69[0][0]
__________________________________________________________________________________________________
RescaleGAP (Lambda) (None, 2048) 0 global_average_pooling2d_23[0][0]
global_average_pooling2d_24[0][0]
__________________________________________________________________________________________________
dropout_18 (Dropout) (None, 2048) 0 RescaleGAP[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1024) 2098176 dropout_18[0][0]
__________________________________________________________________________________________________
dropout_19 (Dropout) (None, 1024) 0 dense_17[0][0]
__________________________________________________________________________________________________
dense_18 (Dense) (None, 11) 11275 dropout_19[0][0]
==================================================================================================
Total params: 23,123,139
Trainable params: 23,062,467
Non-trainable params: 60,672
__________________________________________________________________________________________________
Full stacktrace + error message:
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-53-5130a0bcf331> in <module>
19 validation_data = valid_df,
20 validation_steps = VALIDATION_STEPS,
---> 21 callbacks = [model_save, early_stop, reduce_lr]
22 )
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
853 context.async_wait()
854 logs = tmp_logs # No error, now safe to assign to logs.
--> 855 callbacks.on_train_batch_end(step, logs)
856 epoch_logs = copy.copy(logs)
857
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/callbacks.py in on_train_batch_end(self, batch, logs)
387 """
388 if self._should_call_train_batch_hooks:
--> 389 logs = self._process_logs(logs)
390 self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs)
391
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/callbacks.py in _process_logs(self, logs)
263 """Turns tensors into numpy arrays or Python scalars."""
264 if logs:
--> 265 return tf_utils.to_numpy_or_python_type(logs)
266 return {}
267
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py in to_numpy_or_python_type(tensors)
521 return t # Don't turn ragged or sparse tensors to NumPy.
522
--> 523 return nest.map_structure(_to_single_numpy_or_python_type, tensors)
524
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py in map_structure(func, *structure, **kwargs)
615
616 return pack_sequence_as(
--> 617 structure[0], [func(*x) for x in entries],
618 expand_composites=expand_composites)
619
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py in <listcomp>(.0)
615
616 return pack_sequence_as(
--> 617 structure[0], [func(*x) for x in entries],
618 expand_composites=expand_composites)
619
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py in _to_single_numpy_or_python_type(t)
517 def _to_single_numpy_or_python_type(t):
518 if isinstance(t, ops.Tensor):
--> 519 x = t.numpy()
520 return x.item() if np.ndim(x) == 0 else x
521 return t # Don't turn ragged or sparse tensors to NumPy.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in numpy(self)
959 """
960 # TODO(slebedev): Consider avoiding a copy for non-CPU or remote tensors.
--> 961 maybe_arr = self._numpy() # pylint: disable=protected-access
962 return maybe_arr.copy() if isinstance(maybe_arr, np.ndarray) else maybe_arr
963
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in _numpy(self)
927 return self._numpy_internal()
928 except core._NotOkStatusException as e:
--> 929 six.raise_from(core._status_to_exception(e.code, e.message), None)
930
931 #property
/opt/conda/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
UnimplementedError: {{function_node __inference_train_function_644557}} Compilation failure: Dynamic Spatial Convolution is not supported: %convolution.30660 = f32[<=8,24,24,2048]{3,2,1,0} convolution(f32[<=8,24,24,1]{3,2,1,0} %add.30633, f32[1,1,1,2048]{3,2,1,0} %get-tuple-element.354), window={size=1x1}, dim_labels=b01f_01io->b01f, metadata={op_type="Conv2D" op_name="model_8/conv2d_69/Conv2D"}
TPU compilation failed
[[{{node tpu_compile_succeeded_assert/_17367812259898276239/_5}}]]
I have a subclass Model of tf.keras.Model,code is following
import tensorflow as tf
class Mymodel(tf.keras.Model):
def __init__(self, classes, backbone_model, *args, **kwargs):
super(Mymodel, self).__init__(self, args, kwargs)
self.backbone = backbone_model
self.classify_layer = tf.keras.layers.Dense(classes,activation='sigmoid')
def call(self, inputs):
x = self.backbone(inputs)
x = self.classify_layer(x)
return x
inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
the result is :
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
but I want to see the all layers of mobilenet,then I tried to extract all layers of mobilenet and put in the model:
import tensorflow as tf
class Mymodel(tf.keras.Model):
def __init__(self, classes, backbone_model, *args, **kwargs):
super(Mymodel, self).__init__(self, args, kwargs)
self.backbone = backbone_model
self.classify_layer = tf.keras.layers.Dense(classes,activation='sigmoid')
def my_process_layers(self,inputs):
layers = self.backbone.layers
tmp_x = inputs
for i in range(1,len(layers)):
tmp_x = layers[i](tmp_x)
return tmp_x
def call(self, inputs):
x = self.my_process_layers(inputs)
x = self.classify_layer(x)
return x
inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
then the resule not changed.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
then I tried to extract one layer insert to the model :
import tensorflow as tf
class Mymodel(tf.keras.Model):
def __init__(self, classes, backbone_model, *args, **kwargs):
super(Mymodel, self).__init__(self, args, kwargs)
self.backbone = backbone_model
self.classify_layer = tf.keras.layers.Dense(classes,activation='sigmoid')
def call(self, inputs):
x = self.backbone.layers[1](inputs)
x = self.classify_layer(x)
return x
inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
It did not change either.I am so confused.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 244
=================================================================
Total params: 4,254,108
Trainable params: 4,232,220
Non-trainable params: 21,888
_________________________________________________________________
but I find that the parameter of dense layer changed,I dont know what happend.
#Ioannis 's answer is perfectly fine, but unfortunately it drops the keras 'Model Subclassing' structure that is present in the question. If, just like me, you want to keep this model subclassing and still show all layers in the summary, you can branch down into all the individual layers of the more complex model using a for loop:
class MyMobileNet(tf.keras.Sequential):
def __init__(self, input_shape=(224, 224, 3), classes=61):
super(MyMobileNet, self).__init__()
self.backbone_model = [layer for layer in
tf.keras.applications.MobileNet(input_shape, include_top=False, pooling='avg').layers]
self.classificator = tf.keras.layers.Dense(classes,activation='sigmoid', name='classificator')
def call(self, inputs):
x = inputs
for layer in self.backbone_model:
x = layer(x)
x = self.classificator(x)
return x
model = MyMobileNet()
After this we can directly build the model and call the summary:
model.build(input_shape=(None, 224, 224, 3))
model.summary()
>
Model: "my_mobile_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1_pad (ZeroPadding2D) (None, 225, 225, 3) 0
_________________________________________________________________
conv1 (Conv2D) (None, 112, 112, 32) 864
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32) 128
_________________________________________________________________
....
....
conv_pw_13 (Conv2D) (None, 7, 7, 1024) 1048576
_________________________________________________________________
conv_pw_13_bn (BatchNormaliz (None, 7, 7, 1024) 4096
_________________________________________________________________
conv_pw_13_relu (ReLU) (None, 7, 7, 1024) 0
_________________________________________________________________
global_average_pooling2d_13 (None, 1024) 0
_________________________________________________________________
classificator (Dense) multiple 62525
=================================================================
Total params: 3,291,389
Trainable params: 3,269,501
Non-trainable params: 21,888
_________________________________________________________________
In order to be able to view backbone's layers, you' ll have to construct your new model using backbone.input and backbone.output
from tensorflow.keras.models import Model
def Mymodel(backbone_model, classes):
backbone = backbone_model
x = backbone.output
x = tf.keras.layers.Dense(classes,activation='sigmoid')(x)
model = Model(inputs=backbone.input, outputs=x)
return model
input_shape = (224, 224, 3)
model = Mymodel(backbone_model=tf.keras.applications.MobileNet(input_shape=input_shape, include_top=False, pooling='avg'),
classes=61)
model.summary()
There is an argument expand_nested in the Method summary.
model.summary(expand_nested=True)
for layer in model.layers:
layer.summary()
I implemented two models, one time using the sequential way and one time with the functional API. Now the both models give different results, which kind of makes no sense to me.
I cannot figure out, what the problem is. Any ideas or solutions?
Here both models:
Sequential Model:
model = Sequential()
embedding_layer = Embedding(VOCAB_SIZE +1, EMBEDDING_SIZE, mask_zero= True)
model.add(embedding_layer)
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences= True))),
model.add(TimeDistributed(Dense(NUM_LABELS, activation='softmax')))
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_sents_padded, train_labels_padded, batch_size=4, epochs=10,
validation_data=(dev_sents_padded, dev_labels_padded))
score, acc = model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nAccuracy: ", acc)
Functional Model:
inputs = Input(shape=(MAX_LENGTH,))
embedding = Embedding(VOCAB_SIZE +1, EMBEDDING_SIZE, mask_zero= True)(inputs)
left = LSTM(HIDDEN_SIZE, return_sequences=True)(embedding)
right = LSTM(HIDDEN_SIZE, go_backwards=True, return_sequences=True)
(embedding)
left_right = concatenate([left, right])
left_right = TimeDistributed(Dense(NUM_LABELS, activation='softmax'))
(left_right)
combined_model = Model(inputs=inputs, outputs=left_right)
combined_model.compile(loss='categorical_crossentropy', optimizer='adam',
metrics=['accuracy'])
combined_model.fit(
train_sents_padded,
train_labels_padded,
batch_size=4,
epochs=10,
validation_data=(dev_sents_padded, dev_labels_padded)
)
score, acc = combined_model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nBidirectional LSTM Accuracy: ", acc)
+++
Summaries:
Sequential model:
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, None, 50) 26150
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 100) 40400
_________________________________________________________________
time_distributed_1 (TimeDist (None, None, 61) 6161
=================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0 `
Functional model:
Layer (type) Output Shape Param # Connected to
========================================================================
input_1 (InputLayer) (None, 34) 0
____________________________________________________________________
embedding_2 (Embedding) (None, 34, 50) 26150 input_1[0][0]
______________________________________________________________________
lstm_2 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
____________________________________________________________________
lstm_3 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
_________________________________________________________________
concatenate_1 (Concatenate)(None, 34, 100) 0 lstm_2[0][0]
lstm_3[0][0]
___________________________________________________________
time_distributed_2 (TimeDistrib (None, 34, 61) 6161 concatenate_1[0][0]
=====================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0`
+++
If I change VOCAB_SIZE + 1 to VOCAB_SIZE in the seqential model the acc is 59, but only on every third run??