I want to see all the ResNet50 layers and not the resnet50 block in model.summary. I saw a similar issue How can I use tf.keras.Model.summary to see the layers of a child model which in a father model?, but I'm having difficulties in adapting it to my model since it uses ResNet50 instead of MobileNet.
This is my model:
Resnet = ResNet101(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
model = tf.keras.Sequential(Resnet)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(units=no_classes, activation="softmax"))
This is the code I have right now based on the previous issue.
class ResNet50(tf.keras.Sequential):
def __init__(self, input_shape=(224, 224, 3), classes=8):
super(ResNet50, self).__init__()
self.backbone_model = [layer for layer in
tf.keras.applications.ResNet50(input_shape, include_top=False, pooling='avg').layers]
self.classificator = tf.keras.layers.Dense(classes,activation='relu', name='classificator')
def call(self, inputs):
x = inputs
for layer in self.backbone_model:
x = layer(x)
x = self.classificator(x)
return x
model = ResNet50()
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(units=no_classes, activation="softmax"))
How do I initialize it with imagenet weights? is it done automatically? Is the pooling average and the activation relu for Resnet50? Do I have to add more layers?
The model has a layers parameter so try to run this loop. I don't remember exactly but if model.layers is a list then.
summary = []
for layer, value in zip(model.layers, model.summary()):
try:
summary.append(layer.summary())
except:
summary.append(value)
you might need to modify it but it should be something similar.
Related
conv_base = Xception(include_top=False, input_tensor=None,
pooling=None, input_shape=(TARGET_SIZE, TARGET_SIZE, 3), classifier_activation='softmax')
model = conv_base.output
model = layers.GlobalAveragePooling2D()(model)
model = layers.Dense(5, activation = "softmax")(model)
model = models.Model(conv_base.input, model)
model.compile(optimizer = Adam(lr = 0.001),
loss = "sparse_categorical_crossentropy",
metrics = ["acc"])
Can anyone please explain what conv_base.output and conv_base.input mean in this code? What it is used for and what does it do??
The tf.keras.models.Model needs inputs and outputs as arguments:
Arguments:
inputs: The input(s) of the model: a keras.Input object or list of
keras.Input objects.
outputs: The output(s) of the model. See Functional API example below.
name: String, the name of the model.
In a Keras model, model.input and model.output return the input/output tensors, which you can pass as your new model's input/output:
from tensorflow.keras.applications.vgg19 import VGG19
base_model = VGG19(weights=None)
print(base_model.input)
Tensor("input_2:0", shape=(None, 224, 224, 3), dtype=float32)
Note that calling these layers return a tensor, and not a model, so you seem to have mis-assigned your variable names. Rather, use something along the lines of:
model_output = conv_base.output
layer_output = layers.GlobalAveragePooling2D()(model_output)
final_model_output = layers.Dense(5, activation = "softmax")(layer_output)
model = models.Model(conv_base.input, final_model_output)
So I want to build an autoencoder model for sequence data. I have started to build a sequential keras model in python and now I want to add an attention layer in the middle, but have no idea how to approach this. My model so far:
from keras.layers import LSTM, TimeDistributed, RepeatVector, Layer
from keras.models import Sequential
import keras.backend as K
model = Sequential()
model.add(LSTM(20, activation="relu", input_shape=(time_steps,n_features), return_sequences=False))
model.add(RepeatVector(time_steps, name="bottleneck_output"))
model.add(LSTM(30, activation="relu", return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))
model.compile(optimizer="adam", loss="mae")
So far I have tried to add an attention function copied from here
class attention(Layer):
def __init__(self,**kwargs):
super(attention,self).__init__(**kwargs)
def build(self,input_shape):
self.W=self.add_weight(name="att_weight",shape=(input_shape[-1],1),initializer="normal")
self.b=self.add_weight(name="att_bias",shape=(input_shape[1],1),initializer="zeros")
super(attention, self).build(input_shape)
def call(self,x):
et=K.squeeze(K.tanh(K.dot(x,self.W)+self.b),axis=-1)
at=K.softmax(et)
at=K.expand_dims(at,axis=-1)
output=x*at
return K.sum(output,axis=1)
def compute_output_shape(self,input_shape):
return (input_shape[0],input_shape[-1])
def get_config(self):
return super(attention,self).get_config()
and added it after first LSTM, before repeat vector, i.e:
model = Sequential()
model.add(LSTM(20, activation="relu", input_shape=(time_steps,n_features), return_sequences=False))
model.add(attention()) # this is added
model.add(RepeatVector(time_steps, name="bottleneck_output"))
model.add(LSTM(30, activation="relu", return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))
model.compile(optimizer="adam", loss="mae")
but the code gives error, because the dimensions somehow do not fit and the problem is in putting output of attention() to repeat vector:
ValueError: Input 0 is incompatible with layer bottleneck_output: expected ndim=2, found ndim=1
.... but according to model.summary() the output dimension of attention layer is (None, 20), which is the same also for the first lstm_1 layer . The code works without attention layer.
I would appreciate also some explanation why the solution is the solution to the problem, I am fairly new to python and have problems understanding what the class attention() is doing. I just copied it and tried to use it which is ofcrs not working....
Ok, I solved it. There has to be return_sequence = True in first LSTM layer. Then it works as it is.
With all I know. pretrained CNN can do way better than CNN. I have a dataset of 855 images. I have applied CNN and got 94% accuracy.Then I applied Pretrained model (VGG16, ResNet50, Inception_V3, MobileNet)also with fine tuning but still i got highest 60% and two of them are doing very bad on classification. Can CNN really do better than pretrained model or my implementation is wrong. I've converted my image into 100 by 100 dimensions and followed the way of keras application. Then What is the issue ??
Naive CNN approach :
def cnn_model():
size = (100,100,1)
num_cnn_layers =2
NUM_FILTERS = 32
KERNEL = (3, 3)
MAX_NEURONS = 120
model = Sequential()
for i in range(1, num_cnn_layers+1):
if i == 1:
model.add(Conv2D(NUM_FILTERS*i, KERNEL, input_shape=size,
activation='relu', padding='same'))
else:
model.add(Conv2D(NUM_FILTERS*i, KERNEL, activation='relu',
padding='same'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(int(MAX_NEURONS), activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(int(MAX_NEURONS/2), activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
metrics=['accuracy'])
return model
VGG16 approach:
def vgg():
` `vgg_model = keras.applications.vgg16.VGG16(weights='imagenet',include_top=False,input_shape = (100,100,3))
model = Sequential()
for layer in vgg_model.layers:
model.add(layer)
# Freeze the layers
for layer in model.layers:
layer.trainable = False
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(3, activation='softmax'))
model.compile(optimizer=keras.optimizers.Adam(lr=1e-5),
loss='categorical_crossentropy',
metrics=['accuracy'])
return model
What you're referring to as CNN in both cases talk about the same thing, which is a type of a neural network model. It's just that the pre-trained model has been trained on some other data instead of the dataset you're working on and trying to classify.
What is usually used here is called Transfer Learning. Instead of freezing all the layers, trying leaving the last few layers open so they can be retrained with your own data, so that the pretrained model can edit its weights and biases to match your needs as well. It could be the case that the dataset you're trying to classify is foreign to the pretrained models.
Here's an example from my own work, there are additional pieces of code but you can make it work with your own code, the logic remains the same
#You extract the layer which you want to manipulate, usually the last few.
last_layer = pre_trained_model.get_layer(name_of_layer)
# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024,activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(0.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1,activation='sigmoid')(x)
#Here we combine your newly added layers and the pre-trained model.
model = Model( pre_trained_model.input, x)
model.compile(optimizer = RMSprop(lr=0.0001),
loss = 'binary_crossentropy',
metrics = ['accuracy'])
Adding to what #Ilknur Mustafa mentioned, as your dataset may be foreign to the images used for pre-training, you can try to re-train few last layers of the pre-trained model instead of adding a whole new layers. The below example code doesn't add any additional trainable layer other than the output layer. In this way, you can benefit by retraining the last few layers on the existing weights, rather than training from scratch. This may be beneficial if you don't have a large dataset to train on.
# load model without classifier layers
vgg = VGG16(include_top=False, input_shape=(100, 100, 3), weights='imagenet', pooling='avg')
# make only last 2 conv layers trainable
for layer in vgg.layers[:-4]:
layer.trainable = False
# add output layer
out_layer = Dense(3, activation='softmax')(vgg.layers[-1].output)
model_pre_vgg = Model(vgg.input, out_layer)
# compile model
opt = SGD(lr=1e-5)
model_pre_vgg.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])
#You extract the layer which you want to manipulate, usually the last few.
last_layer = pre_trained_model.get_layer(name_of_layer)
# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024,activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(0.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1,activation='sigmoid')(x)
#Here we combine your newly added layers and the pre-trained model.
model = Model( pre_trained_model.input, x)
model.compile(optimizer = RMSprop(lr=0.0001),
loss = 'binary_crossentropy',
metrics = ['accuracy'])
I load a pretrained model. However, I want to add Spectral Normalization to this model.
I can do it in this way if the model is fully sequential:
import tensorflow as tf
base_model = tf.keras.applications.VGG16(input_shape=(720, 1280, 3), weights=None, include_top=False)
# Add spectral normalization
base_model_new = tf.keras.models.Sequential()
for layer in base_model.layers:
if isinstance(layer, tf.keras.layers.Conv2D):
spec_norm_layer = SpectralNormalization(layer)
base_model_new.add(spec_norm_layer)
else:
base_model_new.add(layer)
input = tf.keras.Input(shape=(720, 1280, 3), name='image')
x = base_model_new(input)
model = tf.keras.models.Model(inputs=[input], outputs=x, name='test')
However, this method does not work when for example using a ResNet where there are Add or Concatenate layers which expect a list of inputs. How to solve this for ResNets?
I have taken a standard ResNet50 model:
model = keras.applications.resnet50.ResNet50(include_top=False,
weights='imagenet',
classes=10,
input_shape=(224, 224, 3))
And added several dense layers of my own:
top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(2, activation='softmax'))
model = Model(input=model.input, output=top_model(model.output))
This way it works great, however, when I want to delete last Dense and Dropout layers with model.pop() keras wont work well:
model.layers[-1].layers
[<keras.layers.core.Flatten at 0x16b5c00b8>,
<keras.layers.core.Dense at 0x16b5c0320>,
<keras.layers.core.Dropout at 0x16b5c02e8>,
<keras.layers.core.Dense at 0x16b5c0d68>]
model.layers[-1].pop()
model.layers[-1].pop()
model.layers[-1].layers
[<keras.layers.core.Flatten at 0x1ae6e5940>,
<keras.layers.core.Dense at 0x1ae6e9e10>]
model.layers[-1].outputs = [model.layers[-1].layers[-1].output]
model.outputs = model.layers[-1].outputs
model.layers[-1].layers[-1].outbound_nodes = []
Then I just compile the model and when trying to predict, I get an error:
You must feed a value for placeholder tensor 'flatten_7_input_12' with dtype float and shape [?,1,1,2048]
model.pop() takes care of all the underlying settings, including setting the model.output to the output of the new last layer. Therefore, you don't need to handle anything regarding the outputs.
Please also note that you are assigning to the model variable; thus, model.outputs is already referring to the extended model's output.
Here is a sample code that works fine on keras 2.0.6 with TensorFlow backend (1.4.0):
import keras
from keras.models import Sequential, Model
from keras.layers import *
import numpy as np
model = keras.applications.resnet50.ResNet50(include_top=False,
weights='imagenet',
classes=10,
input_shape=(224, 224, 3))
top_model = Sequential()
top_model.add(keras.layers.Flatten(input_shape=model.output_shape[1:]))
top_model.add(keras.layers.Dense(256, activation='relu'))
top_model.add(keras.layers.Dropout(0.5))
top_model.add(keras.layers.Dense(2, activation='softmax'))
model_extended = Model(input=model.input, output=top_model(model.output))
model_extended.layers[-1].pop()
model_extended.layers[-1].pop()
model_extended.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
model_extended.predict(np.zeros((1, 224, 224, 3)))