I am trying to train a convolutional neural net. For this I am using a dataset of 646 images of license plates, each containing 8 characters (0-9, A-Z, without the letter 'O' and without blank spaces; 36 possible characters in total). These are my training data X_train. I resized all images to the same shape, so X_train has shape (646, 40, 200, 3), with 3 color channels.
I also have a dataset containing the labels of these images, which I one-hot encoded to a numpy array of shape (646, 8, 36). This is my y_train data.
Now, I am trying to apply a neural network which looks like this:
The architecture is taken from this paper: https://ieeexplore.ieee.org/abstract/document/8078501
I excluded the batch normalization part, because it is not the most interesting part for me. But I am very unsure about the top of the network, that is, the part after the last pooling layer, beginning with model.add(Flatten())...
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Reshape

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape = (40, 200, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(32, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(64, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(Conv2D(128, kernel_size=(3, 3), activation = "relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(16000, activation = "relu"))
model.add(Dense(128, activation = "relu"))
model.add(Dense(36, activation = "relu"))
model.add(Dense(8*36, activation="softmax"))
model.add(Reshape((8, 36)))
Thank you very much in advance!
Assuming the diagram in the paper matches your model architecture, the code below can be used to create the model: a shared convolutional backbone followed by eight dense branches, one per character position. Make sure you have some padding on the convolutions for input images of this size.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Input, Reshape, Concatenate

def create_model(input_shape=(40, 200, 3)):
    input_img = Input(shape=input_shape)
    # shared convolutional backbone
    x = Conv2D(32, kernel_size=(3, 3), activation="relu")(input_img)
    x = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = Conv2D(32, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = Conv2D(64, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = Conv2D(128, kernel_size=(3, 3), padding="same", activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    backbone = Flatten()(x)

    # one independent classification head per character position
    branches = []
    for i in range(8):
        branch = Dense(16000, activation="relu", name="branch_" + str(i) + "_Dense_16000")(backbone)
        branch = Dense(128, activation="relu", name="branch_" + str(i) + "_Dense_128")(branch)
        branch = Dense(36, activation="softmax", name="branch_" + str(i) + "_output")(branch)
        branches.append(branch)

    # stack the 8 per-character distributions into shape (8, 36)
    output = Concatenate(axis=1)(branches)
    output = Reshape((8, 36))(output)
    return Model(input_img, output)
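A quick sanity check (the optimizer and loss here are just a common default for one-hot targets like your y_train, not prescribed by the paper):

model = create_model()
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()  # final output shape should be (None, 8, 36), matching y_train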
Related
I am building a VGG image pipeline, and I am trying to input 2 consecutive frames from a video as follows:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

datagen = ImageDataGenerator()
datagen.fit(X_train)
model = Sequential()
model.add(Conv2D(input_shape=(224, 224, 6), filters=64, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(GlobalAveragePooling2D())
model.add(Dense(units=4096, activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Dropout(0.2))
model.add(Dense(units=4096, activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))
# opt = Adam(learning_rate=0.001)
opt = SGD(learning_rate=0.01, momentum=0.3)
checkpoint = ModelCheckpoint(config.CLASH_PATH() + '/models/step_01.h5', monitor='binary_accuracy', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='binary_accuracy', min_delta=0, patience=40, verbose=1, mode='auto')
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['binary_accuracy'])
model.summary()
model.fit(datagen.flow(X_train, y_train, batch_size=32, subset='training', ignore_class_split=True),
          validation_data=datagen.flow(X_train, y_train, batch_size=16, subset='validation', ignore_class_split=True),
          steps_per_epoch=len(X_train) / 48,
          epochs=1000, verbose=1,
          callbacks=[checkpoint, early])
As you'll note in the first Conv2D layer, I am passing 224x224x6, which represents two stacked image frames of 224x224x3. This is necessary since I am using the ImageDataGenerator to pass my data.
Unfortunately I am getting the following error message:
NumpyArrayIterator is set to use the data format convention "channels_last" (channels on axis 3), i.e. expected either 1, 3, or 4 channels on axis 3. However, it was passed an array with shape (6666, 224, 224, 6) (6 channels).
From other reading on stackoverflow, I have seen that I can stack my frames using layers.concatenate, but how would I then modify my generator to keep the flow of frames in sync?
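One common workaround is to keep the two frames in separate 3-channel flows and concatenate the batches yourself. A sketch, not a drop-in fix: frames_a and frames_b are hypothetical (N, 224, 224, 3) arrays holding the first and second frame of each pair, and the shared seed keeps the shuffling (and any random augmentation) aligned between the two iterators:

import numpy as np

def paired_flow(datagen, frames_a, frames_b, labels, batch_size=32, seed=42):
    # two synchronized 3-channel iterators over the same sample order
    gen_a = datagen.flow(frames_a, labels, batch_size=batch_size, seed=seed)
    gen_b = datagen.flow(frames_b, batch_size=batch_size, seed=seed)
    while True:
        xa, y = next(gen_a)
        xb = next(gen_b)
        yield np.concatenate([xa, xb], axis=-1), y  # (batch, 224, 224, 6)

# usage: pass the generator to fit and set steps_per_epoch explicitly
# model.fit(paired_flow(datagen, frames_a, frames_b, y_train),
#           steps_per_epoch=len(frames_a) // 32, epochs=1000)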
I'm currently working on a little CNN with the CIFAR-10 dataset. I just updated my code a little bit here and there and now it isn't working anymore: the prediction returns NaN ("not a number"). I can't figure out the mistake and couldn't find an answer for my problem.
from keras.datasets import cifar10
import numpy as np
(x_training, y_training), (x_test,y_test) = cifar10.load_data()
x_training = x_training / 255.0
x_test = x_test / 255.0
%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(x_training[3])
plt.show()
from keras.models import Sequential
from keras.layers import Dense, Flatten,Conv2D , MaxPooling2D, Dropout
import tensorflow as tf
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(32, 32, 3), activation="relu", padding="same"))
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(Conv2D(128, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.compile(optimizer='RMSProp', loss="binary_crossentropy", metrics=['accuracy'])
model.summary()
model.fit(x_training, y_training, batch_size=128, epochs=10, shuffle=True)
model.evaluate(x_training, y_training)
results = model.predict(x_training[1].reshape(-1, 32, 32, 3))
results
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
max = np.max(results)
max_position = np.argmax(results)
class_name_predict = class_names[max_position]
plt.imshow(x_training[1])
plt.show()
test = class_name_predict
test
plt.imshow(x_training[1])
plt.show()
x = class_names[y_training[1][0]]
x
There are some mistakes in your model:
The output layer for a multiclass problem must have a dimension equal to the number of classes, with a softmax activation function.
The standard losses for a multiclass problem are categorical_crossentropy and sparse_categorical_crossentropy: categorical_crossentropy is used when your target is one-hot encoded (e.g. class 3 of 10 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]), while sparse_categorical_crossentropy is used when you have integer-encoded labels (your case, since cifar10.load_data returns integer labels).
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(32, 32, 3), activation="relu", padding="same"))
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(Conv2D(128, kernel_size=(3, 3), activation="relu", padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(len(class_names), activation="softmax"))
model.compile(optimizer='RMSProp', loss="sparse_categorical_crossentropy", metrics=['accuracy'])
model.summary()
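With the softmax output, predict returns one probability per class, so the argmax lookup from your code works as intended. A minimal check after refitting, reusing the variables defined above:

results = model.predict(x_training[1].reshape(-1, 32, 32, 3))
print(class_names[np.argmax(results)])   # predicted class name
print(class_names[y_training[1][0]])     # true class name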
I would like to extract the features from the final dense layer in my CNN model. However, I am quite conflicted by all of the research I have done on Google: there are so many different methods in TensorFlow, and I am struggling to get any of them to work.
I have successfully trained a model on CIFAR10. I have saved the model to a directory and have a saved_model.pb file. I have visualized the model via TensorBoard, but I am not entirely sure of the name of my final layer; the visualization seems a bit confusing.
How can I proceed to extract these features? I want to use them for a t-SNE analysis.
I am trying to load the pb graph with gfile, but I am not sure if this is the correct approach. Thank you.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.python.platform import gfile

pb_graph_file = '../data/processed/saved_models/saved_model.pb'
f = gfile.GFile(pb_graph_file, 'rb')
graph_def = tf.GraphDef()
# the original snippet never parsed the file; note this only works for a frozen
# GraphDef, while a SavedModel's saved_model.pb needs tf.saved_model.load instead
graph_def.ParseFromString(f.read())
f.close()
My Keras Sequential model looks like:
"""
This is the CNN model's architecture
"""
weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding = 'same', input_shape = (32, 32, 3)))
model.add(BatchNormalization())
model.add(Conv2D(32, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding = 'same'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))
model.add(Conv2D(128, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3, 3), activation = 'relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay), padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.4))
# model.add(Conv2D(256, (3, 3), activation = 'relu', kernel_initializer = 'he_uniform', kernel_regularizer = l2(weight_decay), padding='same'))
# model.add(Conv2D(256, (3, 3), activation = 'relu', kernel_initializer = 'he_uniform', kernel_regularizer = l2(weight_decay), padding='same'))
# model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
# model.add(Dense(128, acti vation='relu', kernel_initializer = 'he_normal', kernel_regularizer = l2(weight_decay)))
# model.add(BatchNormalization())
# model.add(Dropout(0.5))
# output layer
model.add(Dense(10, activation = 'softmax'))
# optimize and compile model
opt = Adam(learning_rate = 1e-3)
model.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])
return model
First, get the name of your desired layer using model.summary().
Then use that layer's name in place of desired_layer in the code below:
from keras.models import Model
extractor = Model(inputs=model.inputs, outputs=model.get_layer(desired_layer).output)
features = extractor.predict(x)
Here x is data from which you want to extract features.
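Since you mention t-SNE: the extracted features can go straight into scikit-learn. A sketch, assuming the Flatten layer shows up as 'flatten' in model.summary() (substitute the actual name) and x_test holds your CIFAR10 test images:

from keras.models import Model
from sklearn.manifold import TSNE

extractor = Model(inputs=model.inputs, outputs=model.get_layer('flatten').output)
features = extractor.predict(x_test)                      # (n_samples, n_features)
embedding = TSNE(n_components=2).fit_transform(features)  # (n_samples, 2) for plotting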
I have been using U-nets for a while now, and notice that in most of my applications, it generates an over-estimation surrounding a specific class.
For example, here's a grayscale image:
And a manual segmentation of 3 classes (lesion [green], tissue [magenta], background [all else]):
The issue I notice on prediction (over-estimation at boundaries):
The typical architecture used looks something like this:
from keras.models import Model
from keras.layers import (Input, Conv2D, MaxPooling2D, Dropout,
                          UpSampling2D, concatenate, Reshape, Activation)

def get_unet(dim=128, dropout=0.5, n_classes=3):
    inputs = Input((dim, dim, 1))
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = Dropout(dropout)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = Dropout(dropout)(conv5)
    up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=3)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=3)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=3)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=3)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(up9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Conv2D(n_classes, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv10 = Reshape((dim * dim, n_classes))(conv10)
    output = Activation('softmax')(conv10)
    model = Model(inputs=[inputs], outputs=[output])
    return model
Plus:
mgpu_model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                   metrics=['accuracy'], sample_weight_mode='temporal')
open(p, 'w').write(json_string)
model_checkpoint = callbacks.ModelCheckpoint(f, save_best_only=True)
reduce_lr_cback = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2,
    patience=5, verbose=1,
    min_lr=0.05 * 0.0001)
h = mgpu_model.fit(train_gray, train_masks,
                   batch_size=64, epochs=50,
                   verbose=1, shuffle=True, validation_split=0.2, sample_weight=sample_weights,
                   callbacks=[model_checkpoint, reduce_lr_cback])
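For reference, the final Reshape together with sample_weight_mode='temporal' means the masks and per-pixel weights are flattened along the spatial axes before fitting (a sketch; masks_2d and weights_2d are hypothetical arrays of shape (n, 128, 128, 3) and (n, 128, 128)):

train_masks = masks_2d.reshape(masks_2d.shape[0], 128 * 128, 3)      # (n, 16384, 3)
sample_weights = weights_2d.reshape(weights_2d.shape[0], 128 * 128)  # (n, 16384)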
My Question:
Do you have any insight or suggestion on how to change either the architecture or hyperparameters to mitigate the over-estimation? This could include even using a different architecture that may be better at more precise segmentation. (Please note I already do class balancing/weighting to compensate for imbalances in class frequency)
You can experiment with various loss functions instead of cross entropy. For multi-class segmentation, you can try:
generalized dice loss
dice loss (summed across all classes; see the sketch after this list)
categorical focal loss
boundary loss
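For instance, here is a minimal sketch of a dice loss averaged across classes (an example under stated assumptions, not taken from any of the papers above: it expects one-hot targets with the same (batch, pixels, n_classes) shape as the model output, and smooth is a small constant that avoids division by zero):

from keras import backend as K

def dice_loss(y_true, y_pred, smooth=1e-6):
    # per-class dice: sum over the batch and pixel axes, keep the class axis
    intersection = K.sum(y_true * y_pred, axis=(0, 1))
    union = K.sum(y_true, axis=(0, 1)) + K.sum(y_pred, axis=(0, 1))
    dice = (2.0 * intersection + smooth) / (union + smooth)
    return 1.0 - K.mean(dice)  # average over classes and turn the score into a loss

It can then be passed directly as loss=dice_loss in compile.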
The winner of BraTS 2018 used autoencoder regularization (https://github.com/IAmSuyogJadhav/3d-mri-brain-tumor-segmentation-using-autoencoder-regularization). You could try this as well. The idea in that paper is that the model also learns to better encode the features in the latent space, which in turn helps the segmentation.
Is there a way to use the output of a given (middle) layer in Keras as the input of another network?
For example use the last dense layer of the encoder here as the input for another network?
import keras
from keras import layers as L

# img_shape and code_size are assumed to be defined earlier
# encoder
encoder = keras.models.Sequential()
encoder.add(L.InputLayer(img_shape))
encoder.add(L.Conv2D(32, kernel_size=(3, 3), strides=1, padding='same', activation='elu'))
encoder.add(L.MaxPool2D(pool_size=(2, 2)))
encoder.add(L.Conv2D(64, kernel_size=(3, 3), strides=1, padding='same', activation='elu'))
encoder.add(L.MaxPool2D(pool_size=(2, 2)))
encoder.add(L.Conv2D(128, kernel_size=(3, 3), strides=1, padding='same', activation='elu'))
encoder.add(L.MaxPool2D(pool_size=(2, 2)))
encoder.add(L.Conv2D(256, kernel_size=(3, 3), strides=1, padding='same', activation='elu'))
encoder.add(L.MaxPool2D(pool_size=(2, 2)))
encoder.add(L.Flatten())
encoder.add(L.Dense(code_size))
# decoder
decoder = keras.models.Sequential()
decoder.add(L.InputLayer((code_size,)))
decoder.add(L.Dense(147456))
decoder.add(L.Reshape((24, 24, 256)))
decoder.add(L.Conv2DTranspose(filters=128, kernel_size=(3, 3), strides=2, activation='elu', padding='same'))
decoder.add(L.Conv2DTranspose(filters=64, kernel_size=(3, 3), strides=2, activation='elu', padding='same'))
decoder.add(L.Conv2DTranspose(filters=32, kernel_size=(3, 3), strides=2, activation='elu', padding='same'))
decoder.add(L.Conv2DTranspose(filters=1, kernel_size=(3, 3), strides=2, activation=None, padding='same'))
If you just want to use the encoder as a fixed feature extractor (i.e. you don't want its weights to change through backpropagation when training the rest of your model), you can get the outputs of the last layer of your encoder network with predict (or predict_generator) for all of your examples, and use those as the input data for your predictor network.
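A minimal sketch of that approach (assuming the encoder above is already trained; X and y are placeholders for your inputs and labels, and the two-layer head is only an example):

features = encoder.predict(X)                    # shape: (n_samples, code_size)

predictor = keras.models.Sequential()
predictor.add(L.InputLayer((code_size,)))
predictor.add(L.Dense(64, activation='elu'))     # example hidden layer
predictor.add(L.Dense(1, activation='sigmoid'))  # example output; adapt to your task
predictor.compile(optimizer='adam', loss='binary_crossentropy')
predictor.fit(features, y)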
Did you try chaining the two models directly? Sequential models can be nested as layers, e.g.:
autoencoder = keras.models.Sequential([encoder, decoder])