The idea is to train a CNN on a cosine similarity matrix of the hidden states of two BiLSTMs.
I am trying to get the following code working, but it fails with the error message:
Graph disconnected: cannot obtain value for tensor
Tensor("bidirectional_4/concat:0", shape=(?, ?, 100), dtype=float32)
at layer "input_11". The following previous layers were accessed without issue: []
The code to train the model is the following:
def train_model(self, sentences_pair, is_similar,
                embedding_meta_data_skt, embedding_meta_data_tib,
                model_save_directory='./'):
    tokenizer_skt = embedding_meta_data_skt['tokenizer']
    tokenizer_tib = embedding_meta_data_tib['tokenizer']
    embedding_matrix_skt = embedding_meta_data_skt['embedding_matrix']
    embedding_matrix_tib = embedding_meta_data_tib['embedding_matrix']

    train_data_x1, train_data_x2, train_labels, leaks_train, \
    val_data_x1, val_data_x2, val_labels, leaks_val = create_train_dev_set(
        tokenizer_skt, sentences_pair, is_similar,
        self.max_sequence_length, self.validation_split_ratio)

    nb_words_skt = len(tokenizer_skt.word_index) + 1
    nb_words_tib = len(tokenizer_tib.word_index) + 1

    # Creating word embedding layers
    embedding_layer_skt = Embedding(nb_words_skt, self.embedding_dim, weights=[embedding_matrix_skt],
                                    input_length=self.max_sequence_length, trainable=False)
    embedding_layer_tib = Embedding(nb_words_tib, self.embedding_dim, weights=[embedding_matrix_tib],
                                    input_length=self.max_sequence_length, trainable=False)

    # Creating LSTM encoder
    lstm_layer = Bidirectional(LSTM(self.number_lstm_units, dropout=self.rate_drop_lstm,
                                    recurrent_dropout=self.rate_drop_lstm, return_sequences=True))

    # Creating LSTM encoder layer for the first sentence
    sequence_1_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_1 = embedding_layer_skt(sequence_1_input)
    skt_lstm = lstm_layer(embedded_sequences_1)

    # Creating LSTM encoder layer for the second sentence
    sequence_2_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_2 = embedding_layer_tib(sequence_2_input)
    tib_lstm = lstm_layer(embedded_sequences_2)

    A_input = keras.Input(tensor=skt_lstm)
    B_input = keras.Input(tensor=tib_lstm)

    dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm, A_input, B_input])
    dist_output = Reshape((40, 40, 1))(dist_output)
    input_shape = (40, 40, 1)

    cnn_model = Conv2D(128, (2, 2), input_shape=input_shape)(dist_output)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(164, (2, 2))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(128, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = MaxPooling2D(pool_size=(2, 2))(cnn_model)
    cnn_model = Dropout(0.40)(cnn_model)
    cnn_model = Flatten()(cnn_model)

    # Fully connected layers
    cnn_model = Dense(256)(cnn_model)
    cnn_model = BatchNormalization()(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Dropout(0.5)(cnn_model)
    cnn_model = Dense(num_classes)(cnn_model)
    preds = Dense(1, activation='sigmoid')(cnn_model)

    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])
    #model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc'])

    filepath = "skt-tib-bs" + str(batch_size) + "-" + "{epoch:02d}-{val_acc:.2f}.hdf5"
    checkpoint = ModelCheckpoint('skt-tib.h5', monitor='val_acc')
    callbacks_list = [checkpoint]

    model.fit([train_data_x1, train_data_x2, leaks_train], train_labels,
              validation_data=([val_data_x1, val_data_x2, leaks_val], val_labels),
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              class_weight=class_weight,
              callbacks=callbacks_list)

    score = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    model.save(file_name)
The definition of the function calculating the pairwise cosine similarity is the following:
def l2_norm(x, axis=None):
    square_sum = K.sum(K.square(x), axis=axis, keepdims=True)
    norm = K.sqrt(K.maximum(square_sum, K.epsilon()))
    return norm

def pairwise_cosine_sim(A_B):
    A, B, A_tensor, B_tensor = A_B
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A_tensor, K.permute_dimensions(B_tensor, (0, 2, 1)))
    den = (A_mag * K.permute_dimensions(B_mag, (0, 2, 1)))
    dist_mat = num / den
    return dist_mat
I have been trying to fix it for a couple of hours, but to no avail. Somewhere the inputs and outputs are not connected, and I just can't figure out where the problem lies. Any suggestions?
Either remove A_input and B_input entirely, since they are not needed in the first place, and use skt_lstm and tib_lstm directly instead of them; or, if you would like to keep them, pass them as inputs of the model as well when you define the Model, since they are actually input layers:
model = Model(inputs=[sequence_1_input, sequence_2_input, A_input, B_input], outputs=preds)
However, you don't need to pass any corresponding arrays for them when calling the fit method, as they will be fed using their corresponding tensors skt_lstm and tib_lstm (i.e. they act as wrappers around these tensors).
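For the first option, here is a minimal sketch of the rewiring (variables as in the question's code). Because A_tensor and B_tensor would just be A and B, the Lambda layer can take the two BiLSTM outputs directly, and pairwise_cosine_sim can be simplified accordingly:
def pairwise_cosine_sim(A_B):
    # A, B: (batch, timesteps, 2 * lstm_units) outputs of the shared BiLSTM
    A, B = A_B
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A, K.permute_dimensions(B, (0, 2, 1)))
    den = A_mag * K.permute_dimensions(B_mag, (0, 2, 1))
    return num / den

# No Input wrappers needed: feed the BiLSTM outputs straight in.
dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm])
dist_output = Reshape((40, 40, 1))(dist_output)
# ... CNN stack and Dense head exactly as before ...
model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)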
Related
I got the following error: RuntimeError: You must compile your model before training/testing. Use model.compile(optimizer, loss). However, I have already compiled my model, so I don't understand what the problem is.
vgg = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

for layer in vgg.layers:
    layer.trainable = False  # making all the layers non-trainable

x = Flatten()(vgg.output)  # flattening out the last layer
predictions = Dense(2, activation='sigmoid')(x)  # Dense layer to predict whether there is pneumonia or not
model = Model(inputs=vgg.input, outputs=predictions)

early_stopping_callbacks = tensorflow.keras.callbacks.EarlyStopping(patience=15,
                                                                    restore_best_weights=True,
                                                                    verbose=1)

base_model1 = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3), pooling="max",
                    classes=2)
#base_model1.load_weights("../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5")
base_model1.summary()

model2 = Sequential()
model2.add(base_model1)
model2.add(Flatten())
model2.add(Dense(128, activation="relu"))
model2.add(Dense(64, activation="relu"))
model2.add(Dense(32, activation="relu"))
model2.add(Dense(1, activation="sigmoid"))

# freeze the layers
for layer in base_model1.layers:
    layer.trainable = False

model2.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model2.fit_generator(train_generator, epochs=EPOCH, validation_data=val_generator, steps_per_epoch=10,
                               callbacks=[early_stopping_callbacks])

test_loss, test_accuracy = base_model1.evaluate(test_generator, steps=50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss * 100, "%")
base_model1 is my VGG16 model, and test_generator is my test set.
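One thing stands out in the snippet: evaluate is called on base_model1, which is never compiled there; only model2 is. A minimal sketch of the fix, evaluating the compiled model2 (which wraps base_model1) instead:
# base_model1 was never compiled, so evaluating it raises the RuntimeError.
# Evaluate the compiled model2 instead:
test_loss, test_accuracy = model2.evaluate(test_generator, steps=50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss * 100, "%")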
I am training a model for text sentiment classification with a CNN. The validation accuracy is initially higher than the training accuracy, and then it decreases. Is this behavior acceptable? If not, what can be the reason, and how can I solve it?
My model:
class hyper():
    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob, hidden_dims, batch_size, num_epochs):
        # Model hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self, sequenceLength, max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words

m_hyper = hyper(embedding_dim=embed_dim, filter_sizes=(3, 4, 5, 6, 8), num_filters=80, dropout_prob=(0.2, 0.5),
                hidden_dims=50, batch_size=128, num_epochs=30)
pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
Model architecture:
def build_model(pr_hyper, m_hyper):
    # Convolutional block
    model_input = Input(shape=(pr_hyper.sequenceLength,))
    # use a random embedding for the text
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim, weights=[emb], trainable=False)(model_input)
    # x = SpatialDropout1D(m_hyper.dropout_prob[0])(x)

    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)

    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             # padding="same",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg
                             )(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)

    # merge
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)

    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                  metrics=["accuracy"])
    print(model.summary())
    tf.keras.utils.plot_model(model, show_shapes=True)  # , to_file='multichannel.png'
    return model
INITIAL EPOCHS: [training log for the first epochs omitted]
There are several reasons this can happen; for example, dropout layers are disabled during validation, so the model can genuinely score higher on the validation pass than on the regularized training pass. For more information, I would suggest you see this post, which describes several possible reasons.
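A quick way to sanity-check the dropout explanation, as a minimal sketch (x_train/y_train and x_val/y_val are assumed to be the arrays behind your split; they are not defined in the snippet above): evaluate both sets in inference mode, where dropout is off, and compare.
# Both calls run in inference mode (dropout disabled), so the gap
# caused by dropout during training should disappear.
train_loss, train_acc = model.evaluate(x_train, y_train, verbose=0)
val_loss, val_acc = model.evaluate(x_val, y_val, verbose=0)
print(f"train acc (inference mode): {train_acc:.3f}, val acc: {val_acc:.3f}")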
I was training a network using RSS data. The network is trained after preprocessing the data, but I am getting the error:
AttributeError: 'KerasTensor' object has no attribute '_id'
My model is shown below.
class DANN(Model):
    def __init__(self):
        super().__init__()
        # Feature Extractor
        self.feature_extractor_layer0 = Input(shape=(num_batch, num_features))
        self.feature_extractor_layer1 = Dense(100, activation='relu')
        self.feature_extractor_layer2 = Dense(100, activation='relu')
        self.feature_extractor_layer3 = Dense(100, activation='relu')
        self.feature_extractor_layer4 = Dropout(0.5)
        self.feature_extractor_layer5 = Dense(100, activation='relu')
        self.feature_extractor_layer6 = Dense(100, activation='relu')
        # Label Predictor
        self.label_predictor_layer0 = Dense(100, activation='relu')
        self.label_predictor_layer1 = Dense(100, activation='relu')
        self.label_predictor_layer2 = Dense(2, activation=None)

    def call(self, x):
        # Feature Extractor
        x = Input(shape=(num_features,))
        x = self.feature_extractor_layer1(x)
        x = self.feature_extractor_layer2(x)
        x = self.feature_extractor_layer3(x)
        x = self.feature_extractor_layer4(x)
        x = self.feature_extractor_layer5(x)
        x = self.feature_extractor_layer6(x)
        label_pred = self.label_predictor_layer0(x)
        label_pred = self.label_predictor_layer1(label_pred)
        label_pred = self.label_predictor_layer2(label_pred)
        return label_pred

# creating a model object
model = DANN()
My training loop is:
# initializing parameters before training the model
lr = 1e-3
optimizer = tf.optimizers.SGD()
loss_fn_label = keras.losses.mean_squared_error
max_batches = len(dx_source)
source_label_loss = []

# training loop
for epoch in range(num_epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            loss_value = loss_fn_label(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        while ep == ls[ls_i]:
            sys.stdout.write("█")
            ls_i = ls_i + 1
            break
        ep = ep + 1
    sys.stdout.write("|")
    print(f'\ts_label_loss: {loss_s_label:.4f}')
    source_label_loss.append(loss_s_label)
Please help me to solve this error.
dx_source_tensor = tf.convert_to_tensor(X_train_source, dtype=tf.float32)
dy_source_tensor = tf.convert_to_tensor(Y_train_source, dtype=tf.float32)
train_dataset_source = tf.data.Dataset.from_tensor_slices((dx_source_tensor, dy_source_tensor))
dx_source_test_tensor = tf.convert_to_tensor(X_test_source, dtype=tf.float32)
dy_source_test_tensor = tf.convert_to_tensor(Y_test_source, dtype=tf.float32)
test_dataset_source = tf.data.Dataset.from_tensor_slices((dx_source_test_tensor, dy_source_test_tensor))
#training dataset
train_source = tf.data.Dataset.from_tensor_slices(train_dataset_source).batch(num_batch)
#testing dataset
test_source = tf.data.Dataset.from_tensor_slices(test_dataset_source).batch(num_batch)
This is how I have provided the dataset input. This code was working for the MNIST dataset, but it does not produce results for a normal row-by-row data frame. If you know any other methods to train these types of models, that is also acceptable to me. Thank you!
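The error is consistent with one line of the model: inside call, x = Input(shape=(num_features,)) throws away the incoming batch and replaces it with a fresh symbolic KerasTensor, so loss_value becomes symbolic and tape.gradient fails with the missing _id attribute. A minimal sketch of a corrected call (the unused Input in __init__ can go as well), using only the layers already defined:
def call(self, x):
    # Feature Extractor: use the incoming batch tensor directly;
    # do not create a new Input() here.
    x = self.feature_extractor_layer1(x)
    x = self.feature_extractor_layer2(x)
    x = self.feature_extractor_layer3(x)
    x = self.feature_extractor_layer4(x)
    x = self.feature_extractor_layer5(x)
    x = self.feature_extractor_layer6(x)
    # Label Predictor
    label_pred = self.label_predictor_layer0(x)
    label_pred = self.label_predictor_layer1(label_pred)
    label_pred = self.label_predictor_layer2(label_pred)
    return label_pred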
I am trying to implement a handwriting OCR system based on the Keras OCR example: link.
However, I get the following error:
InvalidArgumentError: All labels must be nonnegative integers, batch: 0 labels: 1,0,11,9,45,0,25,17,27,41,39,9,37,0,23,1,39,9,35,0,11,35,29,25,0,1,0,27,9,1,35,3,49,0,43,17,23,23,1,13,9,0,69,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
[[{{node ctc_6/CTCLoss}}]]
[[{{node training_5/SGD/gradients/ctc_6/CTCLoss_grad/mul}}]]
Here are the generator, the CTC function, and the train function:
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# Generation of data: load the images, resize, gray, normalize them
class DataGenerator(keras.utils.Sequence):
    def __init__(self, list_Files, labels, downsample_factor, max_string_length=80, batch_size=32, dim=(512, 64), shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_Files = list_Files
        self.shuffle = shuffle
        self.on_epoch_end()
        self.max_string_length = max_string_length
        self.downsample_factor = downsample_factor
        #TODO: Add weight save

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.list_Files))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_Files_temp):
        # *[2,2] --> 2,2 (unpack values)
        X = np.ones([self.batch_size, *self.dim, 1])
        y = np.ones([self.batch_size, self.max_string_length]) * -1  # As in the keras_ocr example; why -1?
        X_length = np.zeros([self.batch_size, 1])
        y_length = np.zeros([self.batch_size, 1])
        #TODO: add mix with blank inputs as it is said to be important for translational invariance
        for i, file in enumerate(list_Files_temp):
            im = cv2.imread(file)  # load the file as a numpy array
            im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)  # Transform the file into a gray image
            im = cv2.resize(im, self.dim[::-1])  # Resize it (cv2 takes width first)
            im = im / 255  # Normalization
            X[i, 0:self.dim[0], :, 0] = im
            X_length[i] = self.dim[0] // self.downsample_factor - 2  #?????
            seq = text_to_labels(self.labels[file])
            y[i, 0:len(seq)] = seq  # Transform the text into a list of integers
            y_length[i] = len(y[i])
            print("LEN={0}".format(y_length[i]))
        inputs = {'the_input': X,
                  'the_labels': y,
                  'input_length': X_length,
                  'label_length': y_length
                  }
        outputs = {'ctc': np.zeros([self.batch_size])}
        print(y)
        return (inputs, outputs)

    def __len__(self):
        'Number of batches per epoch'
        return int(np.floor(len(self.list_Files) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_Files_temp = [self.list_Files[k] for k in indexes]
        #print(list_Files_temp[0])
        (inputs, outputs) = self.__data_generation(list_Files_temp)
        return (inputs, outputs)
def train(dim_images, partition, labels):
    # Misc parameters
    absolute_max_string_length = 80
    output_size = len(alphabet) + 1  # +1 for the CTC blank symbol

    # Network parameters
    img_h = dim_images[0]
    img_w = dim_images[1]
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    act = 'relu'
    input_shape = (*DIM_IMAGES, 1)
    downsample_factor = pool_size ** 2

    # Convolutional layers
    input_data = Input(name='the_input', shape=input_shape)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal', name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # Recurrent layers
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(output_size, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))

    # Prediction (needs to be decoded)
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labelsI = Input(name='the_labels',
                    shape=[absolute_max_string_length], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a Lambda layer
    loss_out = Lambda(
        ctc_lambda_func, output_shape=(1,),
        name='ctc')([y_pred, labelsI, input_length, label_length])

    # Generators
    training_generator = DataGenerator(partition['train'], labels, downsample_factor, batch_size=BATCH_SIZE, dim=DIM_IMAGES, shuffle=True)
    valid_generator = DataGenerator(partition['valid'], labels, downsample_factor, batch_size=BATCH_SIZE, dim=DIM_IMAGES, shuffle=False)

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = Model(inputs=[input_data, labelsI, input_length, label_length],
                  outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    model.fit_generator(
        generator=training_generator,
        steps_per_epoch=(len(partition['train']) - len(partition['valid'])) // BATCH_SIZE,
        epochs=20,
        validation_data=valid_generator,
        validation_steps=len(partition['valid']) // BATCH_SIZE)
I guess the '-1' labels come from this line:
y = np.ones([self.batch_size, self.max_string_length])*-1
In the original code there was a similar line (line 220), but it runs well:
self.Y_data = np.ones([self.num_words, self.absolute_max_string_len]) * -1
I thought the '-1' values were a way of padding the sequence, but this value seems to be forbidden by the CTC function. Is there something I am missing here?
It seems I just mixed up my image length and image width. Also, the label_length should be equal to the real length of the sentence (before padding with -1). Therefore the line:
y_length[i] = len(y[i])
Should be replaced by:
y_length[i] = len(seq)
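Applied to the generator above, the corrected lines inside __data_generation read (a sketch, same names as before):
seq = text_to_labels(self.labels[file])
y[i, 0:len(seq)] = seq
y_length[i] = len(seq)  # real (unpadded) label length, not the padded row length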
I am currently trying to implement a convolutional network using Keras 2.1.6 (with TensorFlow as backend) and its ImageDataGenerator to segment an image using a grayscale mask. I am trying to use an image as input and a mask as the label. Due to the low number of training images and memory constraints, I utilize the ImageDataGenerator class provided in Keras.
However, after changing the values provided in the Keras example to the ones described later, I get this error:
File "C:\Users\XXX\Anaconda3\lib\site-packages\keras\engine\training.py", line 2223, in fit_generator
batch_size = x.shape[0]
AttributeError: 'tuple' object has no attribute 'shape'
Which, as far as I know, happens because the generator generates a tuple and not an array. This first happened after I changed the following parameters from the standard values provided in the Keras example: color_mode='grayscale' for all mask generators, and class_mode='input', since this is recommended for autoencoders.
The Keras example can be found in here.
The dataset I am using consists of 100 images (jpg) and 100 corresponding grayscale masks (png) and can be downloaded at this link
The architecture I wanted to implement is an autoencoder/U-Net-based network, shown in the provided code:
from keras.preprocessing import image
from keras.models import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from keras import initializers

image_path =
mask_path =
valid_image_path =
valid_mask_path =

img_size = 160
batchsize = 10
samplesize = 60
steps = samplesize / batchsize

train_datagen = image.ImageDataGenerator(shear_range=0.2,
                                         zoom_range=0.2,
                                         horizontal_flip=True)

data_gen_args = dict(rotation_range=90,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     zoom_range=0.2)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

seed = 1

image_generator = image_datagen.flow_from_directory(
    image_path,
    target_size=(img_size, img_size),
    class_mode='input',
    batch_size=batchsize,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    mask_path,
    target_size=(img_size, img_size),
    class_mode='input',
    color_mode='grayscale',
    batch_size=batchsize,
    seed=seed)

vimage_generator = image_datagen.flow_from_directory(
    valid_image_path,
    target_size=(img_size, img_size),
    class_mode='input',
    batch_size=batchsize,
    seed=seed)

vmask_generator = mask_datagen.flow_from_directory(
    valid_mask_path,
    target_size=(img_size, img_size),
    class_mode='input',
    color_mode='grayscale',
    batch_size=batchsize,
    seed=seed)

# Model
input_img = Input(shape=(img_size, img_size, 3))
c11 = Conv2D(16, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(input_img)
mp1 = MaxPooling2D((2, 2), padding='same')(c11)
c21 = Conv2D(16, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(mp1)
mp2 = MaxPooling2D((2, 2), padding='same')(c21)
c31 = Conv2D(32, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(mp2)
encoded = MaxPooling2D((5, 5), padding='same')(c31)

c12 = Conv2D(32, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(encoded)
us12 = UpSampling2D((5, 5))(c12)
c22 = Conv2D(16, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(us12)
us22 = UpSampling2D((2, 2))(c22)
c32 = Conv2D(16, (3, 3), activation='relu', padding='same', kernel_initializer=initializers.random_normal(stddev=0.01))(us22)
us32 = UpSampling2D((2, 2))(c32)
decoded = Conv2D(1, (3, 3), activation='softmax', padding='same')(us32)

model = Model(input_img, decoded)
model.compile(loss="mean_squared_error", optimizer=optimizers.Adam(), metrics=["accuracy"])
#model.summary()

# Generators, tr: training, v: validation
trgen = zip(image_generator, mask_generator)
vgen = zip(vimage_generator, vmask_generator)

model.fit_generator(
    trgen,
    steps_per_epoch=steps,
    epochs=5,
    validation_data=vgen,
    validation_steps=10)
Here is a better version of U-Net; you can use this code:
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
    x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(tensor)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
    y = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
    y = concatenate([y, residual], axis=3)
    y = conv_block(y, nfilters)
    return y

def Unet(img_height, img_width, nclasses=3, filters=64):
    # down
    input_layer = Input(shape=(img_height, img_width, 3), name='image_input')
    conv1 = conv_block(input_layer, nfilters=filters)
    conv1_out = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = conv_block(conv1_out, nfilters=filters*2)
    conv2_out = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = conv_block(conv2_out, nfilters=filters*4)
    conv3_out = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = conv_block(conv3_out, nfilters=filters*8)
    conv4_out = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv4_out = Dropout(0.5)(conv4_out)
    conv5 = conv_block(conv4_out, nfilters=filters*16)
    conv5 = Dropout(0.5)(conv5)
    # up
    deconv6 = deconv_block(conv5, residual=conv4, nfilters=filters*8)
    deconv6 = Dropout(0.5)(deconv6)
    deconv7 = deconv_block(deconv6, residual=conv3, nfilters=filters*4)
    deconv7 = Dropout(0.5)(deconv7)
    deconv8 = deconv_block(deconv7, residual=conv2, nfilters=filters*2)
    deconv9 = deconv_block(deconv8, residual=conv1, nfilters=filters)
    # output
    output_layer = Conv2D(filters=nclasses, kernel_size=(1, 1))(deconv9)
    output_layer = BatchNormalization()(output_layer)
    output_layer = Activation('softmax')(output_layer)

    model = Model(inputs=input_layer, outputs=output_layer, name='Unet')
    return model
Note that if you have only two classes, i.e. nclasses=2, you need to change
output_layer = Conv2D(filters=nclasses, kernel_size=(1, 1))(deconv9)
output_layer = BatchNormalization()(output_layer)
output_layer = Activation('softmax')(output_layer)
to
output_layer = Conv2D(filters=2, kernel_size=(1, 1))(deconv9)
output_layer = BatchNormalization()(output_layer)
output_layer = Activation('sigmoid')(output_layer)
Now for the data generators, you can use the built-in ImageDataGenerator class. Here is the code from the Keras docs:
# we create two instances with the same arguments
data_gen_args = dict(featurewise_center=True,
                     featurewise_std_normalization=True,
                     rotation_range=90,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     zoom_range=0.2)
image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# Provide the same seed and keyword arguments to the fit and flow methods
seed = 1
image_datagen.fit(images, augment=True, seed=seed)
mask_datagen.fit(masks, augment=True, seed=seed)

image_generator = image_datagen.flow_from_directory(
    'data/images',
    class_mode=None,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    'data/masks',
    class_mode=None,
    seed=seed)

# combine generators into one which yields image and masks
train_generator = zip(image_generator, mask_generator)

model.fit_generator(
    train_generator,
    steps_per_epoch=2000,
    epochs=50)
Another way to go is to implement your own generator by extending the Sequence class from Keras:
class seg_gen(Sequence):
    def __init__(self, x_set, y_set, batch_size, image_dir, mask_dir):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.samples = len(self.x)
        self.image_dir = image_dir
        self.mask_dir = mask_dir

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        # draw a random batch of indices
        idx = np.random.randint(0, self.samples, self.batch_size)
        batch_x, batch_y = [], []
        for i in idx:
            _image = image.img_to_array(image.load_img(f'{self.image_dir}/{self.x[i]}', target_size=(img_height, img_width))) / 255.
            mask = image.img_to_array(image.load_img(f'{self.mask_dir}/{self.y[i]}', grayscale=True, target_size=(img_height, img_width)))
            # mask = np.resize(mask, (img_height*img_width, classes))
            batch_y.append(mask)
            batch_x.append(_image)
        return np.array(batch_x), np.array(batch_y)
Here is sample code to train the model:
unet = Unet(256, 256, nclasses=66, filters=64)
print(unet.output_shape)
p_unet = multi_gpu_model(unet, 4)
p_unet.load_weights('models-dr/top_weights.h5')
p_unet.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

tb = TensorBoard(log_dir='logs', write_graph=True)
mc = ModelCheckpoint(mode='max', filepath='models-dr/top_weights.h5', monitor='acc', save_best_only=True, save_weights_only=True, verbose=1)
es = EarlyStopping(mode='max', monitor='acc', patience=6, verbose=1)
callbacks = [tb, mc, es]

train_gen = seg_gen(image_list, mask_list, batch_size)
p_unet.fit_generator(train_gen, steps_per_epoch=steps, epochs=13, callbacks=callbacks, workers=8)
I got good results when I had only 2 classes by using dice loss; here is the code for it:
def dice_coeff(y_true, y_pred):
    smooth = 1.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    score = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return score

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coeff(y_true, y_pred)
    return loss
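A usage sketch, assuming the two-class variant described above (sigmoid output and binary masks that match the output shape): the dice loss simply replaces the loss passed to compile.
# Assumes Unet(...) was modified for two classes as shown earlier
# (sigmoid output head) and that the masks are binary.
model = Unet(256, 256, nclasses=2, filters=64)
model.compile(optimizer='adam', loss=dice_loss, metrics=[dice_coeff])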
What you are trying to build is an image segmentation model, not an autoencoder. Therefore, since you have separate generators for the images and the labels (i.e. the masks), you need to set the class_mode argument to None to prevent the generators from producing any label arrays.
Further, you need to change the activation function of the last layer from softmax to sigmoid; otherwise, since softmax normalizes the sum of its input elements to 1 (and your output has a single channel), the output would be all ones. You can use binary_crossentropy as the loss function as well.
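Concretely, a minimal sketch of both fixes applied to the question's code (same variable names; only the changed lines are shown):
# Masks come from their own generator, so produce no label arrays:
image_generator = image_datagen.flow_from_directory(
    image_path,
    target_size=(img_size, img_size),
    class_mode=None,
    batch_size=batchsize,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    mask_path,
    target_size=(img_size, img_size),
    class_mode=None,
    color_mode='grayscale',
    batch_size=batchsize,
    seed=seed)

# Single-channel mask output: sigmoid instead of softmax,
# with binary cross-entropy as the loss.
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(us32)
model = Model(input_img, decoded)
model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam(), metrics=['accuracy'])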