What is ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 32, 10000, 1), found shape=(None, 32, 1000, 1)?
When I try to train a model with TensorFlow, I get this error. I think the shape of my data is the same as the input shape the model expects. Why does this error occur?
from tensorflow.keras.layers import (Input, Conv2D, BatchNormalization, DepthwiseConv2D,
                                     Activation, AveragePooling2D, Dropout, SpatialDropout2D,
                                     SeparableConv2D, Flatten, Dense)
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.models import Model

def EEGNet(nb_classes, Chans = 64, Samples = 128,
           dropoutRate = 0.5, kernLength = 64, F1 = 8,
           D = 2, F2 = 16, norm_rate = 0.25, dropoutType = 'Dropout'):

    if dropoutType == 'SpatialDropout2D':
        dropoutType = SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')

    input1 = Input(shape = (Chans, Samples, 1))

    ##################################################################
    block1 = Conv2D(F1, (1, kernLength), padding = 'same',
                    input_shape = (Chans, Samples, 1),
                    use_bias = False)(input1)
    block1 = BatchNormalization()(block1)
    block1 = DepthwiseConv2D((Chans, 1), use_bias = False,
                             depth_multiplier = D,
                             depthwise_constraint = max_norm(1.))(block1)
    block1 = BatchNormalization()(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)

    block2 = SeparableConv2D(F2, (1, 16),
                             use_bias = False, padding = 'same')(block1)
    block2 = BatchNormalization()(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)

    flatten = Flatten(name = 'flatten')(block2)

    dense = Dense(nb_classes, name = 'dense',
                  kernel_constraint = max_norm(norm_rate))(flatten)
    softmax = Activation('softmax', name = 'softmax')(dense)

    return Model(inputs=input1, outputs=softmax)
import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping

l = np.random.rand(9573, 32, 1000, 1)
model = EEGNet(2, Chans = 32, Samples = 10000)

early_stopping = EarlyStopping(
    monitor='val_loss', min_delta=0.0001,
    mode='min', patience=10, verbose=1,
    restore_best_weights=True)

# Fit model
optimizer = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9,
                                  beta_2=0.999, amsgrad=False)
model.compile(loss='binary_crossentropy', optimizer=optimizer,
              metrics=['accuracy'])
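The two shapes in the error do not actually match: the training array l has 1000 time samples, while the model was built with Samples = 10000. A quick check makes the mismatch visible (a minimal sketch, assuming the array l above is what gets passed to fit()):

print(l.shape)            # (9573, 32, 1000, 1)
print(model.input_shape)  # (None, 32, 10000, 1)

# Building the model with the data's actual time dimension avoids the error:
model = EEGNet(2, Chans = 32, Samples = 1000)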
So I have a problem training a deep learning model with BERT on a text dataset in TensorFlow. I want to fit() the model but get an error during training. I think it happens because data_train didn't have the labels. From my research it looks like the same problem as this SO question here: Same problem. Since that one didn't get an answer, is this a bug? The error is like this:
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.
My code looks like this:
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))

def generate_training_data(df, ids, masks, tokenizer):
    for i, text in tqdm(enumerate(df['text'])):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=256,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks

X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)

labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1

dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))

def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    },

dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)
p = 0.8
train_size = int((len(df)//16)*p)

train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')

input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')

bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer)  # softmax -> calcs probs of classes

sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()

optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

hist = sentiment_model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=2
)
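The map function above returns only the feature dict (note the trailing comma, which wraps it in a one-element tuple), so the labels are dropped from the dataset before fit() ever sees them, which is exactly what the "Target data is missing" error complains about. A minimal sketch of a map function that keeps the labels paired with the features:

def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    # Return a (features, target) pair so Keras receives target data.
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    }, labels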
I spent a bit of time finding something we can update, and I downloaded the model from the website.
[ Sample ]:
import tensorflow as tf
import tensorflow_text as text  # Registers the ops.
import tensorflow_hub as hub
import os
from os.path import exists

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def generate_training_data(train_labels):
    input_ids = [ ]
    attn_masks = [ ]
    labels = [ ]

    for item in train_labels:
        input_ids.append( str(item) )
        attn_masks.append( int(1) )
        labels.append( item )

    attn_masks = tf.constant(attn_masks, shape=(1, len(attn_masks), 1), dtype=tf.float32)
    labels = tf.constant(labels, shape=(1, len(labels), 1), dtype=tf.int64)
    input_ids = tf.constant(input_ids, shape=(1, len(input_ids), 1), dtype=tf.string)

    return input_ids, attn_masks, labels

def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks,
        'labels': labels
    },

def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
    return tf.keras.Model(text_input, net)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

X_input_ids, X_attn_masks, labels = generate_training_data(train_labels)
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks))

options = tf.saved_model.LoadOptions(
    allow_partial_checkpoint=False,
    experimental_io_device="/physical_device:GPU:0",
    experimental_skip_checkpoint=True
)

text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='sentences')
preprocessor = hub.KerasLayer(export_dir)
encoder_inputs = preprocessor(text_input)
encoder = hub.KerasLayer(export_dir_2, trainable=False, load_options=options)
outputs = encoder(encoder_inputs)

intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(outputs['default'])
output_layer = tf.keras.layers.Dense(1, activation='softmax', name='output_layer')(intermediate_layer)

sentiment_model = tf.keras.Model(inputs=[text_input], outputs=output_layer)
sentiment_model.summary()

optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

hist = sentiment_model.fit(
    dataset,
    validation_data=dataset,
    epochs=2
)
[ Output ]:
outputs: KerasTensor(type_spec=TensorSpec(shape=(None, 512), dtype=tf.float32, name=None), name='keras_layer_1/StatefulPartitionedCall:0', description="created by layer 'keras_layer_1'")
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
sentences (InputLayer) [(None,)] 0 []
keras_layer (KerasLayer) {'input_mask': (Non 0 ['sentences[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_1 (KerasLayer) {'default': (None, 28763649 ['keras_layer[0][0]',
512), 'keras_layer[0][1]',
'encoder_outputs': 'keras_layer[0][2]']
[(None, 128, 512),
(None, 128, 512),
(None, 128, 512),
(None, 128, 512)],
'sequence_output':
(None, 128, 512),
'pooled_output': (
None, 512)}
intermediate_layer (Dense) (None, 512) 262656 ['keras_layer_1[0][0]']
output_layer (Dense) (None, 1) 513 ['intermediate_layer[0][0]']
==================================================================================================
Total params: 29,026,818
Trainable params: 263,169
Non-trainable params: 28,763,649
__________________________________________________________________________________________________
Epoch 1/2
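One caveat in the sample above: a softmax over a single output unit always outputs 1.0, so the Dense(1, activation='softmax') head cannot produce a useful probability with CategoricalCrossentropy. For a one-unit head the usual pairing is a sigmoid activation with BinaryCrossentropy (an illustrative change, assuming the rest of the graph above):

# Illustrative binary head, replacing the softmax/categorical pairing above.
output_layer = tf.keras.layers.Dense(1, activation='sigmoid', name='output_layer')(intermediate_layer)
loss_func = tf.keras.losses.BinaryCrossentropy()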
I am getting a ValueError: logits and labels must have the same shape ((None, 1) vs ()) when calling model.evaluate. The model trains fine, but the problem appears when I evaluate. I used tf.expand_dims for the logits, but I am wondering if this needs to be applied to the labels as well?
Here is my code below.
import tensorflow as tf
import tensorflow_datasets as tfds

dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(1)

VOCAB_SIZE, EMBED_SIZE, NUM_OOV_BUCKETS = 10000, 128, 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

class AttentionLayer(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.query_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.value_layer = tf.keras.layers.Conv1D(
            filters=100,
            kernel_size=4,
            padding='same'
        )
        self.attention_layer = tf.keras.layers.Attention()

    def call(self, inputs):
        query = self.query_layer(inputs)
        value = self.value_layer(inputs)
        attention = self.attention_layer([query, value])
        return tf.keras.layers.concatenate([query, attention])

attention_layer = AttentionLayer()

model1 = tf.keras.models.Sequential([
    tf.keras.Input(shape=(), batch_size=1, dtype=tf.string, name='InputLayer'),
    encoder,
    tf.keras.layers.Embedding(VOCAB_SIZE + NUM_OOV_BUCKETS, EMBED_SIZE, mask_zero=True, name='Embedding_Layer'),
    attention_layer,
    tf.keras.layers.Conv1D(filters=32, kernel_size=4, padding='same', activation='relu', name='Conv1DLayer'),
    tf.keras.layers.MaxPooling1D(pool_size=2, name='MaxPoolLayer'),
    tf.keras.layers.LSTM(64, dropout=0.2, name='DropoutLayer'),
    tf.keras.layers.Dense(250, activation='relu', name='DenseLayer'),
    tf.keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])
model1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

def preprocess_y(x, y):
    return x, tf.expand_dims(y, -1)

history1 = model1.fit(
    train_dataset.map(preprocess_y),
    batch_size=BATCH_SIZE,
    epochs=1)

model1.evaluate(test_dataset)
ValueError: logits and labels must have the same shape ((None, 1) vs ())
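train_dataset was batched and its labels expanded through preprocess_y, but test_dataset is passed to evaluate() raw, so its labels are still scalars of shape (), which is where the (None, 1) vs () mismatch comes from. A minimal sketch of the fix, reusing preprocess_y from above:

# Batch and expand the test labels the same way as the training data.
model1.evaluate(test_dataset.batch(BATCH_SIZE).map(preprocess_y))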
I am trying to segment the road and non-road parts of images using the ENet deep learning model. I am using this GitHub repo: https://github.com/kwotsin/TensorFlow-ENet which works with an original image size of 360x480, and it works fine for images of 360x480 or above. But as soon as I reduce the size of the image it stops working: it produces a ruined image of random black and white pixels. Even when I reduce the size at the same aspect ratio, it still doesn't work.
Here is my ENet model structure code:
#Now actually start building the network
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    The ENet model for real-time semantic segmentation!

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents one batch of preprocessed images.
    - num_classes(int): an integer for the number of classes to predict. This will determine the final output channels as the answer.
    - batch_size(int): the batch size to explicitly set the shape of the inputs in order for operations to work properly.
    - num_initial_blocks(int): the number of times to repeat the initial block.
    - stage_two_repeat(int): the number of times to repeat stage two in order to make the network deeper.
    - skip_connections(bool): if True, add the corresponding encoder feature maps to the decoder. They are of exact same shapes.
    - reuse(bool): Whether or not to reuse the variables for evaluation.
    - is_training(bool): if True, switch on batch_norm and prelu only during training, otherwise they are turned off.
    - scope(str): a string that represents the scope name for the variables.

    OUTPUTS:
    - net(Tensor): a 4D Tensor output of shape [batch_size, image_height, image_width, num_classes], where each pixel has a one-hot encoded vector
      determining the label of the pixel.
    '''
    #Set the shape of the inputs first to get the batch_size information
    inputs_shape = inputs.get_shape().as_list()
    inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            net = initial_block(inputs, scope='initial_block_1')
            for i in xrange(2, max(num_initial_blocks, 1) + 1):
                net = initial_block(net, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat the stage two at least twice to get stage 2 and 3:
                for i in xrange(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')

    return logits, probabilities
and here is the full link of the code: https://github.com/kwotsin/TensorFlow-ENet/blob/master/enet.py
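ENet halves the spatial resolution three times (the initial block plus the two downsampling bottlenecks), and the decoder unpools with the saved pooling indices and input shapes, so heights and widths that are not multiples of 8 can leave the encoder and decoder shapes out of alignment; 360x480 divides cleanly by 8, which may be one reason smaller arbitrary sizes misbehave. A hedged sketch of padding an input up to the next multiple of 8 before feeding it to the network (pad_to_multiple_of_8 is an illustrative helper, not part of the repo):

def pad_to_multiple_of_8(image, height, width):
    # height/width are the known Python-int dimensions of the decoded image.
    # Pad H and W to the next multiple of 8 so the three 2x downsample
    # stages and their matching unpool stages line up again.
    new_h = ((height + 7) // 8) * 8
    new_w = ((width + 7) // 8) * 8
    return tf.image.resize_image_with_crop_or_pad(image, new_h, new_w)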
Prediction segmentation code:
image_dir = './dataset/test/'
images_list = sorted([os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith('.png')])

checkpoint_dir = "log/original/"
listi = os.listdir(checkpoint_dir)
print(images_list)

checkpoint = tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original")
print(checkpoint)

num_initial_blocks = 1
skip_connections = False
stage_two_repeat = 2

'''
#Labels to colours are obtained from here:
https://github.com/alexgkendall/SegNet-Tutorial/blob/c922cc4a4fcc7ce279dd998fb2d4a8703f34ebd7/Scripts/test_segmentation_camvid.py

However, the road_marking class is collapsed into the road class in the dataset provided.

Classes:
------------
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road_marking = [255,69,0]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]
'''
label_to_colours = {0: [128,128,128],
                    1: [0, 0, 0]}

#Create the photo directory
photo_dir = checkpoint_dir + "/test_images"
if not os.path.exists(photo_dir):
    os.mkdir(photo_dir)

#Create a function to convert each pixel label to colour.
def grayscale_to_colour(image):
    print 'Converting image...'
    image = image.reshape((256, 256, 1))
    image = np.repeat(image, 3, axis=-1)
    for i in xrange(image.shape[0]):
        for j in xrange(image.shape[1]):
            label = int(image[i][j][0])
            image[i][j] = np.array(label_to_colours[label])
    return image

with tf.Graph().as_default() as graph:
    images_tensor = tf.train.string_input_producer(images_list, shuffle=False)
    reader = tf.WholeFileReader()
    key, image_tensor = reader.read(images_tensor)

    image = tf.image.decode_png(image_tensor, channels=3)
    # image = tf.image.resize_image_with_crop_or_pad(image, 360, 480)
    # image = tf.cast(image, tf.float32)
    image = preprocess(image)
    images = tf.train.batch([image], batch_size=10, allow_smaller_final_batch=True)

    #Create the model inference
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(images,
                                     num_classes=2,
                                     batch_size=10,
                                     is_training=True,
                                     reuse=None,
                                     num_initial_blocks=num_initial_blocks,
                                     stage_two_repeat=stage_two_repeat,
                                     skip_connections=skip_connections)

    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    def restore_fn(sess):
        return saver.restore(sess, checkpoint)

    predictions = tf.argmax(probabilities, -1)
    predictions = tf.cast(predictions, tf.float32)
    print 'HERE', predictions.get_shape()

    sv = tf.train.Supervisor(logdir=None, init_fn=restore_fn)

    with sv.managed_session() as sess:
        for i in xrange(len(images_list) / 10):
            segmentations = sess.run(predictions)
            print(segmentations.shape)

            for j in xrange(segmentations.shape[0]):
                converted_image = grayscale_to_colour(segmentations[j])
                print 'Saving image %s/%s' %(i*10 + j, len(images_list))
                imsave(photo_dir + "/image_%s.png" %(i*10 + j), converted_image)
Here is the full code link: https://github.com/kwotsin/TensorFlow-ENet/blob/master/predict_segmentation.py
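Since the checkpoint in log/original was trained at a fixed resolution, it is worth re-enabling the commented-out resize so that test images are cropped or padded to the size the network was trained on (a hedged sketch based on the commented lines above; 360x480 is the repo's CamVid resolution):

# Hedged sketch: crop/pad test images to the training resolution
# before preprocess(), matching the commented-out lines above.
image = tf.image.decode_png(image_tensor, channels=3)
image = tf.image.resize_image_with_crop_or_pad(image, 360, 480)
image = tf.cast(image, tf.float32)
image = preprocess(image)

Note that the hard-coded (256, 256, 1) reshape in grayscale_to_colour also has to agree with whatever resolution actually comes out of the network.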
You can try this model. It's written in tf.keras:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

print('Tensorflow', tf.__version__)

def initial_block(inp):
    inp1 = inp
    conv = Conv2D(filters=13, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal')(inp)
    pool = MaxPool2D(2)(inp1)
    concat = concatenate([conv, pool])
    return concat

def encoder_bottleneck(inp, filters, name, dilation_rate=2, downsample=False, dilated=False, asymmetric=False, drop_rate=0.1):
    reduce = filters // 4
    down = inp
    kernel_stride = 1

    #Downsample
    if downsample:
        kernel_stride = 2
        pad_activations = filters - inp.shape.as_list()[-1]

        down = MaxPool2D(2)(down)
        down = Permute(dims=(1, 3, 2))(down)
        down = ZeroPadding2D(padding=((0, 0), (0, pad_activations)))(down)
        down = Permute(dims=(1, 3, 2))(down)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=kernel_stride, strides=kernel_stride, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not dilated and not asymmetric:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    elif dilated:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', dilation_rate=dilation_rate, kernel_initializer='he_normal', name=f'{name}_reduce_dilated')(x)
    elif asymmetric:
        x = Conv2D(filters=reduce, kernel_size=(1, 5), padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_asymmetric')(x)
        x = Conv2D(filters=reduce, kernel_size=(5, 1), padding='same', kernel_initializer='he_normal', name=name)(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = SpatialDropout2D(rate=drop_rate)(x)

    concat = Add()([x, down])
    concat = PReLU(shared_axes=[1, 2])(concat)
    return concat

def decoder_bottleneck(inp, filters, name, upsample=False):
    reduce = filters // 4
    up = inp

    #Upsample
    if upsample:
        up = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_upsample')(up)
        up = UpSampling2D(size=2)(up)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not upsample:
        x = Conv2D(filters=reduce, kernel_size=3, strides=1, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    else:
        x = Conv2DTranspose(filters=reduce, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal', name=f'{name}_transpose')(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)

    concat = Add()([x, up])
    concat = ReLU()(concat)
    return concat

def ENet(H, W, nclasses):
    '''
    Args:
        H: Height of the image
        W: Width of the image
        nclasses: Total number of classes
    Returns:
        model: Keras model (also saved to disk as enet_{nclasses}.h5)
    '''
    inp = Input(shape=(H, W, 3))
    enc = initial_block(inp)

    #Bottleneck 1.0
    enc = encoder_bottleneck(enc, 64, name='enc1', downsample=True, drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.1', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.2', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.3', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.4', drop_rate=0.001)

    #Bottleneck 2.0
    enc = encoder_bottleneck(enc, 128, name='enc2.0', downsample=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.1')
    enc = encoder_bottleneck(enc, 128, name='enc2.2', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.3', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.4', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.5')
    enc = encoder_bottleneck(enc, 128, name='enc2.6', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.7', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.8', dilation_rate=16, dilated=True)

    #Bottleneck 3.0
    enc = encoder_bottleneck(enc, 128, name='enc3.0')
    enc = encoder_bottleneck(enc, 128, name='enc3.1', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.2', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.3', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.4')
    enc = encoder_bottleneck(enc, 128, name='enc3.5', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.6', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.7', dilation_rate=16, dilated=True)

    #Bottleneck 4.0
    dec = decoder_bottleneck(enc, 64, name='dec4.0', upsample=True)
    dec = decoder_bottleneck(dec, 64, name='dec4.1')
    dec = decoder_bottleneck(dec, 64, name='dec4.2')

    #Bottleneck 5.0
    dec = decoder_bottleneck(dec, 16, name='dec5.0', upsample=True)
    dec = decoder_bottleneck(dec, 16, name='dec5.1')

    dec = Conv2DTranspose(filters=nclasses, kernel_size=2, strides=2, padding='same', kernel_initializer='he_normal', name='fullconv')(dec)
    dec = Activation('softmax')(dec)

    model = Model(inputs=inp, outputs=dec, name='Enet')
    model.save(f'enet_{nclasses}.h5')
    return model
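A quick usage sketch (illustrative sizes; H and W should be divisible by 8 so the two decoder upsamplings plus the final transpose convolution restore the input resolution):

model = ENet(256, 256, nclasses=2)
model.summary()

# Train with one-hot pixel masks of shape (batch, 256, 256, 2), e.g.:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])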