ENet semantic segmentation model is not working for smaller images - python
I am trying to segment the road and non-road parts of an image using the ENet deep learning model. I am using this GitHub repo: https://github.com/kwotsin/TensorFlow-ENet, whose original image size is 360x480, and it works fine for images of 360x480 or larger. But as soon as I reduce the size of the image it stops working: it produces a ruined image of random black and white pixels. I also tried reducing the size while keeping the same aspect ratio, but it still does not work.
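For example, shrinking a test image before prediction looks roughly like this (a minimal sketch using standard TensorFlow 1.x image ops; the file path and the 180x240 target size are only placeholders, not my exact code):

import tensorflow as tf

# Read and decode one test image (placeholder path).
image_bytes = tf.read_file('./dataset/test/example.png')
image = tf.image.decode_png(image_bytes, channels=3)

# Shrink it below the original 360x480; 180x240 is just an example target.
image = tf.image.resize_images(image, [180, 240])
image = tf.cast(image, tf.float32)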
Here is my ENet model structure code:
#Now actually start building the network
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    The ENet model for real-time semantic segmentation!

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents one batch of preprocessed images.
    - num_classes(int): an integer for the number of classes to predict. This will determine the final output channels as the answer.
    - batch_size(int): the batch size to explicitly set the shape of the inputs in order for operations to work properly.
    - num_initial_blocks(int): the number of times to repeat the initial block.
    - stage_two_repeat(int): the number of times to repeat stage two in order to make the network deeper.
    - skip_connections(bool): if True, add the corresponding encoder feature maps to the decoder. They are of exact same shapes.
    - reuse(bool): Whether or not to reuse the variables for evaluation.
    - is_training(bool): if True, switch on batch_norm and prelu only during training, otherwise they are turned off.
    - scope(str): a string that represents the scope name for the variables.

    OUTPUTS:
    - net(Tensor): a 4D Tensor output of shape [batch_size, image_height, image_width, num_classes], where each pixel has a one-hot encoded vector
      determining the label of the pixel.
    '''
    #Set the shape of the inputs first to get the batch_size information
    inputs_shape = inputs.get_shape().as_list()
    inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            net = initial_block(inputs, scope='initial_block_1')
            for i in xrange(2, max(num_initial_blocks, 1) + 1):
                net = initial_block(net, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat the stage two at least twice to get stage 2 and 3:
                for i in xrange(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')

    return logits, probabilities
Here is the link to the full code: https://github.com/kwotsin/TensorFlow-ENet/blob/master/enet.py
Prediction segmentation code:
image_dir = './dataset/test/'
images_list = sorted([os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith('.png')])

checkpoint_dir = "log/original/"
listi = os.listdir(checkpoint_dir)
print(images_list)
checkpoint = tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original")
print(tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original"),'-DDD--------------------------------------++++++++++++++++++++++++++++++++++++++++++++++++++++')

num_initial_blocks = 1
skip_connections = False
stage_two_repeat = 2

'''
#Labels to colours are obtained from here:
https://github.com/alexgkendall/SegNet-Tutorial/blob/c922cc4a4fcc7ce279dd998fb2d4a8703f34ebd7/Scripts/test_segmentation_camvid.py

However, the road_marking class is collapsed into the road class in the dataset provided.

Classes:
------------
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road_marking = [255,69,0]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]
'''
label_to_colours = {0: [128,128,128],
                    1: [0, 0, 0]}

#Create the photo directory
photo_dir = checkpoint_dir + "/test_images"
if not os.path.exists(photo_dir):
    os.mkdir(photo_dir)

#Create a function to convert each pixel label to colour.
def grayscale_to_colour(image):
    print 'Converting image...'
    image = image.reshape((256, 256, 1))
    image = np.repeat(image, 3, axis=-1)
    for i in xrange(image.shape[0]):
        for j in xrange(image.shape[1]):
            label = int(image[i][j][0])
            image[i][j] = np.array(label_to_colours[label])
    return image

with tf.Graph().as_default() as graph:
    images_tensor = tf.train.string_input_producer(images_list, shuffle=False)
    reader = tf.WholeFileReader()
    key, image_tensor = reader.read(images_tensor)
    image = tf.image.decode_png(image_tensor, channels=3)
    print(image.shape, 'newwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww shapeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee')
    # image = tf.image.resize_image_with_crop_or_pad(image, 360, 480)
    # image = tf.cast(image, tf.float32)
    image = preprocess(image)
    images = tf.train.batch([image], batch_size = 10, allow_smaller_final_batch=True)

    #Create the model inference
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(images,
                                     num_classes=2,
                                     batch_size=10,
                                     is_training=True,
                                     reuse=None,
                                     num_initial_blocks=num_initial_blocks,
                                     stage_two_repeat=stage_two_repeat,
                                     skip_connections=skip_connections)

    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    def restore_fn(sess):
        return saver.restore(sess, checkpoint)

    predictions = tf.argmax(probabilities, -1)
    predictions = tf.cast(predictions, tf.float32)
    print 'HERE', predictions.get_shape()

    sv = tf.train.Supervisor(logdir=None, init_fn=restore_fn)

    with sv.managed_session() as sess:
        for i in xrange(len(images_list) / 10):
            segmentations = sess.run(predictions)
            # print segmentations.shape
            print(segmentations.shape, 'shape')

            for j in xrange(segmentations.shape[0]):
                converted_image = grayscale_to_colour(segmentations[j])
                print 'Saving image %s/%s' %(i*10 + j, len(images_list))
                #plt.axis('off')
                #plt.imshow(converted_image)
                imsave(photo_dir + "/image_%s.png" %(i*10 + j), converted_image)
                # plt.show()
Here is the full code link: https://github.com/kwotsin/TensorFlow-ENet/blob/master/predict_segmentation.py
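Note that grayscale_to_colour above hard-codes a 256x256 reshape. A size-agnostic variant would look roughly like this (just a sketch, not the repository's code; it reuses the label_to_colours dict defined above):

import numpy as np

def grayscale_to_colour_any_size(image):
    # Sketch: colour a label map of arbitrary height/width instead of
    # assuming a fixed 256x256 prediction (label_to_colours is defined above).
    h, w = image.shape[:2]
    image = image.reshape((h, w, 1))
    image = np.repeat(image, 3, axis=-1)
    for i in range(h):
        for j in range(w):
            image[i][j] = np.array(label_to_colours[int(image[i][j][0])])
    return image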
You can try this model. It's written in tf.keras:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

print('Tensorflow', tf.__version__)

def initial_block(inp):
    inp1 = inp
    conv = Conv2D(filters=13, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal')(inp)
    pool = MaxPool2D(2)(inp1)
    concat = concatenate([conv, pool])
    return concat

def encoder_bottleneck(inp, filters, name, dilation_rate=2, downsample=False, dilated=False, asymmetric=False, drop_rate=0.1):
    reduce = filters // 4
    down = inp
    kernel_stride = 1

    #Downsample
    if downsample:
        kernel_stride = 2
        pad_activations = filters - inp.shape.as_list()[-1]
        down = MaxPool2D(2)(down)
        down = Permute(dims=(1, 3, 2))(down)
        down = ZeroPadding2D(padding=((0, 0), (0, pad_activations)))(down)
        down = Permute(dims=(1, 3, 2))(down)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=kernel_stride, strides=kernel_stride, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not dilated and not asymmetric:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    elif dilated:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', dilation_rate=dilation_rate, kernel_initializer='he_normal', name=f'{name}_reduce_dilated')(x)
    elif asymmetric:
        x = Conv2D(filters=reduce, kernel_size=(1,5), padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_asymmetric')(x)
        x = Conv2D(filters=reduce, kernel_size=(5,1), padding='same', kernel_initializer='he_normal', name=name)(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = SpatialDropout2D(rate=drop_rate)(x)

    concat = Add()([x, down])
    concat = PReLU(shared_axes=[1, 2])(concat)
    return concat

def decoder_bottleneck(inp, filters, name, upsample=False):
    reduce = filters // 4
    up = inp

    #Upsample
    if upsample:
        up = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_upsample')(up)
        up = UpSampling2D(size=2)(up)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not upsample:
        x = Conv2D(filters=reduce, kernel_size=3, strides=1, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    else:
        x = Conv2DTranspose(filters=reduce, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal', name=f'{name}_transpose')(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)

    concat = Add()([x, up])
    concat = ReLU()(concat)
    return concat

def ENet(H, W, nclasses):
    '''
    Args:
        H: Height of the image
        W: Width of the image
        nclasses: Total no of classes

    Returns:
        model: Keras model in .h5 format
    '''
    inp = Input(shape=(H, W, 3))
    enc = initial_block(inp)

    #Bottleneck 1.0
    enc = encoder_bottleneck(enc, 64, name='enc1', downsample=True, drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.1', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.2', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.3', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.4', drop_rate=0.001)

    #Bottleneck 2.0
    enc = encoder_bottleneck(enc, 128, name='enc2.0', downsample=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.1')
    enc = encoder_bottleneck(enc, 128, name='enc2.2', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.3', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.4', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.5')
    enc = encoder_bottleneck(enc, 128, name='enc2.6', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.7', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.8', dilation_rate=16, dilated=True)

    #Bottleneck 3.0
    enc = encoder_bottleneck(enc, 128, name='enc3.0')
    enc = encoder_bottleneck(enc, 128, name='enc3.1', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.2', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.3', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.4')
    enc = encoder_bottleneck(enc, 128, name='enc3.5', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.6', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.7', dilation_rate=16, dilated=True)

    #Bottleneck 4.0
    dec = decoder_bottleneck(enc, 64, name='dec4.0', upsample=True)
    dec = decoder_bottleneck(dec, 64, name='dec4.1')
    dec = decoder_bottleneck(dec, 64, name='dec4.2')

    #Bottleneck 5.0
    dec = decoder_bottleneck(dec, 16, name='dec5.0', upsample=True)
    dec = decoder_bottleneck(dec, 16, name='dec5.1')

    dec = Conv2DTranspose(filters=nclasses, kernel_size=2, strides=2, padding='same', kernel_initializer='he_normal', name='fullconv')(dec)
    dec = Activation('softmax')(dec)

    model = Model(inputs=inp, outputs=dec, name='Enet')
    model.save(f'enet_{nclasses}.h5')
    return model
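For example, you can build it for a smaller input and run a quick shape check like this (just a usage sketch; the 256x256 size, 2 classes and dummy batch are illustrative, and I keep H and W divisible by 8 because the encoder halves the spatial size three times):

import numpy as np

# Build the model; 256x256 and 2 classes are illustrative values.
# H and W divisible by 8 keep the three downsampling stages and the
# three upsampling stages consistent with each other.
model = ENet(256, 256, nclasses=2)
model.summary()

# Sanity check on a dummy batch: expected output shape (1, 256, 256, 2),
# i.e. a per-pixel softmax over the two classes.
dummy = np.random.rand(1, 256, 256, 3).astype('float32')
pred = model.predict(dummy)
print(pred.shape)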