ENet semantic segmentation model is not working for smaller images - python
I am trying to segment the road and non-road parts of an image using the ENet deep learning model. I am using this GitHub repo: https://github.com/kwotsin/TensorFlow-ENet, whose original image size is 360x480, and it works fine for images of 360x480 or larger. But as soon as I reduce the size of the image it stops working: it produces a ruined image of random black and white pixels. I also tried reducing the size while keeping the same aspect ratio, but it still does not work.
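For example, shrinking a test image before prediction looks roughly like this (a minimal sketch using standard TensorFlow 1.x image ops; the file path and the 180x240 target size are only placeholders, not my exact code):

import tensorflow as tf

# Read and decode one test image (placeholder path).
image_bytes = tf.read_file('./dataset/test/example.png')
image = tf.image.decode_png(image_bytes, channels=3)

# Shrink it below the original 360x480; 180x240 is just an example target.
image = tf.image.resize_images(image, [180, 240])
image = tf.cast(image, tf.float32)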
Here is my ENet model structure code:
#Now actually start building the network
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    The ENet model for real-time semantic segmentation!

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents one batch of preprocessed images.
    - num_classes(int): an integer for the number of classes to predict. This will determine the final output channels as the answer.
    - batch_size(int): the batch size to explicitly set the shape of the inputs in order for operations to work properly.
    - num_initial_blocks(int): the number of times to repeat the initial block.
    - stage_two_repeat(int): the number of times to repeat stage two in order to make the network deeper.
    - skip_connections(bool): if True, add the corresponding encoder feature maps to the decoder. They are of exact same shapes.
    - reuse(bool): Whether or not to reuse the variables for evaluation.
    - is_training(bool): if True, switch on batch_norm and prelu only during training, otherwise they are turned off.
    - scope(str): a string that represents the scope name for the variables.

    OUTPUTS:
    - net(Tensor): a 4D Tensor output of shape [batch_size, image_height, image_width, num_classes], where each pixel has a one-hot encoded vector
      determining the label of the pixel.
    '''
    #Set the shape of the inputs first to get the batch_size information
    inputs_shape = inputs.get_shape().as_list()
    inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            net = initial_block(inputs, scope='initial_block_1')
            for i in xrange(2, max(num_initial_blocks, 1) + 1):
                net = initial_block(net, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat the stage two at least twice to get stage 2 and 3:
                for i in xrange(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')

    return logits, probabilities
Here is the link to the full code: https://github.com/kwotsin/TensorFlow-ENet/blob/master/enet.py
Prediction segmentation code:
image_dir = './dataset/test/'
images_list = sorted([os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith('.png')])

checkpoint_dir = "log/original/"
listi = os.listdir(checkpoint_dir)
print(images_list)
checkpoint = tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original")
print(tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original"),'-DDD--------------------------------------++++++++++++++++++++++++++++++++++++++++++++++++++++')

num_initial_blocks = 1
skip_connections = False
stage_two_repeat = 2

'''
#Labels to colours are obtained from here:
https://github.com/alexgkendall/SegNet-Tutorial/blob/c922cc4a4fcc7ce279dd998fb2d4a8703f34ebd7/Scripts/test_segmentation_camvid.py

However, the road_marking class is collapsed into the road class in the dataset provided.

Classes:
------------
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road_marking = [255,69,0]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]
'''
label_to_colours = {0: [128,128,128],
                    1: [0, 0, 0]}

#Create the photo directory
photo_dir = checkpoint_dir + "/test_images"
if not os.path.exists(photo_dir):
    os.mkdir(photo_dir)

#Create a function to convert each pixel label to colour.
def grayscale_to_colour(image):
    print 'Converting image...'
    image = image.reshape((256, 256, 1))
    image = np.repeat(image, 3, axis=-1)
    for i in xrange(image.shape[0]):
        for j in xrange(image.shape[1]):
            label = int(image[i][j][0])
            image[i][j] = np.array(label_to_colours[label])
    return image

with tf.Graph().as_default() as graph:
    images_tensor = tf.train.string_input_producer(images_list, shuffle=False)
    reader = tf.WholeFileReader()
    key, image_tensor = reader.read(images_tensor)
    image = tf.image.decode_png(image_tensor, channels=3)
    print(image.shape, 'newwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww shapeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee')
    # image = tf.image.resize_image_with_crop_or_pad(image, 360, 480)
    # image = tf.cast(image, tf.float32)
    image = preprocess(image)
    images = tf.train.batch([image], batch_size = 10, allow_smaller_final_batch=True)

    #Create the model inference
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(images,
                                     num_classes=2,
                                     batch_size=10,
                                     is_training=True,
                                     reuse=None,
                                     num_initial_blocks=num_initial_blocks,
                                     stage_two_repeat=stage_two_repeat,
                                     skip_connections=skip_connections)

    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    def restore_fn(sess):
        return saver.restore(sess, checkpoint)

    predictions = tf.argmax(probabilities, -1)
    predictions = tf.cast(predictions, tf.float32)
    print 'HERE', predictions.get_shape()

    sv = tf.train.Supervisor(logdir=None, init_fn=restore_fn)

    with sv.managed_session() as sess:
        for i in xrange(len(images_list) / 10):
            segmentations = sess.run(predictions)
            # print segmentations.shape
            print(segmentations.shape, 'shape')

            for j in xrange(segmentations.shape[0]):
                converted_image = grayscale_to_colour(segmentations[j])
                print 'Saving image %s/%s' %(i*10 + j, len(images_list))
                #plt.axis('off')
                #plt.imshow(converted_image)
                imsave(photo_dir + "/image_%s.png" %(i*10 + j), converted_image)
                # plt.show()
Here is the full code link: https://github.com/kwotsin/TensorFlow-ENet/blob/master/predict_segmentation.py
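Note that grayscale_to_colour above hard-codes a 256x256 reshape. A size-agnostic variant would look roughly like this (just a sketch, not the repository's code; it reuses the label_to_colours dict defined above):

import numpy as np

def grayscale_to_colour_any_size(image):
    # Sketch: colour a label map of arbitrary height/width instead of
    # assuming a fixed 256x256 prediction (label_to_colours is defined above).
    h, w = image.shape[:2]
    image = image.reshape((h, w, 1))
    image = np.repeat(image, 3, axis=-1)
    for i in range(h):
        for j in range(w):
            image[i][j] = np.array(label_to_colours[int(image[i][j][0])])
    return image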
You can try this model. It's written in tf.keras:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

print('Tensorflow', tf.__version__)

def initial_block(inp):
    inp1 = inp
    conv = Conv2D(filters=13, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal')(inp)
    pool = MaxPool2D(2)(inp1)
    concat = concatenate([conv, pool])
    return concat

def encoder_bottleneck(inp, filters, name, dilation_rate=2, downsample=False, dilated=False, asymmetric=False, drop_rate=0.1):
    reduce = filters // 4
    down = inp
    kernel_stride = 1

    #Downsample
    if downsample:
        kernel_stride = 2
        pad_activations = filters - inp.shape.as_list()[-1]
        down = MaxPool2D(2)(down)
        down = Permute(dims=(1, 3, 2))(down)
        down = ZeroPadding2D(padding=((0, 0), (0, pad_activations)))(down)
        down = Permute(dims=(1, 3, 2))(down)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=kernel_stride, strides=kernel_stride, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not dilated and not asymmetric:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    elif dilated:
        x = Conv2D(filters=reduce, kernel_size=3, padding='same', dilation_rate=dilation_rate, kernel_initializer='he_normal', name=f'{name}_reduce_dilated')(x)
    elif asymmetric:
        x = Conv2D(filters=reduce, kernel_size=(1,5), padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_asymmetric')(x)
        x = Conv2D(filters=reduce, kernel_size=(5,1), padding='same', kernel_initializer='he_normal', name=name)(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = SpatialDropout2D(rate=drop_rate)(x)

    concat = Add()([x, down])
    concat = PReLU(shared_axes=[1, 2])(concat)
    return concat

def decoder_bottleneck(inp, filters, name, upsample=False):
    reduce = filters // 4
    up = inp

    #Upsample
    if upsample:
        up = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_upsample')(up)
        up = UpSampling2D(size=2)(up)

    #1*1 Reduce
    x = Conv2D(filters=reduce, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_reduce')(inp)
    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #Conv
    if not upsample:
        x = Conv2D(filters=reduce, kernel_size=3, strides=1, padding='same', kernel_initializer='he_normal', name=f'{name}_conv_reg')(x)
    else:
        x = Conv2DTranspose(filters=reduce, kernel_size=3, strides=2, padding='same', kernel_initializer='he_normal', name=f'{name}_transpose')(x)

    x = BatchNormalization(momentum=0.1)(x)
    x = PReLU(shared_axes=[1, 2])(x)

    #1*1 Expand
    x = Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='he_normal', name=f'{name}_expand')(x)
    x = BatchNormalization(momentum=0.1)(x)

    concat = Add()([x, up])
    concat = ReLU()(concat)
    return concat

def ENet(H, W, nclasses):
    '''
    Args:
        H: Height of the image
        W: Width of the image
        nclasses: Total no of classes

    Returns:
        model: Keras model in .h5 format
    '''
    inp = Input(shape=(H, W, 3))
    enc = initial_block(inp)

    #Bottleneck 1.0
    enc = encoder_bottleneck(enc, 64, name='enc1', downsample=True, drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.1', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.2', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.3', drop_rate=0.001)
    enc = encoder_bottleneck(enc, 64, name='enc1.4', drop_rate=0.001)

    #Bottleneck 2.0
    enc = encoder_bottleneck(enc, 128, name='enc2.0', downsample=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.1')
    enc = encoder_bottleneck(enc, 128, name='enc2.2', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.3', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.4', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.5')
    enc = encoder_bottleneck(enc, 128, name='enc2.6', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.7', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc2.8', dilation_rate=16, dilated=True)

    #Bottleneck 3.0
    enc = encoder_bottleneck(enc, 128, name='enc3.0')
    enc = encoder_bottleneck(enc, 128, name='enc3.1', dilation_rate=2, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.2', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.3', dilation_rate=4, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.4')
    enc = encoder_bottleneck(enc, 128, name='enc3.5', dilation_rate=8, dilated=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.6', asymmetric=True)
    enc = encoder_bottleneck(enc, 128, name='enc3.7', dilation_rate=16, dilated=True)

    #Bottleneck 4.0
    dec = decoder_bottleneck(enc, 64, name='dec4.0', upsample=True)
    dec = decoder_bottleneck(dec, 64, name='dec4.1')
    dec = decoder_bottleneck(dec, 64, name='dec4.2')

    #Bottleneck 5.0
    dec = decoder_bottleneck(dec, 16, name='dec5.0', upsample=True)
    dec = decoder_bottleneck(dec, 16, name='dec5.1')

    dec = Conv2DTranspose(filters=nclasses, kernel_size=2, strides=2, padding='same', kernel_initializer='he_normal', name='fullconv')(dec)
    dec = Activation('softmax')(dec)

    model = Model(inputs=inp, outputs=dec, name='Enet')
    model.save(f'enet_{nclasses}.h5')
    return model
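For example, you can build it for a smaller input and run a quick shape check like this (just a usage sketch; the 256x256 size, 2 classes and dummy batch are illustrative, and I keep H and W divisible by 8 because the encoder halves the spatial size three times):

import numpy as np

# Build the model; 256x256 and 2 classes are illustrative values.
# H and W divisible by 8 keep the three downsampling stages and the
# three upsampling stages consistent with each other.
model = ENet(256, 256, nclasses=2)
model.summary()

# Sanity check on a dummy batch: expected output shape (1, 256, 256, 2),
# i.e. a per-pixel softmax over the two classes.
dummy = np.random.rand(1, 256, 256, 3).astype('float32')
pred = model.predict(dummy)
print(pred.shape)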