How to combine RNN with CNN - python

I am working on a program that subtracts using two images from MNIST. In doing so, we are using an algorithm that combines CNN and RNN.
enter image description here
class CRNN_Model(tf.keras.Model):
def __init__(self):
super(CRNN_Model, self).__init__()
self.hidden_size = 256
self.batch_size = 128
self.sequence_size = 2
self.output_size = 19
self.img_size = 28
#cnn
self.reshape = tf.keras.layers.Reshape((10000, 28, 28, 1))
self.conv1 = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', input_shape=(28, 28, 1))
self.maxpooling1 = tf.keras.layers.MaxPooling2D()
self.conv2 = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu')
self.maxpooling2 = tf.keras.layers.MaxPooling2D()
self.flatten1 = tf.keras.layers.Flatten()
self.hidden = tf.keras.layers.Dense(self.hidden_size, activation='relu')
#rnn
self.rnn_cell = tf.keras.layers.SimpleRNNCell(self.hidden_size, activation=None)
def call(self, x):
#cnn
hidden_list = []
for i in range(2):
x = x[0][i]
x = self.conv1(x)
x = self.maxpooling1(x)
x = self.conv2(x)
x = self.maxpooling2(x)
x = self.flatten1(x)
x = self.hidden(x)
hidden_list.append(x)
self.hidden_list = tf.transpose(hidden_list, perm=[1, 0, 2])
#rnn
self.initial_state = self.rnn_cell.zero_state(self.batch_size, tf_float32)
state = self.initial_state
outputs = []
for t in range(self.sequence_size):
(output, state) = self.rnn_cell(self.hidden_list[:, t, :], state)
outputs.append(output)
self.outputs = outputs
self.prediction = tf.keras.layers.Dense(self.outputs[-1], self.output_size)
self.pred_output = tf.keras.activations.softmax(self.prediction)
return self.pred_output
However, an error occurs at the input to the first conv1 layer.
ValueError: Input 0 of layer conv2d_111 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (28, 28, 1)
The shape of the data we are giving is as follows.
print(np.shape(train_x))
print(np.shape(train_y))
print(np.shape(test_x))
print(np.shape(test_y))
(10000, 2, 28, 28, 1)
→(number of data * number of image * width * height * channel)
(10000, 1)
(2000, 2, 28, 28, 1)
(2000, 1)
How do I give the data?

Related

ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 32, 10000, 1), found shape=(None, 32, 1000, 1)

What is ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 32, 10000, 1), found shape=(None, 32, 1000, 1)?
When I tried to learn model by tensoflow, I get this error. I think that the shape of the data is the same as that used to input the model. Why does they think this error occure?
def EEGNet(nb_classes, Chans = 64, Samples = 128,
dropoutRate = 0.5, kernLength = 64, F1 = 8,
D = 2, F2 = 16, norm_rate = 0.25, dropoutType = 'Dropout'):
if dropoutType == 'SpatialDropout2D':
dropoutType = Dropout # SpatialDropout2D
elif dropoutType == 'Dropout':
dropoutType = Dropout
else:
raise ValueError('dropoutType must be one of SpatialDropout2D '
'or Dropout, passed as a string.')
input1 = Input(shape = (Chans, Samples, 1))
##################################################################
block1 = Conv2D(F1, (1, kernLength), padding = 'same',
input_shape = (Chans, Samples, 1),
use_bias = False)(input1)
block1 = BatchNormalization()(block1)
block1 = DepthwiseConv2D((Chans, 1), use_bias = False,
depth_multiplier = D,
depthwise_constraint = max_norm(1.))(block1)
block1 = BatchNormalization()(block1)
block1 = Activation('elu')(block1)
block1 = AveragePooling2D((1, 4))(block1)
block1 = dropoutType(dropoutRate)(block1)
block2 = SeparableConv2D(F2, (1, 16),
use_bias = False, padding = 'same')(block1)
block2 = BatchNormalization()(block2)
block2 = Activation('elu')(block2)
block2 = AveragePooling2D((1, 8))(block2)
block2 = dropoutType(dropoutRate)(block2)
flatten = Flatten(name = 'flatten')(block2)
dense = Dense(nb_classes, name = 'dense',
kernel_constraint = max_norm(norm_rate))(flatten)
softmax = Activation('softmax', name = 'softmax')(dense)
return Model(inputs=input1, outputs=softmax)
l = np.random.rand(9573,32,1000,1)
model = EEGNet(2, Chans = 32, Samples = 10000)
early_stopping = EarlyStopping(
monitor='val_loss', min_delta=0.0001,
mode='min', patience=10, verbose=1,
restore_best_weights=True)
# Fit model
optimizer = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9,
beta_2=0.999, amsgrad=False)
model.compile(loss='binary_crossentropy', optimizer=optimizer,
metrics=['accuracy'])

How can I match a Decoder's input to a Pretrained Resnet18 Encoder?

I am trying to build a custom Decoder with skip connections to run with a pretrained Resnet18 Encoder for Image Segmentation task. The total number of classes are 150.
The Resnet18 Encoder has fc output of 512. In order to match the Encoder's output to Decoder's input, I am trying to set the Conv layers of Decoder such that it matches the output from Encoder i.e [151, 512, 1, 1]. However, no matter what combination of layers I make, I am not able to match the input and output tensors.
Here is the relevant part of Decoder Code
class ResNet18Transpose(nn.Module):
def __init__(self, transblock, layers, num_classes=150):
self.inplanes = 512
super(ResNet18Transpose, self).__init__()
self.deconv1 = self._make_transpose(transblock, 256 * transblock.expansion, layers[0], stride=2)
self.deconv2 = self._make_transpose(transblock, 128 * transblock.expansion, layers[1], stride=2)
self.deconv3 = self._make_transpose(transblock, 64 * transblock.expansion, layers[2], stride=2)
self.deconv4 = self._make_transpose(transblock, 32 * transblock.expansion, layers[3], stride=2)
self.skip0 = self._make_skip_layer(64, 64 * transblock.expansion)
self.skip1 = self._make_skip_layer(128, 64 * transblock.expansion)
self.skip2 = self._make_skip_layer(256, 64 * transblock.expansion)
self.skip3 = self._make_skip_layer(512, 128 * transblock.expansion)
self.inplanes = 64
self.final_conv = self._make_transpose(transblock, 64 * transblock.expansion, 3)
self.final_deconv = nn.ConvTranspose2d(self.inplanes * transblock.expansion, num_classes, kernel_size=2,
stride=2, padding=0, bias=True)
self.out6_conv = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)
self.out5_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.ConvTranspose2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, SynchronizedBatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
The forward block for Decoder is as follows:
def forward(self, x, labels=None, sparse_mode=False, use_skip=True):
[in0, in1, in2, in3, in4] = x
if labels:
[lab0, lab1, lab2, lab3, lab4] = labels
out6 = self.out6_conv(in4)
if sparse_mode:
if labels:
mask4 = (lab4==0).unsqueeze(1).repeat(1,in4.shape[1],1,1).type(in4.dtype)
else:
mask4 = (torch.argmax(out6, dim=1)==0).unsqueeze(1).repeat(1,in4.shape[1],1,1).type(in4.dtype)
in4 = in4 * mask4
# upsample 1
x = self.deconv1(in4)
out5 = self.out5_conv(x)
if sparse_mode:
if labels:
mask3 = (lab3==0).unsqueeze(1).repeat(1,in3.shape[1],1,1).type(in3.dtype)
else:
mask3 = (torch.argmax(out5, dim=1)==0).unsqueeze(1).repeat(1,in3.shape[1],1,1).type(in3.dtype)
in3 = in3 * mask3
if use_skip:
x = x + self.skip3(in3)
# upsample 2
x = self.deconv2(x)
out4 = self.out4_conv(x)
if sparse_mode:
if labels:
mask2 = (lab2==0).unsqueeze(1).repeat(1,in2.shape[1],1,1).type(in2.dtype)
else:
mask2 = (torch.argmax(out4, dim=1)==0).unsqueeze(1).repeat(1,in2.shape[1],1,1).type(in2.dtype)
in2 = in2 * mask2
if use_skip:
x = x + self.skip2(in2)
# upsample 3
x = self.deconv3(x)
out3 = self.out3_conv(x)
if sparse_mode:
if labels:
mask1 = (lab1==0).unsqueeze(1).repeat(1,in1.shape[1],1,1).type(in1.dtype)
else:
mask1 = (torch.argmax(out3, dim=1)==0).unsqueeze(1).repeat(1,in1.shape[1],1,1).type(in1.dtype)
in1 = in1 * mask1
if use_skip:
x = x + self.skip1(in1)
# upsample 4
x = self.deconv4(x)
out2 = self.out2_conv(x)
if sparse_mode:
if labels:
mask0 = (lab0==0).unsqueeze(1).repeat(1,in0.shape[1],1,1).type(in0.dtype)
else:
mask0 = (torch.argmax(out2, dim=1)==0).unsqueeze(1).repeat(1,in0.shape[1],1,1).type(in0.dtype)
in0 = in0 * mask0
if use_skip:
x = x + self.skip0(in0)
# final
x = self.final_conv(x)
out1 = self.final_deconv(x)
return [out6, out5, out4, out3, out2, out1]
I get the following error:
File "/project/xfu/aamir/Golden-QGN/models/resnet.py", line 447, in forward
out6 = self.out6_conv(in4)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 419, in forward
return self._conv_forward(input, self.weight)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [151, 1024, 1, 1], expected input[8, 512, 8, 8] to have 1024 channels, but got 512 channels instead
Exception in thread Thread-1:
If I change the Decoder Layers as follows:
self.out6_conv = nn.Conv2d(512, num_classes, kernel_size=1, stride=1, bias=True)
self.out5_conv = nn.Conv2d(256 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
I get the followng error. Note that the input tensor has also changed.
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 64, 3, 3], expected input[8, 32, 128, 128] to have 64 channels, but got 32 channels instead
I also tried altering the Resnet18 Encoder's fc output to 1024 instead of 512 as follows:
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
model.load_state_dict(load_url(model_urls['resnet18']))
model.fc = torch.nn.Linear(1024, 150)
return model
But nothing seems to work. I also could not find a skip connections based decoder for resnet18 on github / internet. Any help will be appreciated.
NOTE: I only want to work on Resnet18. Also the images being input to the network through dataloader are working perfectly fine with a Resnet50 Encoder + Custom Decoder. I also tried changing crop size from 0 to 128 to 256 to 512 but in vain.
Here is the code that is being run in models.py for image segmentation. I tried using the set_trace() method to debug the code. The code stops running just before the (pred, pred_quad) = self.decoder(fmap, labels_scaled) line in following code
class SegmentationModule(SegmentationModuleBase):
def __init__(self, net_enc, net_dec, crit, deep_sup_scale=None, quad_sup=False, running_avg_param=0.99):
super(SegmentationModule, self).__init__()
self.encoder = net_enc
self.decoder = net_dec
self.crit = crit
if deep_sup_scale:
if deep_sup_scale < 0:
self.adapt_weights = True
self.running_avg_param = running_avg_param
deep_sup_scale = 1
else:
self.adapt_weights = False
self.loss_weights = [(deep_sup_scale**(i+1)) for i in range(5)]
self.quad_sup = quad_sup
def forward(self, feed_dict, *, segSize=None):
inputs = feed_dict['img_data'].cuda()
if segSize is None: # training
labels_orig_scale = feed_dict['seg_label_0'].cuda()
labels_scaled = []
fmap = self.encoder(inputs, return_feature_maps=True)
if self.quad_sup:
labels_scaled.append(feed_dict['seg_label_1'].cuda())
labels_scaled.append(feed_dict['seg_label_2'].cuda())
labels_scaled.append(feed_dict['seg_label_3'].cuda())
labels_scaled.append(feed_dict['seg_label_4'].cuda())
labels_scaled.append(feed_dict['seg_label_5'].cuda())
(pred, pred_quad) = self.decoder(fmap, labels_scaled)
else:
pred = self.decoder(fmap)
loss = self.crit(pred, labels_orig_scale)
if self.quad_sup:
loss_orig = loss
for i in range(len(pred_quad)):
loss_quad = self.crit(pred_quad[i], labels_scaled[i])
loss = loss + loss_quad * self.loss_weights[i]
if self.adapt_weights:
self.loss_weights[i] = self.running_avg_param * self.loss_weights[i] + \
(1 - self.running_avg_param) * (loss_quad/loss_orig).data.cpu().numpy()
acc = self.pixel_acc(pred, labels_orig_scale, self.quad_sup)
return loss, acc
else: # inference
if 'qtree' in feed_dict:
labels_scaled = [feed_dict['qtree'][l].cuda() for l in range(1,6)]
else:
labels_scaled = None
pred = self.decoder(self.encoder(inputs, return_feature_maps=True), labels_scaled, segSize=segSize)
return pred
So here is how I resolved the error.
Disabled the skip connections. Not sure why the decoder doesnt work with skip connections.
I had given incorrect layers [6, 3, 4, 3] input to Decoder. i.e for Resnet18 the layers should be [2,2,2,2]

Setting CNN input dimensions correctly (Pytorch)

I keep getting this error:
Traceback (most recent call last):
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 201, in <module>
logps = model(images)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 95, in forward
x = self.pool(F.relu(self.conv1(x)))
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 396, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [10, 307200] instead
This is the model I am trying to use:
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
# we use the maxpool multiple times, but define it once
self.pool = nn.MaxPool2d(2, 2)
# in_channels = 6 because self.conv1 output 6 channel
self.conv2 = nn.Conv2d(6, 16, 5)
# 5*5 comes from the dimension of the last convnet layer
self.fc1 = nn.Linear(input_size, hidden_sizes[0])
self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
self.fc3 = nn.Linear(hidden_sizes[1], output_size)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x) # no activation on final layer
return x
Input images are 640x480.
Here is my main function as well:
if __name__ == __main__:
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
model = Net().to(device)
criterion = nn.NLLLoss()
images, labels = next(iter(train_loader))
# images = images.view(images.shape[0], -1)
print(images.shape)
images = images.view(images.shape[0], -1)
logps = model(images)
loss = criterion(logps, labels) # calculate the NLL loss
# training process
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
loss_fn = nn.NLLLoss()
# train the model
train_model(num_epochs, model)
# validate the model
validate_model(model)
print("Execution has finished")
How can I adjust either the expected input dimensions or the weights? Or can I change the dimensions of my input to properly reflect the structure of my model? If so, how can I do this?
based off the convolutional part i did notice the problem, where your final pooling layer out channel was not calculated correctly
The following is how the code should work based off your input size that you mentioned 640x480x1
import torch
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
# we use the maxpool multiple times, but define it once
self.pool = nn.MaxPool2d(2, 2)
# in_channels = 6 because self.conv1 output 6 channel
self.conv2 = nn.Conv2d(6, 16, 5)
# 5*5 comes from the dimension of the last convnet layer
self.fc1 = nn.Linear(16 * 157 * 117, 256) # You didnt provide the numbers here but i did calculate the in channels based off the prev layer
self.fc2 = nn.Linear(256, 10)
self.fc3 = nn.Linear(10, 2)
def forward(self, x):
x = self.conv1(x)
print('Conv1 Shape: {}'.format(x.shape))
x = self.pool(F.relu(x))
print('Pool1 Shape: {}'.format(x.shape))
x = self.conv2(x)
print('Conv2 Shape: {}'.format(x.shape))
x = self.pool(F.relu(x))
print('Pool2 Shape: {}'.format(x.shape))
x = x.view(-1, 16 * 157 * 117)
print('Flatten Shape: {}'.format(x.shape))
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x) # no activation on final layer
return x
model = Model()
model(torch.randn(1, 1, 640, 480)) # 1 Channel since your first conv layer input inchannel is 1
Output shape for each layer
model(torch.randn(1, 1, 640, 480))
Conv1 Shape: torch.Size([1, 6, 636, 476])
Pool1 Shape: torch.Size([1, 6, 318, 238])
Conv2 Shape: torch.Size([1, 16, 314, 234])
Pool2 Shape: torch.Size([1, 16, 157, 117])
Flatten Shape: torch.Size([1, 293904])

ValueError: Input 0 of layer conv1d is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (None, 19)

I am building a prediction model for the sequence data using conv1d layer provided by Keras. This is how I did`
def autoencoder():
#autoencoder = Model(inputs=input_layer, outputs=decoder)
input_dim = x_train_scaled.shape[1]
input_layer = Input(shape=(input_dim,))
conv1 = Conv1D(filters = 32, kernel_size=3,activation='relu') (input_layer)
batch1 = BatchNormalization()(conv1)
maxp1 = MaxPooling1D(pool_size=2)(batch1)
dropout1 = Dropout(0.2)(maxp1)
conv2 = Conv1D(filters = 16, kernel_size=3,activation='relu') (dropout1)
batch2 = BatchNormalization()(conv2)
maxp2 = MaxPooling1D(2)(batch2)
dropout2 = Dropout(0.2)(maxp2)
conv3 = Conv1D(filters = 8, kernel_size=3,activation='relu') (dropout2)
batch3 = BatchNormalization()(conv3)
maxp3 = MaxPooling1D(2)(batch3)
dropout3 = Dropout(0.2)(maxp3)
#decoder layers
conv4 = Conv1D(filters = 8, kernel_size=3,activation='relu') (dropout3)
batch4 = BatchNormalization()(conv4)
dropout4 = Dropout(0.2)(batch4)
conv5 = Conv1D(filters = 16, kernel_size=3,activation='relu') (dropout4)
batch5 = BatchNormalization()(conv5)
unsamp5 = UpSampling1D(2)(batch5)
dropout5 = Dropout(0.2)(unsamp5)
conv6 = Conv1D(filters = 32, kernel_size=3,activation='relu') (dropout5)
batch6 = BatchNormalization()(conv6)
unsamp6 = UpSampling1D(2)(batch6)
dropout6 = Dropout(0.2)(unsamp6)
decoder = Conv1D(filters = 1, kernel_size=3,activation='sigmoid') (dropout6)
return Model(input_layer, decoder)
Train model to reduce the data dimension using autoencoder
model.compile(loss='categorical_crossentropy', optimizer='adam',
metrics=['accuracy'])
model.fit(x_train_scaled, x_train_scaled, epochs=15, batch_size=32,
verbose=verbose, shuffle=True)
However, the debugging information has
ValueError: Input 0 of layer conv1d is incompatible with the layer: :
expected min_ndim=3, found ndim=2. Full shape received: (None, 19)
The training data and validation data shape are as follows
x_train_scaled shape (125973, 19)
Dataset use to train model NSL-KDD(https://www.unb.ca/cic/datasets/nsl.html)
Conv1D expects 3+D tensor with shape: batch_shape + (steps, input_dim). Add extra dimension to your input.
Working sample code
# The inputs are 128-length vectors with 10 timesteps, and the batch size
# is 4.
input_shape = (4, 10, 128)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv1D(
32, 3, activation='relu',input_shape=input_shape[1:])(x)
print(y.shape)
Output
(4, 8, 32)

Incompatible shape TensorFlow keras

I am getting this error when I am trying to train my model:
ValueError: Input 0 of layer dense_encoder is incompatible with the layer: expected axis -1 of input shape to have value 2048 but received input with shape [446, 98, 1024]
My model architecture is:
input1 = Input(shape=(2048), name='Image_1')
dense1 = Dense(256, kernel_initializer=tf.keras.initializers.glorot_uniform(seed = 56), name='dense_encoder')(input1)
input2 = Input(shape=(153), name='Text_Input')
emb_layer = Embedding(input_dim = vocab_size, output_dim = 300, input_length=153, mask_zero=True, trainable=False,
weights=[embedding_matrix], name="Embedding_layer")
emb = emb_layer(input2)
LSTM1 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
bias_initializer=tf.keras.initializers.zeros(), return_sequences=True, name="LSTM1")(emb)
#LSTM1_output = LSTM1(emb)
LSTM2 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
bias_initializer=tf.keras.initializers.zeros(), name="LSTM2")
LSTM2_output = LSTM2(LSTM1)
dropout1 = Dropout(0.5, name='dropout1')(LSTM2_output)
dec = tf.keras.layers.Add()([dense1, dropout1])
fc1 = Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.he_normal(seed = 63), name='fc1')
fc1_output = fc1(dec)
dropout2 = Dropout(0.4, name='dropout2')(fc1_output)
output_layer = Dense(vocab_size, activation='softmax', name='Output_layer')
output = output_layer(dropout2)
encoder_decoder = Model(inputs = [input1, input2], outputs = output)
encoder_decoder.summary()
Here's my code for training the model:
for epoch in range(20):
print('EPOCH : ',epoch+1)
start = time.time()
batch_loss_tr = 0
batch_loss_vl = 0
for img, report in train_generator:
r1 = bytes_to_string(report.numpy())
img_input, rep_input, output_word = convert(img.numpy(), r1)
rep_input = pad_sequences(rep_input, maxlen=153, padding='post')
results = encoder_decoder.train_on_batch([img_input, rep_input], output_word)
batch_loss_tr += results
train_loss = batch_loss_tr/(X_train.shape[0]//14)
with train_summary_writer.as_default():
tf.summary.scalar('loss', train_loss, step = epoch)
for img, report in cv_generator:
r1 = bytes_to_string(report.numpy())
img_input, rep_input, output_word = convert(img.numpy(), r1)
rep_input = pad_sequences(rep_input, maxlen=153, padding='post')
results = encoder_decoder.test_on_batch([img_input, rep_input], output_word)
batch_loss_vl += results
The img_input shape is (417, 98, 1024) and I am getting the error for Image_1 layer.
What could be the reasons? Any help would be appreciated.

Categories