System information
OS Platform and Distribution: CentOS Linux release 7.7.1908
TensorFlow version: 2.3.0
I am following this example: https://www.tensorflow.org/tutorials/text/image_captioning?hl=en
It is working as it should and saving checkpoints, and I now want to convert this to a TF Lite model.
Here is the link to the full conversion code: https://colab.research.google.com/drive/1GJkGcwWvDAWMooTsECzuSRUSPbirADhb?usp=sharing
Here is the link to the full training code:
https://colab.research.google.com/drive/1X2d9WW1EMEzN8Rgva3rtjevP0T_jFccj?usp=sharing
I am also following issue #32999.
Here is what I am running to save and then convert the inference graph:
@tf.function
def evaluate(image):
    hidden = decoder.reset_states(batch_size=1)
    temp_input = tf.expand_dims(load_image(image)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    features = encoder(img_tensor_val)
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []
    for i in range(max_length):
        predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
        predicted_id = tf.random.categorical(predictions, 1)[0][0]
        # print(tokenizer.index_word)
        print(predicted_id, predicted_id.dtype)
        # for key, value in tokenizer.index_word.items():
        #     key = tf.convert_to_tensor(key)
        #     tf.dtypes.cast(key, tf.int64)
        #     print(key)
        # print(tokenizer.index_word)
        result.append(predicted_id)
        # if tokenizer.index_word[predicted_id] == '<end>':
        #     return result
        dec_input = tf.expand_dims([predicted_id], 0)
    return result

export_dir = "./"
tflite_enc_input = ''
ckpt.f = evaluate
to_save = evaluate.get_concrete_function('')

converter = tf.lite.TFLiteConverter.from_concrete_functions([to_save])
tflite_model = converter.convert()
But I get this error:
ValueError: in user code:
convert2savedmodel.py:310 evaluate *
predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:985 __call__ **
outputs = call_fn(inputs, *args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:780 __call__
result = self._call(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:840 _call
return self._stateless_fn(*args, **kwds)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2828 __call__
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3171 _maybe_define_function
*args, **kwargs)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2622 canonicalize_function_inputs
self._flat_input_signature)
/share/nishome/19930072_0/miniconda3/envs/tf2.3/lib/python3.7/site-packages/tensorflow/python/eager/function.py:2713 _convert_inputs_to_signature
format_error_message(inputs, input_signature))
ValueError: Python inputs incompatible with input_signature:
inputs: (
Tensor("ExpandDims_1:0", shape=(1, 1), dtype=int32),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("zeros:0", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int64, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Encoder Model:
class CNN_Encoder(tf.keras.Model):
    def __init__(self, embedding):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        self.fc = tf.keras.layers.Dense(embedding_dim)

    @tf.function(input_signature=[tf.TensorSpec(shape=(1, 64, features_shape), dtype=tf.dtypes.float32)])
    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x
Decoder model:
class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform',
                                       unroll=True)
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)
        self.attention = BahdanauAttention(self.units)

    @tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int64),
                                  tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32),
                                  tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])
    def call(self, x, features, hidden):
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size, max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_states(self, batch_size):
        return tf.zeros((batch_size, self.units))
I then changed the tf.function input signature to int32 as below:

@tf.function(input_signature=[tf.TensorSpec(shape=[1, 1], dtype=tf.int32),
                              tf.TensorSpec(shape=[1, 64, 256], dtype=tf.float32),
                              tf.TensorSpec(shape=[1, 512], dtype=tf.float32)])

but another error appeared:
ValueError: Python inputs incompatible with input_signature:
inputs: (
Tensor("ExpandDims_2:0", shape=(1, 1), dtype=int64),
Tensor("cnn__encoder/StatefulPartitionedCall:0", shape=(1, 64, 256), dtype=float32),
Tensor("rnn__decoder/StatefulPartitionedCall:1", shape=(1, 512), dtype=float32))
input_signature: (
TensorSpec(shape=(1, 1), dtype=tf.int32, name=None),
TensorSpec(shape=(1, 64, 256), dtype=tf.float32, name=None),
TensorSpec(shape=(1, 512), dtype=tf.float32, name=None))
Why do the dtypes of the inputs change from int64 to int32?
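For context, a minimal sketch (hypothetical values, not taken from the tutorial) of why both errors can appear: the first dec_input is built from a Python int, which TensorFlow converts to int32, while tf.random.categorical returns int64 by default, so the dtype fed to the decoder changes between the first and later loop iterations.

import tensorflow as tf

start_token = 3                            # hypothetical value of tokenizer.word_index['<start>']
dec_input = tf.expand_dims([start_token], 0)
print(dec_input.dtype)                     # int32 -- Python ints are converted to int32

logits = tf.zeros((1, 5001))               # stand-in for the decoder's prediction logits
predicted_id = tf.random.categorical(logits, 1)[0][0]
print(predicted_id.dtype)                  # int64 -- tf.random.categorical's default output dtype

# Casting both to one dtype (here int64, matching the original signature) keeps
# every loop iteration consistent with the decorator's input_signature:
dec_input = tf.cast(tf.expand_dims([start_token], 0), tf.int64)
next_dec_input = tf.expand_dims([predicted_id], 0)   # already int64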
I am trying to build a custom decoder with skip connections to run with a pretrained ResNet18 encoder for an image segmentation task. The total number of classes is 150.
The ResNet18 encoder has an fc output of 512. In order to match the encoder's output to the decoder's input, I am trying to set the conv layers of the decoder so that they match the output from the encoder, i.e. [151, 512, 1, 1]. However, no matter what combination of layers I try, I am not able to match the input and output tensors.
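Before sizing the decoder, it may help to print the channel count of each encoder stage. A quick sketch, assuming a standard torchvision-style ResNet-18 (whose four stages output 64, 128, 256 and 512 channels):

import torch
import torchvision

# Hypothetical shape check for a standard ResNet-18 backbone.
m = torchvision.models.resnet18()
x = torch.randn(1, 3, 256, 256)
x = m.maxpool(m.relu(m.bn1(m.conv1(x))))
for name in ["layer1", "layer2", "layer3", "layer4"]:
    x = getattr(m, name)(x)
    print(name, tuple(x.shape))   # layer4 -> (1, 512, 8, 8): 512 channels, not 1024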
Here is the relevant part of the decoder code:
class ResNet18Transpose(nn.Module):
    def __init__(self, transblock, layers, num_classes=150):
        self.inplanes = 512
        super(ResNet18Transpose, self).__init__()
        self.deconv1 = self._make_transpose(transblock, 256 * transblock.expansion, layers[0], stride=2)
        self.deconv2 = self._make_transpose(transblock, 128 * transblock.expansion, layers[1], stride=2)
        self.deconv3 = self._make_transpose(transblock, 64 * transblock.expansion, layers[2], stride=2)
        self.deconv4 = self._make_transpose(transblock, 32 * transblock.expansion, layers[3], stride=2)
        self.skip0 = self._make_skip_layer(64, 64 * transblock.expansion)
        self.skip1 = self._make_skip_layer(128, 64 * transblock.expansion)
        self.skip2 = self._make_skip_layer(256, 64 * transblock.expansion)
        self.skip3 = self._make_skip_layer(512, 128 * transblock.expansion)
        self.inplanes = 64
        self.final_conv = self._make_transpose(transblock, 64 * transblock.expansion, 3)
        self.final_deconv = nn.ConvTranspose2d(self.inplanes * transblock.expansion, num_classes, kernel_size=2,
                                               stride=2, padding=0, bias=True)
        self.out6_conv = nn.Conv2d(1024, num_classes, kernel_size=1, stride=1, bias=True)
        self.out5_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
        self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.ConvTranspose2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, SynchronizedBatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
The forward block for Decoder is as follows:
def forward(self, x, labels=None, sparse_mode=False, use_skip=True):
    [in0, in1, in2, in3, in4] = x
    if labels:
        [lab0, lab1, lab2, lab3, lab4] = labels

    out6 = self.out6_conv(in4)

    if sparse_mode:
        if labels:
            mask4 = (lab4 == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        else:
            mask4 = (torch.argmax(out6, dim=1) == 0).unsqueeze(1).repeat(1, in4.shape[1], 1, 1).type(in4.dtype)
        in4 = in4 * mask4

    # upsample 1
    x = self.deconv1(in4)
    out5 = self.out5_conv(x)

    if sparse_mode:
        if labels:
            mask3 = (lab3 == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        else:
            mask3 = (torch.argmax(out5, dim=1) == 0).unsqueeze(1).repeat(1, in3.shape[1], 1, 1).type(in3.dtype)
        in3 = in3 * mask3

    if use_skip:
        x = x + self.skip3(in3)

    # upsample 2
    x = self.deconv2(x)
    out4 = self.out4_conv(x)

    if sparse_mode:
        if labels:
            mask2 = (lab2 == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        else:
            mask2 = (torch.argmax(out4, dim=1) == 0).unsqueeze(1).repeat(1, in2.shape[1], 1, 1).type(in2.dtype)
        in2 = in2 * mask2

    if use_skip:
        x = x + self.skip2(in2)

    # upsample 3
    x = self.deconv3(x)
    out3 = self.out3_conv(x)

    if sparse_mode:
        if labels:
            mask1 = (lab1 == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        else:
            mask1 = (torch.argmax(out3, dim=1) == 0).unsqueeze(1).repeat(1, in1.shape[1], 1, 1).type(in1.dtype)
        in1 = in1 * mask1

    if use_skip:
        x = x + self.skip1(in1)

    # upsample 4
    x = self.deconv4(x)
    out2 = self.out2_conv(x)

    if sparse_mode:
        if labels:
            mask0 = (lab0 == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        else:
            mask0 = (torch.argmax(out2, dim=1) == 0).unsqueeze(1).repeat(1, in0.shape[1], 1, 1).type(in0.dtype)
        in0 = in0 * mask0

    if use_skip:
        x = x + self.skip0(in0)

    # final
    x = self.final_conv(x)
    out1 = self.final_deconv(x)

    return [out6, out5, out4, out3, out2, out1]
I get the following error:
File "/project/xfu/aamir/Golden-QGN/models/resnet.py", line 447, in forward
out6 = self.out6_conv(in4)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 419, in forward
return self._conv_forward(input, self.weight)
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [151, 1024, 1, 1], expected input[8, 512, 8, 8] to have 1024 channels, but got 512 channels instead
Exception in thread Thread-1:
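The mismatch can be reproduced in isolation (a sketch using the shapes reported in the error): out6_conv was built with in_channels=1024, but the last encoder feature map only has 512 channels.

import torch
import torch.nn as nn

in4 = torch.randn(8, 512, 8, 8)                  # last encoder feature map, shape from the error
out6_conv = nn.Conv2d(1024, 151, kernel_size=1)  # weight [151, 1024, 1, 1], as in the error
out6_conv(in4)  # RuntimeError: expected input to have 1024 channels, but got 512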
If I change the Decoder Layers as follows:
self.out6_conv = nn.Conv2d(512, num_classes, kernel_size=1, stride=1, bias=True)
self.out5_conv = nn.Conv2d(256 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out4_conv = nn.Conv2d(128 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out3_conv = nn.Conv2d(64 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
self.out2_conv = nn.Conv2d(32 * transblock.expansion, num_classes, kernel_size=1, stride=1, bias=True)
I get the following error. Note that the input tensor has also changed.
File "/project/xfu/aamir/anaconda3/envs/QGN/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 415, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: Given groups=1, weight of size [64, 64, 3, 3], expected input[8, 32, 128, 128] to have 64 channels, but got 32 channels instead
I also tried altering the Resnet18 Encoder's fc output to 1024 instead of 512 as follows:
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnet18']))
    model.fc = torch.nn.Linear(1024, 150)
    return model
But nothing seems to work. I also could not find a skip-connection-based decoder for ResNet18 on GitHub or elsewhere on the internet. Any help will be appreciated.
NOTE: I only want to work with ResNet18. Also, the images being fed to the network through the dataloader work perfectly fine with a ResNet50 encoder + custom decoder. I also tried changing the crop size from 0 to 128 to 256 to 512, but in vain.
Here is the code that is run in models.py for image segmentation. I tried using the set_trace() method to debug the code. The code stops running just before the (pred, pred_quad) = self.decoder(fmap, labels_scaled) line in the following code:
class SegmentationModule(SegmentationModuleBase):
    def __init__(self, net_enc, net_dec, crit, deep_sup_scale=None, quad_sup=False, running_avg_param=0.99):
        super(SegmentationModule, self).__init__()
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit = crit
        if deep_sup_scale:
            if deep_sup_scale < 0:
                self.adapt_weights = True
                self.running_avg_param = running_avg_param
                deep_sup_scale = 1
            else:
                self.adapt_weights = False
            self.loss_weights = [(deep_sup_scale**(i+1)) for i in range(5)]
        self.quad_sup = quad_sup

    def forward(self, feed_dict, *, segSize=None):
        inputs = feed_dict['img_data'].cuda()
        if segSize is None:  # training
            labels_orig_scale = feed_dict['seg_label_0'].cuda()
            labels_scaled = []
            fmap = self.encoder(inputs, return_feature_maps=True)

            if self.quad_sup:
                labels_scaled.append(feed_dict['seg_label_1'].cuda())
                labels_scaled.append(feed_dict['seg_label_2'].cuda())
                labels_scaled.append(feed_dict['seg_label_3'].cuda())
                labels_scaled.append(feed_dict['seg_label_4'].cuda())
                labels_scaled.append(feed_dict['seg_label_5'].cuda())
                (pred, pred_quad) = self.decoder(fmap, labels_scaled)
            else:
                pred = self.decoder(fmap)

            loss = self.crit(pred, labels_orig_scale)
            if self.quad_sup:
                loss_orig = loss
                for i in range(len(pred_quad)):
                    loss_quad = self.crit(pred_quad[i], labels_scaled[i])
                    loss = loss + loss_quad * self.loss_weights[i]
                    if self.adapt_weights:
                        self.loss_weights[i] = self.running_avg_param * self.loss_weights[i] + \
                            (1 - self.running_avg_param) * (loss_quad/loss_orig).data.cpu().numpy()

            acc = self.pixel_acc(pred, labels_orig_scale, self.quad_sup)
            return loss, acc
        else:  # inference
            if 'qtree' in feed_dict:
                labels_scaled = [feed_dict['qtree'][l].cuda() for l in range(1, 6)]
            else:
                labels_scaled = None
            pred = self.decoder(self.encoder(inputs, return_feature_maps=True), labels_scaled, segSize=segSize)
            return pred
So here is how I resolved the error:
1. I disabled the skip connections. (Not sure why the decoder doesn't work with skip connections.)
2. I had given incorrect layers [6, 3, 4, 3] as input to the Decoder; for ResNet18 the layers should be [2, 2, 2, 2] (see the sketch below).
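A hypothetical instantiation matching that fix (TransBasicBlock stands in for whatever transpose block class is actually passed):

# ResNet-18 stacks two BasicBlocks per stage, so the transpose decoder mirrors that.
decoder = ResNet18Transpose(TransBasicBlock, layers=[2, 2, 2, 2], num_classes=150)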
I keep getting this error:
Traceback (most recent call last):
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 201, in <module>
logps = model(images)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/robbymoseley/Desktop/IOP/IOP-ML/Model/IOP_model.py", line 95, in forward
x = self.pool(F.relu(self.conv1(x)))
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/Applications/Python Virtual Environment/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 396, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [6, 1, 5, 5], but got 2-dimensional input of size [10, 307200] instead
This is the model I am trying to use:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        # we use the maxpool multiple times, but define it once
        self.pool = nn.MaxPool2d(2, 2)
        # in_channels = 6 because self.conv1 output 6 channel
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 5*5 comes from the dimension of the last convnet layer
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], output_size)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no activation on final layer
        return x
Input images are 640x480.
Here is my main function as well:
if __name__ == "__main__":
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    model = Net().to(device)
    criterion = nn.NLLLoss()

    images, labels = next(iter(train_loader))
    # images = images.view(images.shape[0], -1)
    print(images.shape)
    images = images.view(images.shape[0], -1)
    logps = model(images)
    loss = criterion(logps, labels)  # calculate the NLL loss

    # training process
    optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
    loss_fn = nn.NLLLoss()

    # train the model
    train_model(num_epochs, model)
    # validate the model
    validate_model(model)

    print("Execution has finished")
How can I adjust either the expected input dimensions or the weights? Or can I change the dimensions of my input to properly reflect the structure of my model? If so, how can I do this?
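For what it's worth, the flattened size in the traceback matches 640 × 480 × 1 = 307200, which suggests the images were reshaped to 2-D before reaching conv1. A minimal sketch, assuming the loader yields (N, 1, H, W) grayscale batches:

images, labels = next(iter(train_loader))
print(images.shape)                 # e.g. torch.Size([10, 1, 480, 640])

# Conv2d expects a 4-D (N, C, H, W) tensor, so do not flatten before the conv layers;
# images.view(images.shape[0], -1) is what produces the (10, 307200) shape in the error.
logps = model(images.to(device))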
Based on the convolutional part, I did notice the problem: your final pooling layer's output shape was not calculated correctly.
The following is how the code should work, based on the input size you mentioned (640x480x1).
import torch
import torch.nn as nn
import torch.nn.functional as F
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        # we use the maxpool multiple times, but define it once
        self.pool = nn.MaxPool2d(2, 2)
        # in_channels = 6 because self.conv1 outputs 6 channels
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 * 157 * 117 comes from the dimensions of the last conv/pool layer
        self.fc1 = nn.Linear(16 * 157 * 117, 256)  # You didn't provide the numbers here, but I calculated the in_features from the previous layer
        self.fc2 = nn.Linear(256, 10)
        self.fc3 = nn.Linear(10, 2)

    def forward(self, x):
        x = self.conv1(x)
        print('Conv1 Shape: {}'.format(x.shape))
        x = self.pool(F.relu(x))
        print('Pool1 Shape: {}'.format(x.shape))
        x = self.conv2(x)
        print('Conv2 Shape: {}'.format(x.shape))
        x = self.pool(F.relu(x))
        print('Pool2 Shape: {}'.format(x.shape))
        x = x.view(-1, 16 * 157 * 117)
        print('Flatten Shape: {}'.format(x.shape))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no activation on final layer
        return x

model = Model()
model(torch.randn(1, 1, 640, 480))  # 1 channel, since your first conv layer's in_channels is 1
Output shape for each layer
model(torch.randn(1, 1, 640, 480))
Conv1 Shape: torch.Size([1, 6, 636, 476])
Pool1 Shape: torch.Size([1, 6, 318, 238])
Conv2 Shape: torch.Size([1, 16, 314, 234])
Pool2 Shape: torch.Size([1, 16, 157, 117])
Flatten Shape: torch.Size([1, 293904])
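For reference, these numbers follow the usual output-size rule, (size + 2*padding - kernel) // stride + 1, with each 2x2 max-pool halving the spatial dimensions; a small helper (not part of the answer above) reproduces them:

def conv_out(size, kernel, stride=1, padding=0):
    return (size + 2 * padding - kernel) // stride + 1

h = conv_out(640, 5)            # 636 after conv1 (5x5, stride 1)
h = conv_out(h, 2, stride=2)    # 318 after the first 2x2 max-pool
h = conv_out(h, 5)              # 314 after conv2
h = conv_out(h, 2, stride=2)    # 157 after the second pool; the 480 side gives 117 the same way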
I am working on a program that performs subtraction using two images from MNIST. For this, we are using an algorithm that combines a CNN and an RNN.
class CRNN_Model(tf.keras.Model):
    def __init__(self):
        super(CRNN_Model, self).__init__()
        self.hidden_size = 256
        self.batch_size = 128
        self.sequence_size = 2
        self.output_size = 19
        self.img_size = 28

        # cnn
        self.reshape = tf.keras.layers.Reshape((10000, 28, 28, 1))
        self.conv1 = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', input_shape=(28, 28, 1))
        self.maxpooling1 = tf.keras.layers.MaxPooling2D()
        self.conv2 = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu')
        self.maxpooling2 = tf.keras.layers.MaxPooling2D()
        self.flatten1 = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(self.hidden_size, activation='relu')

        # rnn
        self.rnn_cell = tf.keras.layers.SimpleRNNCell(self.hidden_size, activation=None)

    def call(self, x):
        # cnn
        hidden_list = []
        for i in range(2):
            x = x[0][i]
            x = self.conv1(x)
            x = self.maxpooling1(x)
            x = self.conv2(x)
            x = self.maxpooling2(x)
            x = self.flatten1(x)
            x = self.hidden(x)
            hidden_list.append(x)
        self.hidden_list = tf.transpose(hidden_list, perm=[1, 0, 2])

        # rnn
        self.initial_state = self.rnn_cell.zero_state(self.batch_size, tf_float32)
        state = self.initial_state
        outputs = []
        for t in range(self.sequence_size):
            (output, state) = self.rnn_cell(self.hidden_list[:, t, :], state)
            outputs.append(output)
        self.outputs = outputs

        self.prediction = tf.keras.layers.Dense(self.outputs[-1], self.output_size)
        self.pred_output = tf.keras.activations.softmax(self.prediction)
        return self.pred_output
However, an error occurs at the input to the first conv1 layer.
ValueError: Input 0 of layer conv2d_111 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (28, 28, 1)
The shape of the data we are giving is as follows.
print(np.shape(train_x))
print(np.shape(train_y))
print(np.shape(test_x))
print(np.shape(test_y))
(10000, 2, 28, 28, 1)
→ (number of samples × number of images × width × height × channels)
(10000, 1)
(2000, 2, 28, 28, 1)
(2000, 1)
How should I feed the data?
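One detail worth checking (a sketch, not a verified fix): inside call, x = x[0][i] indexes a single sample, which yields exactly the 3-D (28, 28, 1) shape in the error, and it also overwrites x so the second iteration no longer sees the original input. Slicing the image axis while keeping the batch axis gives Conv2D the 4-D input it expects:

import numpy as np

batch = np.zeros((128, 2, 28, 28, 1), dtype="float32")  # (batch, image index, H, W, C)
print(batch[0][0].shape)   # (28, 28, 1)      -> 3-D, triggers the min_ndim=4 error
print(batch[:, 0].shape)   # (128, 28, 28, 1) -> 4-D, what Conv2D expects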
I am trying to build a model that looks like an encoder-decoder model, but the difference is that I use the same LSTM layer in both for loops.
To merge the hidden states from t-1 and t-2, I built a middle layer called MyLayer. But the following error is always raised:
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1']
I spent the whole afternoon debugging but couldn't figure it out.
Most of the answers I found are about avoiding Input layers for intermediate parameters, but this approach does not work for me.
I tried printing out "lstm_layer_18/Identity_1:0" from the error, which is the state_h in the second for loop, but I cannot fix it. Can anyone help me find the error? My code is shown below.
class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        assert isinstance(input_shape, list)
        # Create a trainable weight for this layer
        self.kernel_1 = self.add_weight(name='kernel_1',
                                        shape=input_shape[0],
                                        initializer='uniform',
                                        trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=input_shape[1],
                                        initializer='uniform',
                                        trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, v):
        assert isinstance(v, list)
        a, b = v
        return Add()([Multiply()([a, self.kernel_1]),
                      Multiply()([b, self.kernel_2])])

    def compute_output_shape(self, input_shape):
        assert isinstance(input_shape, list)
        shape_a, shape_b = input_shape
        assert shape_a == shape_b
        return shape_a


def cf_lstm_model(input_step, output_step, features=2, embed_dim=64, lstm_dim=128, batch_size=8):
    # input_shape: (batch_size, input_step, features)
    x = Input(batch_shape=(batch_size, input_step, features))
    h_1 = Input(batch_shape=(batch_size, lstm_dim))
    h_2 = Input(batch_shape=(batch_size, lstm_dim))
    c = Input(batch_shape=(batch_size, lstm_dim))
    state_c = c

    embedding = Dense(embed_dim, activation='relu', name='embedding')
    inverse_embedding = Dense(features, activation='relu', name='inverse_embedding')
    lstm_layer = LSTM(lstm_dim, activation='relu', return_state=True, name='lstm_layer')
    cascade = MyLayer(lstm_dim)
    # decoder = LSTM(lstm_dim, activation='relu', return_state=True, name='decoder')

    # embedding ---> (batch_size, input_step, embed_dim)
    emb_x = embedding(x)

    # cascade_input
    cascade_input = [h_2, h_1]

    # Observe
    for i in range(input_step - 1):
        h_star = cascade(cascade_input)
        m = Lambda(lambda xx: xx[:, i, :], name='lambda1')(emb_x)
        m1 = Reshape((1, embed_dim), name='reshape1')(m)
        lstm1, state_h, state_c = lstm_layer(m1, [h_star, state_c])
        cascade_input.pop()
        cascade_input.append(state_h)

    out = Lambda(lambda xx: xx[:, input_step - 1, :], name='lambda2')(emb_x)
    out = Reshape((1, embed_dim), name='reshape2')(out)

    # Predict
    for j in range(output_step):
        h_star = cascade(cascade_input)
        out, state_h, state_c = lstm_layer(out, [h_star, state_c])
        # print('state_h', state_h)
        print('state_c', state_c)
        out = inverse_embedding(out)
        if j == 0:
            all_outputs = Reshape((1, features), name='reshape3')(out)
            # print('output shape:', out.shape, K.int_shape(out))
        else:
            # Note the axis dimension
            all_outputs = K.concatenate([all_outputs, Reshape((1, features))(out)], axis=1)
            # print('output shape:', K.int_shape(outputs))
        cascade_input.pop()
        cascade_input.append(state_h)
        out = embedding(out)
        out = Reshape((1, embed_dim), name='reshape4')(out)
        # print('out', out.shape)

    return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs)


cf_lstm = cf_lstm_model(input_step=8,
                        output_step=12,
                        embed_dim=64,
                        lstm_dim=128)
Error Feedback
state_c Tensor("lstm_layer_7/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_8/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_9/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_10/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_11/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_12/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_13/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_14/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_15/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_16/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_17/Identity_2:0", shape=(8, 128), dtype=float32)
state_c Tensor("lstm_layer_18/Identity_2:0", shape=(8, 128), dtype=float32)
Traceback (most recent call last):
File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 105, in <module>
cf_lstm = cf_lstm_model(input_step=8,
File "/Users/duoshaoshishi/Desktop/Pedestrian prediction/CF-lstm/model.py", line 102, in cf_lstm_model
return Model(inputs=[x, h_1, h_2, c], outputs=all_outputs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 167, in __init__
super(Model, self).__init__(*args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 173, in __init__
self._init_graph_network(*args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 306, in _init_graph_network
nodes, nodes_by_depth, layers, _ = _map_graph_network(
File "/Users/duoshaoshishi/opt/anaconda3/envs/tf2/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py", line 1787, in _map_graph_network
raise ValueError('Graph disconnected: '
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("lstm_layer_18/Identity_1:0", shape=(8, 128), dtype=float32) at layer "my_layer". The following previous layers were accessed without issue: ['input_1', 'embedding', 'input_3', 'input_2', 'lambda1', 'reshape1']
I have a model created by training on a dataset. Now I want to re-train the model after adding some more images to that dataset, without deleting the already created model. I have some images that are slightly different from the images already in the dataset; the colour is the only difference. So I want to train the model on these new images as well. How can I train the model again using the new images without deleting the already trained data? I use TensorFlow in Python. How can I re-train the model?
import math
import numpy as np
import tensorflow as tf
from enum import Enum, unique
@unique
class InputType(Enum):
TENSOR = 1
BASE64_JPEG = 2
class OpenNsfwModel:
"""Tensorflow implementation of Yahoo's Open NSFW Model
Original implementation:
https://github.com/yahoo/open_nsfw
Weights have been converted using caffe-tensorflow:
https://github.com/ethereon/caffe-tensorflow
"""
def __init__(self):
self.weights = {}
self.bn_epsilon = 1e-5 # Default used by Caffe
def build(self, weights_path="open_nsfw-weights.npy",
input_type=InputType.TENSOR):
self.weights = np.load(weights_path, encoding="latin1").item()
self.input_tensor = None
if input_type == InputType.TENSOR:
self.input = tf.placeholder(tf.float32,
shape=[None, 224, 224, 3],
name="input")
self.input_tensor = self.input
elif input_type == InputType.BASE64_JPEG:
from image_utils import load_base64_tensor
self.input = tf.placeholder(tf.string, shape=(None,), name="input")
self.input_tensor = load_base64_tensor(self.input)
else:
raise ValueError("invalid input type '{}'".format(input_type))
x = self.input_tensor
x = tf.pad(x, [[0, 0], [3, 3], [3, 3], [0, 0]], 'CONSTANT')
x = self.__conv2d("conv_1", x, filter_depth=64,
kernel_size=7, stride=2, padding='valid')
x = self.__batch_norm("bn_1", x)
x = tf.nn.relu(x)
x = tf.layers.max_pooling2d(x, pool_size=3, strides=2, padding='same')
x = self.__conv_block(stage=0, block=0, inputs=x,
filter_depths=[32, 32, 128],
kernel_size=3, stride=1)
x = self.__identity_block(stage=0, block=1, inputs=x,
filter_depths=[32, 32, 128], kernel_size=3)
x = self.__identity_block(stage=0, block=2, inputs=x,
filter_depths=[32, 32, 128], kernel_size=3)
x = self.__conv_block(stage=1, block=0, inputs=x,
filter_depths=[64, 64, 256],
kernel_size=3, stride=2)
x = self.__identity_block(stage=1, block=1, inputs=x,
filter_depths=[64, 64, 256], kernel_size=3)
x = self.__identity_block(stage=1, block=2, inputs=x,
filter_depths=[64, 64, 256], kernel_size=3)
x = self.__identity_block(stage=1, block=3, inputs=x,
filter_depths=[64, 64, 256], kernel_size=3)
x = self.__conv_block(stage=2, block=0, inputs=x,
filter_depths=[128, 128, 512],
kernel_size=3, stride=2)
x = self.__identity_block(stage=2, block=1, inputs=x,
filter_depths=[128, 128, 512], kernel_size=3)
x = self.__identity_block(stage=2, block=2, inputs=x,
filter_depths=[128, 128, 512], kernel_size=3)
x = self.__identity_block(stage=2, block=3, inputs=x,
filter_depths=[128, 128, 512], kernel_size=3)
x = self.__identity_block(stage=2, block=4, inputs=x,
filter_depths=[128, 128, 512], kernel_size=3)
x = self.__identity_block(stage=2, block=5, inputs=x,
filter_depths=[128, 128, 512], kernel_size=3)
x = self.__conv_block(stage=3, block=0, inputs=x,
filter_depths=[256, 256, 1024], kernel_size=3,
stride=2)
x = self.__identity_block(stage=3, block=1, inputs=x,
filter_depths=[256, 256, 1024],
kernel_size=3)
x = self.__identity_block(stage=3, block=2, inputs=x,
filter_depths=[256, 256, 1024],
kernel_size=3)
x = tf.layers.average_pooling2d(x, pool_size=7, strides=1,
padding="valid", name="pool")
x = tf.reshape(x, shape=(-1, 1024))
self.logits = self.__fully_connected(name="fc_nsfw",
inputs=x, num_outputs=2)
self.predictions = tf.nn.softmax(self.logits, name="predictions")
"""Get weights for layer with given name
"""
def __get_weights(self, layer_name, field_name):
if not layer_name in self.weights:
raise ValueError("No weights for layer named '{}' found"
.format(layer_name))
w = self.weights[layer_name]
if not field_name in w:
raise (ValueError("No entry for field '{}' in layer named '{}'"
.format(field_name, layer_name)))
return w[field_name]
"""Layer creation and weight initialization
"""
def __fully_connected(self, name, inputs, num_outputs):
return tf.layers.dense(
inputs=inputs, units=num_outputs, name=name,
kernel_initializer=tf.constant_initializer(
self.__get_weights(name, "weights"), dtype=tf.float32),
bias_initializer=tf.constant_initializer(
self.__get_weights(name, "biases"), dtype=tf.float32))
def __conv2d(self, name, inputs, filter_depth, kernel_size, stride=1,
padding="same", trainable=False):
if padding.lower() == 'same' and kernel_size > 1:
if kernel_size > 1:
oh = inputs.get_shape().as_list()[1]
h = inputs.get_shape().as_list()[1]
p = int(math.floor(((oh - 1) * stride + kernel_size - h)//2))
inputs = tf.pad(inputs,
[[0, 0], [p, p], [p, p], [0, 0]],
'CONSTANT')
else:
raise Exception('unsupported kernel size for padding: "{}"'
.format(kernel_size))
return tf.layers.conv2d(
inputs, filter_depth,
kernel_size=(kernel_size, kernel_size),
strides=(stride, stride), padding='valid',
activation=None, trainable=trainable, name=name,
kernel_initializer=tf.constant_initializer(
self.__get_weights(name, "weights"), dtype=tf.float32),
bias_initializer=tf.constant_initializer(
self.__get_weights(name, "biases"), dtype=tf.float32))
def __batch_norm(self, name, inputs, training=False):
return tf.layers.batch_normalization(
inputs, training=training, epsilon=self.bn_epsilon,
gamma_initializer=tf.constant_initializer(
self.__get_weights(name, "scale"), dtype=tf.float32),
beta_initializer=tf.constant_initializer(
self.__get_weights(name, "offset"), dtype=tf.float32),
moving_mean_initializer=tf.constant_initializer(
self.__get_weights(name, "mean"), dtype=tf.float32),
moving_variance_initializer=tf.constant_initializer(
self.__get_weights(name, "variance"), dtype=tf.float32),
name=name)
"""ResNet blocks
"""
def __conv_block(self, stage, block, inputs, filter_depths,
kernel_size=3, stride=2):
filter_depth1, filter_depth2, filter_depth3 = filter_depths
conv_name_base = "conv_stage{}_block{}_branch".format(stage, block)
bn_name_base = "bn_stage{}_block{}_branch".format(stage, block)
shortcut_name_post = "_stage{}_block{}_proj_shortcut" \
.format(stage, block)
shortcut = self.__conv2d(
name="conv{}".format(shortcut_name_post), stride=stride,
inputs=inputs, filter_depth=filter_depth3, kernel_size=1,
padding="same"
)
shortcut = self.__batch_norm("bn{}".format(shortcut_name_post),
shortcut)
x = self.__conv2d(
name="{}2a".format(conv_name_base),
inputs=inputs, filter_depth=filter_depth1, kernel_size=1,
stride=stride, padding="same",
)
x = self.__batch_norm("{}2a".format(bn_name_base), x)
x = tf.nn.relu(x)
x = self.__conv2d(
name="{}2b".format(conv_name_base),
inputs=x, filter_depth=filter_depth2, kernel_size=kernel_size,
padding="same", stride=1
)
x = self.__batch_norm("{}2b".format(bn_name_base), x)
x = tf.nn.relu(x)
x = self.__conv2d(
name="{}2c".format(conv_name_base),
inputs=x, filter_depth=filter_depth3, kernel_size=1,
padding="same", stride=1
)
x = self.__batch_norm("{}2c".format(bn_name_base), x)
x = tf.add(x, shortcut)
return tf.nn.relu(x)
def __identity_block(self, stage, block, inputs,
filter_depths, kernel_size):
filter_depth1, filter_depth2, filter_depth3 = filter_depths
conv_name_base = "conv_stage{}_block{}_branch".format(stage, block)
bn_name_base = "bn_stage{}_block{}_branch".format(stage, block)
x = self.__conv2d(
name="{}2a".format(conv_name_base),
inputs=inputs, filter_depth=filter_depth1, kernel_size=1,
stride=1, padding="same",
)
x = self.__batch_norm("{}2a".format(bn_name_base), x)
x = tf.nn.relu(x)
x = self.__conv2d(
name="{}2b".format(conv_name_base),
inputs=x, filter_depth=filter_depth2, kernel_size=kernel_size,
padding="same", stride=1
)
x = self.__batch_norm("{}2b".format(bn_name_base), x)
x = tf.nn.relu(x)
x = self.__conv2d(
name="{}2c".format(conv_name_base),
inputs=x, filter_depth=filter_depth3, kernel_size=1,
padding="same", stride=1
)
x = self.__batch_norm("{}2c".format(bn_name_base), x)
x = tf.add(x, inputs)
return tf.nn.relu(x)
You can save your trained model. When you get new images in your dataset, reload the saved model and continue training from that point onwards.
Refer to the TensorFlow docs: here
Check out this SO question: how to save and restore a model in tensorflow
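As a minimal sketch of that suggestion (assuming a Keras model previously saved with model.save; new_images_dataset is a placeholder for a tf.data pipeline over the new images):

import tensorflow as tf

model = tf.keras.models.load_model("my_model")   # reload the previously saved model
model.fit(new_images_dataset, epochs=5)          # continue training on the new images
model.save("my_model")                           # keep the updated weights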