I'm having a problem implementing a super-resolution model
class SRNet(Model):
    def __init__(self, scale=4):
        super(SRNet, self).__init__()
        self.scale = scale

        self.conv1 = Sequential([
            layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1),
                          padding="same", data_format="channels_first"),
            layers.ReLU(),
        ])

        self.residualBlocks = Sequential(
            [ResidualBlock() for _ in range(16)])

        self.convUp = Sequential([
            layers.Conv2DTranspose(filters=64, kernel_size=3, strides=(2, 2),
                                   padding="same", data_format="channels_first"),
            layers.ReLU(),
            layers.Conv2DTranspose(filters=64, kernel_size=3, strides=(2, 2),
                                   padding="same", data_format="channels_first"),
            layers.ReLU(),
        ])

        self.reluAfterPixleShuffle = layers.ReLU()

        self.convOut = layers.Conv2D(
            filters=3, kernel_size=3, strides=(1, 1), padding="same",
            data_format="channels_first",
            input_shape=(4, 1440, 2560))  # (kernel, kernel, channel, output)

    def call(self, lrCur_hrPrevTran):
        lrCur, hrPrevTran = lrCur_hrPrevTran

        x = tf.concat([lrCur, hrPrevTran], axis=1)
        x = self.conv1(x)
        x = self.residualBlocks(x)
        x = self.convUp(x)

        # pixel shuffle
        Subpixel_layer = Lambda(lambda x: tf.nn.depth_to_space(
            x, self.scale, data_format="NCHW"))
        x = Subpixel_layer(inputs=x)
        x = self.reluAfterPixleShuffle(x)

        x = self.convOut(x)
        return x
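A note on the shapes here: tf.nn.depth_to_space with block_size=4 moves channels into space, so a (N, 64, H, W) tensor becomes (N, 64 // 4**2, 4H, 4W) = (N, 4, 4H, 4W), which is why convOut is built for 4 input channels. A quick check (done in NHWC, since the NCHW variant of depth_to_space typically requires a GPU):
x = tf.random.uniform((1, 360, 640, 64))   # NHWC equivalent of a (1, 64, 360, 640) feature map
print(tf.nn.depth_to_space(x, 4).shape)    # (1, 1440, 2560, 4)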
Error
/usr/src/app/generator.py:164 call *
x = self.convOut(x)
ValueError: Tensor's shape (3, 3, 64, 3) is not compatible with supplied shape (3, 3, 4, 3)
After reading the error I know that (3, 3, 4, 3) is (kernel size, kernel size, channels, output), which means that only the number of input channels is incorrect, so I printed out the shape of the input:
# after pixel shuffle before convOut
print(x.shape)
>>> (1, 4, 1440, 2560) (batch size, channel, height, width)
But the shape of x after the pixel shuffle (depth_to_space) is (1, 4, 1440, 2560); the channel value is 4, which is exactly what convOut needs.
So the question is: why are the input channels changing from 4 to 64, as the error suggests?
I have found a solution.
First of all, I'm using checkpoints to save the model weights during training.
During implementation and testing I changed some of the layers, so the input sizes changed too, but the saved weights still remembered the input sizes from the previous checkpoint.
So I deleted the checkpoints folder, and then everything worked again.
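For anyone hitting the same thing, a minimal sketch of the cleanup, assuming the checkpoints live in a local "checkpoints" directory (the path is a placeholder; use whatever you pass to your checkpoint callback or CheckpointManager):
import shutil

# drop checkpoints that were saved for the old layer shapes, so restoring them
# can no longer clash with the rebuilt model ("checkpoints" is a placeholder path)
shutil.rmtree("checkpoints", ignore_errors=True)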
Related
I want to implement a text-to-image neural network like the one in the image below (please see the image), using CNNs and transposed CNNs with an Embedding layer.
import torch
from torch import nn
Input text:
text = "A cat wearing glasses and playing the guitar "
# Simple preprocessing the text
word_to_ix = {"A": 0, "cat": 1, "wearing": 2, "glasses": 3, "and": 4, "playing": 5, "the": 6, "guitar":7}
lookup_tensor = torch.tensor(list(word_to_ix.values()), dtype = torch.long) # a tensor representing words by integers
vocab_size = len(lookup_tensor)
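(For later reference: lookup_tensor has 8 entries, one per word, so the embedding output will have shape (8, 64); that 8 is the "Expected size 8" in the error further down.)
print(lookup_tensor.shape)  # torch.Size([8]) -- one index per word
print(vocab_size)           # 8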
Architecture implementation:
class TextToImage(nn.Module):
    def __init__(self, vocab_size):
        super(TextToImage, self).__init__()
        self.vocab_size = vocab_size
        self.noise = torch.rand((56, 64))

        # DEFINE the layers
        # Embedding
        self.embed = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=64)
        # Conv
        self.conv2d_1 = nn.Conv2d(in_channels=64, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding='valid')
        self.conv2d_2 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), stride=(2, 2), padding='valid')
        # Transposed CNNs
        self.conv2dTran_1 = nn.ConvTranspose2d(in_channels=16, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1)
        self.conv2dTran_2 = nn.ConvTranspose2d(in_channels=16, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding=0)
        self.conv2dTran_3 = nn.ConvTranspose2d(in_channels=6, out_channels=3, kernel_size=(4, 4), stride=(2, 2), padding=0)

        self.relu = torch.nn.ReLU(inplace=False)
        self.dropout = torch.nn.Dropout(0.4)

    def forward(self, text_tensor):
        # SEND the input text tensor to the embedding layer
        emb = self.embed(text_tensor)

        # COMBINE the embedding with the noise tensor. Make it have 3 dimensions
        combine1 = torch.cat((emb, self.noise), dim=1, out=None)

        # SEND the noisy embedding to the convolutional and transposed convolutional layers
        conv2d_1 = self.conv2d_1(combine1)
        conv2d_2 = self.conv2d_2(conv2d_1)
        dropout = self.dropout(conv2d_2)
        conv2dTran_1 = self.conv2dTran_1(dropout)
        conv2dTran_2 = self.conv2dTran_2(conv2dTran_1)

        # COMBINE the outputs, giving the skip connection shown in the architecture image
        combine2 = torch.cat((conv2d_1, conv2dTran_2), dim=1, out=None)
        conv2dTran_3 = self.conv2dTran_3(combine2)

        # SEND the combined outputs to the final layer. Name the final output variable "image" so it can be returned
        image = self.relu(conv2dTran_3)
        return image
Expected output
torch.Size( [3, 64, 64] )
texttoimage = TextToImage(vocab_size=vocab_size)
output = texttoimage(lookup_tensor)
output.size()
Generated random noisy image:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(np.moveaxis(output.detach().numpy(), 0,-1))
The error I got:
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 8 but got size 56 for tensor number 1 in the list.
Does anyone know how to solve this issue? I think it comes from concatenating the noise with the embedding.
After changing dim=0 and expanding to 3 dimensions it works. In addition, there was an issue with the input channels of the first conv layer (conv2d_1), which I changed from 64 to 1:
class TextToImage(nn.Module):
    def __init__(self, vocab_size):
        super(TextToImage, self).__init__()
        self.vocab_size = vocab_size
        self.noise = torch.rand((56, 64))

        # DEFINE the layers
        # Embedding
        self.embed = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=64)
        # Conv
        self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding='valid')
        self.conv2d_2 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), stride=(2, 2), padding='valid')
        # Transposed CNNs
        self.conv2dTran_1 = nn.ConvTranspose2d(in_channels=16, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1)
        self.conv2dTran_2 = nn.ConvTranspose2d(in_channels=16, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding=0)
        self.conv2dTran_3 = nn.ConvTranspose2d(in_channels=6, out_channels=3, kernel_size=(4, 4), stride=(2, 2), padding=0)

        self.relu = torch.nn.ReLU(inplace=False)
        self.dropout = torch.nn.Dropout(0.4)

    def forward(self, text_tensor):
        # SEND the input text tensor to the embedding layer
        emb = self.embed(text_tensor)

        # COMBINE the embedding with the noise tensor. Make it have 3 dimensions
        combined = torch.cat((emb, self.noise), dim=0)  # was dim=1, out=None
        print(combined.shape)
        combined_3d = combined[None, :]  # add the channel dimension Conv2d expects
        print(combined_3d.shape)

        # SEND the noisy embedding to the convolutional and transposed convolutional layers
        conv2d_1 = self.conv2d_1(combined_3d)
        conv2d_2 = self.conv2d_2(conv2d_1)
        dropout = self.dropout(conv2d_2)
        conv2dTran_1 = self.conv2dTran_1(dropout)
        conv2dTran_2 = self.conv2dTran_2(conv2dTran_1)

        # COMBINE the outputs, giving the skip connection shown in the architecture image
        combined_2 = torch.cat((conv2d_1, conv2dTran_2), dim=0)  # was dim=1, out=None
        conv2dTran_3 = self.conv2dTran_3(combined_2)

        # SEND the combined outputs to the final layer. Name the final output variable "image" so it can be returned
        image = self.relu(conv2dTran_3)
        return image
The cat function requires the tensor shapes to match aside from the dimension you're concatenating along, so to concatenate (8, 64) and (56, 64) your cat should look like this, using dim 0 instead of 1:
combine1 = torch.cat((emb, self.noise), dim=0, out=None)
After that, I'm not seeing where you give combine1 a 3rd dimension.
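A minimal sketch of both steps (shapes taken from the question; unsqueeze is one way to add the missing dimension, equivalent to the combined[None, :] used above):
import torch

emb = torch.rand(8, 64)                    # embedding output: 8 tokens x embedding_dim 64
noise = torch.rand(56, 64)                 # the noise tensor from __init__

combine1 = torch.cat((emb, noise), dim=0)  # (64, 64): dim 0, since the size-64 dims already match
combine1 = combine1.unsqueeze(0)           # (1, 64, 64): the channel dimension Conv2d expects
print(combine1.shape)                      # torch.Size([1, 64, 64])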
I'm trying to convert a PyTorch model to a tf.keras model, including the weights, and I came across a mismatch between the two libraries' outputs.
Here I define two convolutional layers, which should be identical:
torch_layer = torch.nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=(7, 7),
    stride=(2, 2),
    padding=(3, 3),
    dilation=1,
    groups=1,
    bias=False,
    padding_mode='zeros'
)

tf_layer = tf.keras.layers.Conv2D(
    filters=64,
    kernel_size=(7, 7),
    strides=(2, 2),
    padding='same',
    dilation_rate=(1, 1),
    groups=1,
    activation=None,
    use_bias=False
)

# define model to specify input channel size
tf_model = tf.keras.Sequential([tf.keras.layers.Input((256, 256, 3), batch_size=1), tf_layer])
Now I have the torch weights and convert them to tf.keras format:
# output_channels, input_channels, x, y
torch_weights = np.random.rand(64, 3, 7, 7)
# x, y, input_channels, output_channels
tf_weights = np.transpose(torch_weights, (2, 3, 1, 0))
# assign weights
torch_layer.weight = torch.nn.Parameter(torch.Tensor(torch_weights))
tf_model.layers[0].set_weights([tf_weights])
Now I define an input, and the outputs are different (the shape is the same, but the values differ). What am I doing wrong?
torch_inputs = np.random.rand(1, 3, 256, 256)
tf_inputs = np.transpose(torch_inputs, (0, 2, 3, 1))
torch_output = torch_layer(torch.Tensor(torch_inputs))
tf_output = tf_model.layers[0](tf_inputs)
In TensorFlow, set_weights is basically meant for the output of get_weights, so it is safer to use assign to avoid mistakes.
Besides, 'same' padding in TensorFlow is a little bit complicated: it depends on input_shape, kernel_size and strides (for details, see my SO answer). In your example here, it translates to torch.nn.ZeroPad2d((2,3,2,3)) in PyTorch.
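For reference, a minimal sketch of how that padding amount comes about (the standard 'same' padding formula, shown here for in=256, k=7, s=2):
import math

in_size, k, s = 256, 7, 2
out = math.ceil(in_size / s)                                      # 128
pad_total = max((out - 1) * s + k - in_size, 0)                   # 5
pad_begin, pad_end = pad_total // 2, pad_total - pad_total // 2   # 2, 3 (TF puts the extra pixel at the end)
print(pad_begin, pad_end)  # 2 3  -> torch.nn.ZeroPad2d((2, 3, 2, 3)) on the PyTorch side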
Example code: from TensorFlow to PyTorch
import numpy as np
import tensorflow as tf
import torch

np.random.seed(88883)

# initialize the layers respectively
torch_layer = torch.nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=(7, 7),
    stride=(2, 2),
    bias=False
)
torch_model = torch.nn.Sequential(
    torch.nn.ZeroPad2d((2, 3, 2, 3)),
    torch_layer
)

tf_layer = tf.keras.layers.Conv2D(
    filters=64,
    kernel_size=(7, 7),
    strides=(2, 2),
    padding='same',
    use_bias=False
)

# setting weights in torch layer and tf layer respectively
torch_weights = np.random.rand(64, 3, 7, 7)
tf_weights = np.transpose(torch_weights, (2, 3, 1, 0))

with torch.no_grad():
    torch_layer.weight = torch.nn.Parameter(torch.Tensor(torch_weights))

tf_layer(np.zeros((1, 256, 256, 3)))  # build the layer so the kernel exists
tf_layer.kernel.assign(tf_weights)

# prepare inputs and do inference
torch_inputs = torch.Tensor(np.random.rand(1, 3, 256, 256))
tf_inputs = np.transpose(torch_inputs.numpy(), (0, 2, 3, 1))

with torch.no_grad():
    torch_output = torch_model(torch_inputs)
tf_output = tf_layer(tf_inputs)

np.allclose(tf_output.numpy(), np.transpose(torch_output.numpy(), (0, 2, 3, 1)))  # True
Edit: from PyTorch to TensorFlow
torch_layer = torch.nn.Conv2d(
    in_channels=3,
    out_channels=64,
    kernel_size=(7, 7),
    stride=(2, 2),
    padding=(3, 3),
    bias=False
)

tf_layer = tf.keras.layers.Conv2D(
    filters=64,
    kernel_size=(7, 7),
    strides=(2, 2),
    padding='valid',
    use_bias=False
)

tf_model = tf.keras.Sequential([
    tf.keras.layers.ZeroPadding2D((3, 3)),
    tf_layer
])
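As a quick check in the same spirit as the first example (this verification part is my own sketch rather than the original answer; weights and inputs are random):
torch_weights = np.random.rand(64, 3, 7, 7)
tf_weights = np.transpose(torch_weights, (2, 3, 1, 0))

with torch.no_grad():
    torch_layer.weight = torch.nn.Parameter(torch.Tensor(torch_weights))

tf_model(np.zeros((1, 256, 256, 3)))  # build the model so the kernel exists
tf_layer.kernel.assign(tf_weights)

torch_inputs = torch.Tensor(np.random.rand(1, 3, 256, 256))
tf_inputs = np.transpose(torch_inputs.numpy(), (0, 2, 3, 1))

with torch.no_grad():
    torch_output = torch_layer(torch_inputs)  # padding=(3, 3) is built into the torch layer
tf_output = tf_model(tf_inputs)               # ZeroPadding2D + 'valid' conv on the TF side

np.allclose(tf_output.numpy(), np.transpose(torch_output.numpy(), (0, 2, 3, 1)))  # should be True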
Working on a university exercise, I used the model sub-classing API of TF2.0. Here's my code (it's the AlexNet architecture, if you're wondering...):
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        # OPS
        self.relu = Activation('relu', name='ReLU')
        self.maxpool = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid', name='MaxPool')
        self.softmax = Activation('softmax', name='Softmax')

        # Conv layers
        self.conv1 = Conv2D(filters=96, input_shape=(224, 224, 3), kernel_size=(11, 11), strides=(4, 4),
                            padding='same', name='conv1')
        self.conv2a = Conv2D(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', name='conv2a')
        self.conv2b = Conv2D(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', name='conv2b')
        self.conv3 = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv3')
        self.conv4a = Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv4a')
        self.conv4b = Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv4b')
        self.conv5a = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv5a')
        self.conv5b = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv5b')

        # Fully-connected layers
        self.flatten = Flatten()
        self.dense1 = Dense(4096, input_shape=(100,), name='FC_4096_1')
        self.dense2 = Dense(4096, name='FC_4096_2')
        self.dense3 = Dense(1000, name='FC_1000')

    # Network definition
    def call(self, x, **kwargs):
        x = self.conv1(x)
        x = self.relu(x)
        x = tf.nn.local_response_normalization(x, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
        x = self.maxpool(x)
        x = tf.concat((self.conv2a(x[:, :, :, :48]), self.conv2b(x[:, :, :, 48:])), 3)
        x = self.relu(x)
        x = tf.nn.local_response_normalization(x, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
        x = self.maxpool(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = tf.concat((self.conv4a(x[:, :, :, :192]), self.conv4b(x[:, :, :, 192:])), 3)
        x = self.relu(x)
        x = tf.concat((self.conv5a(x[:, :, :, :192]), self.conv5b(x[:, :, :, 192:])), 3)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.relu(x)
        x = self.dense2(x)
        x = self.relu(x)
        x = self.dense3(x)
        return self.softmax(x)
My goal is to access an arbitrary layer's output (in order to maximize a specific neuron's activation, if you have to know exactly :) ). The problem is that whenever I try to access a layer's output, I get an attribute error. For example:
model = MyModel()
print(model.get_layer('conv1').output)
# => AttributeError: Layer conv1 has no inbound nodes.
I found some questions with this error here on SO, and all of them claim that I have to define the input shape in the first layer, but as you can see, that is already done (see the definition of self.conv1 in the __init__ function)!
I did find that if I define a keras.layers.Input object, I do manage to get the output of conv1, but trying to access deeper layers fails, for example:
model = MyModel()
I = tf.keras.Input(shape=(224, 224, 3))
model(I)
print(model.get_layer('conv1').output)
# prints Tensor("my_model/conv1/Identity:0", shape=(None, 56, 56, 96), dtype=float32)
print(model.get_layer('FC_1000').output)
# => AttributeError: Layer FC_1000 has no inbound nodes.
I googled every exception I got along the way but found no answer. How can I access any layer's input/output (or input_shape/output_shape attributes, for that matter) in this case?
In a sub-classed model there is no graph of layers; it's just a piece of code (the model's call function). Layer connections are not defined while creating an instance of the Model class, so we need to build the model first by calling the call method.
Try this:
model = MyModel()
inputs = tf.keras.Input(shape=(224,224,3))
model.call(inputs)
# instead of model(I) in your code.
After doing this, the model graph is created.

for i in model.layers:
    print(i.output)
# output
# Tensor("ReLU_7/Relu:0", shape=(?, 56, 56, 96), dtype=float32)
# Tensor("MaxPool_3/MaxPool:0", shape=(?, 27, 27, 96), dtype=float32)
# Tensor("Softmax_1/Softmax:0", shape=(?, 1000), dtype=float32)
# ...
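Once the graph has been traced this way, one common follow-up (a sketch, not part of the original answer, assuming the layer names from the question; details can vary across TF versions) is to wrap an intermediate output in a feature-extractor model:
model = MyModel()
inputs = tf.keras.Input(shape=(224, 224, 3))
model.call(inputs)  # trace the graph so the layers get inbound nodes

# functional model built from the traced tensors: maps the input to conv1's output
feature_extractor = tf.keras.Model(inputs=inputs, outputs=model.get_layer('conv1').output)
activations = feature_extractor(tf.random.uniform((1, 224, 224, 3)))
print(activations.shape)  # (1, 56, 56, 96)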
Hello, I am currently using the tensorflow.org Variational Autoencoder implementation:
https://www.tensorflow.org/tutorials/generative/cvae
I just tried to change the architecture so that it accepts batches of 6 images. I tried to adapt the code myself and just changed the Conv2D layers to Conv3D, but that did not really work.
The original images come in batches of 6 x 299 x 299 OCT images; I resized them to 64 x 64.
class CVAE(tf.keras.Model):
    def __init__(self, latent_dim):
        super(CVAE, self).__init__()
        self.latent_dim = latent_dim

        # defines an approximate posterior distribution q(z|x),
        # outputs mean and log-variance of a factorized Gaussian
        self.inference_net = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape=(6, 64, 64, 1)),  # was (28, 28, 1) in the tutorial
                tf.keras.layers.Conv3D(
                    filters=32, kernel_size=3, strides=(2, 2, 2), activation='relu'),
                tf.keras.layers.Conv3D(
                    filters=64, kernel_size=3, strides=(2, 2, 2), activation='relu'),
                tf.keras.layers.Flatten(),
                # No activation
                tf.keras.layers.Dense(latent_dim + latent_dim),
            ]
        )

        # outputs p(x|z)
        self.generative_net = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
                tf.keras.layers.Dense(units=6*16*16*32, activation=tf.nn.relu),  # change with img_size (tutorial used 7*7*32)
                tf.keras.layers.Reshape(target_shape=(6, 16, 16, 32)),  # change with image size (tutorial used (7, 7, 32))
                tf.keras.layers.Conv3DTranspose(
                    filters=64,
                    kernel_size=3,
                    strides=(2, 2, 2),
                    padding="SAME",
                    activation='relu'),
                tf.keras.layers.Conv3DTranspose(
                    filters=32,
                    kernel_size=3,
                    strides=(2, 2, 2),
                    padding="SAME",
                    activation='relu'),
                # No activation
                tf.keras.layers.Conv3DTranspose(
                    filters=1, kernel_size=3, strides=(1, 1, 1), padding="SAME"),
            ]
        )
InvalidArgumentError: Negative dimension size caused by subtracting 3 from 2 for 'conv3d_5/Conv3D' (op: 'Conv3D') with input shapes: [?,2,13,13,32], [3,3,3,32,64].
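For reference, the negative dimension comes from the depth axis: with the default 'valid' padding each Conv3D computes out = (in - kernel) // stride + 1, so a depth of 6 survives the first conv but not the second. A quick check of that arithmetic:
def valid_out(size, kernel=3, stride=2):
    return (size - kernel) // stride + 1

print(valid_out(6))  # 2 -> the depth the error reports going into the second Conv3D,
                     #      where subtracting a kernel of 3 from 2 gives the negative dimension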
def _parser(self, example_proto):
    # define a dict with the data names and types we expect to find
    features = {'image_raw': tf.io.FixedLenFeature([], tf.string)}

    # parse the serialized data
    parsed_features = tf.io.parse_single_example(example_proto, features)

    # decode the raw bytes so it becomes a tensor with type tf.float64
    ima = tf.io.decode_raw(parsed_features['image_raw'], tf.float64)
    ima = tf.reshape(ima, (6, 299, 299))

    ## custom; ima is already a tensor
    ima = tf.expand_dims(ima, -1)       # (6, 299, 299, 1)
    ima = tf.image.resize(ima, (64, 64))
    # ima = ima[0, :]                   # only 1st scan
    # ima = tf.squeeze(ima)
    # ima = tf.reshape(ima, (1, 784))   # (6, 784)
    ima = tf.cast(ima, 'float32')

    # normalizing images
    ima = ima / 255
    print("Parser Format: {}".format(ima))
    return ima  # was (28, 28, 1) in the tutorial; here (6, 64, 64, 1)
Any kind of help is highly appreciated. I am kind of new to neural networks.
Thank you very much in advance.
My neural network receives a (1000, 1000, 5) shape array, which undergoes convolution in one branch (5 stacked raster images), and a (12) shape array (just 12 numbers), which goes through a couple of dense layers in a second branch.
The outputs are concatenated into a (31, 31, 65) shape tensor, which then goes through deconvolution into a final (1000, 1000) shape array.
My Issue:
I made my own simple loss function (mean error), because the output represents temperature in an area.
My issue is that the loss goes down significantly over 200 epochs (both loss and val_loss, from a small decimal to about -3), while the accuracy hovers around 0.002 the entire time.
I have lowered the learning rate to as little as 1e-5. I have given more samples to the training set (there aren't many samples to begin with, unfortunately), and I have both increased (for fear of overfitting) and decreased (for lack of data) the batch size. All the input data is normalized to 0:1, which makes losses of anything beyond -1 unreasonable.
I am not sure whether I should use a different optimizer for this task, or different activation, or just remove a layer or two. But mostly I'd love to understand what is happening to make the model so unreliable.
I really tried to refrain from having to post the entire thing on here but I am officially out of ideas.
MLP Branch
dim = 12
inputs = Input(shape = (dim, ))
x = inputs
x = Dense(dim * 4, activation = 'relu')(x)
x = Dense(dim * 16, activation = 'relu')(x)
x = Dense(961, activation = 'relu')(x) # 961 nodes
x = Reshape((31, 31, 1))(x) # (31, 31, 1) array
model1 = Model(inputs, x)
Convolutional Branch
inputShape = (1000, 1000, 5)
chanDim = -1
inputs = Input(shape = inputShape)
x = inputs
# layer 1: conv, f = 8, pool = 2
x = Conv2D(8, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 2: conv, f = 16, pool = 2
x = Conv2D(16, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 3: conv, f = 32, pool = 2
x = Conv2D(32, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 4: conv = 64, pool = 4
x = Conv2D(64, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (4, 4))(x)
model2 = Model(inputs, x)
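As a quick sanity check of the shapes mentioned above (assuming 'valid' pooling, with the 'same' convolutions leaving H and W unchanged):
size = 1000
for pool in (2, 2, 2, 4):  # the four MaxPooling2D layers
    size //= pool
print(size)  # 31 -> model2 outputs (31, 31, 64); concatenated with the (31, 31, 1) MLP output -> (31, 31, 65)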
Deconvolution
combinedInput = Concatenate()([model1.output, model2.output])
x = combinedInput # (31, 31, 65)
x = Conv2DTranspose(43, (3, 3), strides = (4, 4), padding = 'same', activation = 'relu')(x) # (124, 124, 43)
x = Conv2DTranspose(22, (3, 3), strides = (2, 2), padding = 'same', activation = 'relu')(x) # (248, 248, 22)
x = Lambda(lambda y: spatial_2d_padding(y))(x) # (250, 250, 22)
x = Conv2DTranspose(10, (3, 3), strides = (2, 2), padding = 'same', activation = 'relu')(x) # (500, 500, 10)
x = Conv2DTranspose(1, (3, 3), strides = (2, 2), padding = 'same', activation = 'linear')(x) # (1000, 1000, 1)
x = Lambda(lambda y: squeeze(y, axis = 3))(x) # (1000, 1000)
Compiling
def custom_loss(y_actual, y_predicted):
    custom_loss_value = mean(y_actual - y_predicted)
    return custom_loss_value

model = Model(inputs = [model1.input, model2.input], outputs = x)
model.compile(loss = custom_loss, optimizer = Adam(lr = 0.000001), metrics = ['mae'])
# train with epochs = 200, batch_size = 12
The Issue
As I explained above, my loss never stabilizes and the accuracy hovers roughly around the same number over the epochs.
I'd love to know possible reasons and possible solutions.
Edits:
Since writing this question I have attempted:
Transferring layers from the convolution branch to the deconvolution branch.
Adding BatchNormalization() after every Conv2DTranspose() layer.