Hello iam currently using the tensorflow.org Variational Autoencoder implementation.
https://www.tensorflow.org/tutorials/generative/cvae
I just tried to change the architecture in a way that it accepts batches of 6 images.
I tried to adapt the code by myslelf and just change the Conv2D to Conv3D but that did not really work.
The original images are batches of 6 x 299 x 299 OCT images.
I reshaped them to 64 x 64.
class CVAE(tf.keras.Model):
def __init__(self, latent_dim):
super(CVAE, self).__init__()
self.latent_dim = latent_dim
# defines an approximate posterior distribution q(z|x)
# outputs mean and log-variance of a factorized Gaussian
self.inference_net = tf.keras.Sequential(
[
tf.keras.layers.InputLayer(input_shape=(6, 64, 64, 1)), # (28, 28, 1)
tf.keras.layers.Conv3D(
filters=32, kernel_size=3, strides=(2, 2, 2), activation='relu'),
tf.keras.layers.Conv3D(
filters=64, kernel_size=3, strides=(2, 2, 2), activation='relu'),
tf.keras.layers.Flatten(),
# No activation
tf.keras.layers.Dense(latent_dim + latent_dim),
]
)
# outputs p(x|z)
self.generative_net = tf.keras.Sequential(
[
tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
tf.keras.layers.Dense(units=6*16*16*32, activation=tf.nn.relu), # change with img_size (7,7,32)
tf.keras.layers.Reshape(target_shape=(6, 16, 16, 32)), # change with image size # (7,7,32)
tf.keras.layers.Conv3DTranspose(
filters=64,
kernel_size=3,
strides=(2, 2, 2),
padding="SAME",
activation='relu'),
tf.keras.layers.Conv3DTranspose(
filters=32,
kernel_size=3,
strides=(2, 2, 2),
padding="SAME",
activation='relu'),
# No activation
tf.keras.layers.Conv3DTranspose(
filters=1, kernel_size=3, strides=(1, 1, 1), padding="SAME"),
]
)
InvalidArgumentError: Negative dimension size caused by subtracting 3 from 2 for 'conv3d_5/Conv3D' (op: 'Conv3D') with input shapes: [?,2,13,13,32], [3,3,3,32,64].
def _parser(self, example_proto):
# define a dict with the data-names and types we
# expect to find
features = { 'image_raw': tf.io.FixedLenFeature([], tf.string) }
# parse the serialized data
parsed_features = tf.io.parse_single_example(example_proto, features)
# decode the raw bytes so it becomes a tensor with type
ima = tf.io.decode_raw(parsed_features['image_raw'], tf.float64)
ima = tf.reshape(ima, (6,299,299)) # (6,299,299)
## custom; ima is already a tensor
ima = tf.expand_dims(ima, -1) # (6,299,299,1)
ima = tf.image.resize(ima, (64, 64))
#ima = ima[0,:] # only 1st scan
#ima = tf.squeeze(ima)
#ima = tf.reshape(ima, (1,784)) #(6, 784)
ima = tf.cast(ima, 'float32')
# normalizing images
ima = ima / 255
print("Parser Format: {}" .format(ima))
return ima # (28, 28, 1)
Any kind of help is highly appreciated. I am kind of new to neural networks.
Thank you very much in advance.
Related
I wanna implement text2image neural networks like the image below: Please see the image
using CNNs and Transposed CNNs with Embedding layer
import torch
from torch import nn
Input text :
text = "A cat wearing glasses and playing the guitar "
# Simple preprocessing the text
word_to_ix = {"A": 0, "cat": 1, "wearing": 2, "glasses": 3, "and": 4, "playing": 5, "the": 6, "guitar":7}
lookup_tensor = torch.tensor(list(word_to_ix.values()), dtype = torch.long) # a tensor representing words by integers
vocab_size = len(lookup_tensor)
architecture implementation :
class TextToImage(nn.Module):
def __init__(self, vocab_size):
super(TextToImage, self).__init__()
self.vocab_size = vocab_size
self.noise = torch.rand((56,64))
# DEFINE the layers
# Embedding
self.embed = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim = 64)
# Conv
self.conv2d_1 = nn.Conv2d(in_channels=64, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding='valid')
self.conv2d_2 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), stride=(2, 2), padding='valid')
# Transposed CNNs
self.conv2dTran_1 = nn.ConvTranspose2d(in_channels=16, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1)
self.conv2dTran_2 = nn.ConvTranspose2d(in_channels=16, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding=0)
self.conv2dTran_3 = nn.ConvTranspose2d(in_channels=6, out_channels=3, kernel_size=(4, 4), stride=(2, 2), padding=0)
self.relu = torch.nn.ReLU(inplace=False)
self.dropout = torch.nn.Dropout(0.4)
def forward(self, text_tensor):
#SEND the input text tensor to the embedding layer
emb = self.embed(text_tensor)
#COMBINE the embedding with the noise tensor. Make it have 3 dimensions
combine1 = torch.cat((emb, self.noise), dim=1, out=None)
#SEND the noisy embedding to the convolutional and transposed convolutional layers
conv2d_1 = self.conv2d_1(combine1)
conv2d_2 = self.conv2d_2(conv2d_1)
dropout = self.dropout(conv2d_2)
conv2dTran_1 = self.conv2dTran_1(dropout)
conv2dTran_2 = self.conv2dTran_2(conv2dTran_1)
#COMBINE the outputs having a skip connection in the image of the architecture
combine2 = torch.cat((conv2d_1, conv2dTran_2), dim=1, out=None)
conv2dTran_3 = self.conv2dTran_3(combine2)
#SEND the combined outputs to the final layer. Please name your final output variable as "image" so you that it can be returned
image = self.relu(conv2dTran_3)
return image
Expected output
torch.Size( [3, 64, 64] )
texttoimage = TextToImage(vocab_size=vocab_size)
output = texttoimage(lookup_tensor)
output.size()
Generated random noisy image :
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(np.moveaxis(output.detach().numpy(), 0,-1))
The error I got :
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 8 but got size 56 for tensor number 1 in the list.
Does anyone how to solve this issue I think it from concatenate nosey with embedding
After changing dim = 0
and expand to 3 dim
In addition there was issue in Input channel for first Conv_1 where changed from 64 to 1
class TextToImage(nn.Module):
def __init__(self, vocab_size):
super(TextToImage, self).__init__()
self.vocab_size = vocab_size
self.noise = torch.rand((56,64))
# DEFINE the layers
# Embedding
self.embed = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim = 64)
# Conv
self.conv2d_1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding='valid')
self.conv2d_2 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3), stride=(2, 2), padding='valid')
# Transposed CNNs
self.conv2dTran_1 = nn.ConvTranspose2d(in_channels=16, out_channels=16, kernel_size=(3, 3), stride=(1, 1), padding=1)
self.conv2dTran_2 = nn.ConvTranspose2d(in_channels=16, out_channels=3, kernel_size=(3, 3), stride=(2, 2), padding=0)
self.conv2dTran_3 = nn.ConvTranspose2d(in_channels=6, out_channels=3, kernel_size=(4, 4), stride=(2, 2), padding=0)
self.relu = torch.nn.ReLU(inplace=False)
self.dropout = torch.nn.Dropout(0.4)
def forward(self, text_tensor):
#SEND the input text tensor to the embedding layer
emb = self.embed(text_tensor)
#COMBINE the embedding with the noise tensor. Make it have 3 dimensions
combined = torch.cat((emb, self.noise), dim=0) #, out=None
print(combined.shape)
combined_3d = combined[None, :]
print(combined_3d.shape)
# SEND the noisy embedding to the convolutional and transposed convolutional layers
conv2d_1 = self.conv2d_1(combined_3d)
conv2d_2 = self.conv2d_2(conv2d_1)
dropout = self.dropout(conv2d_2)
conv2dTran_1 = self.conv2dTran_1(dropout)
conv2dTran_2 = self.conv2dTran_2(conv2dTran_1)
#COMBINE the outputs having a skip connection in the image of the architecture
combined_2 = torch.cat((conv2d_1, conv2dTran_2),axis=0) #dim=1, out=None
conv2dTran_3 = self.conv2dTran_3(combined_2)
#SEND the combined outputs to the final layer. Please name your final output variable as "image" so you that it can be returned
image = self.relu(conv2dTran_3)
return image
The cat function requires the tensor shapes to match aside from the dimension you're concatenating, so to concatenate (8,64) and (56,64) your cat should look like this using dim 0 instead of 1:
combine1 = torch.cat((emb, self.noise), dim=0, out=None)
After that, I'm not seeing where you give combine1 a 3rd dimension.
I'm pretty new to this AI model creation and I'm trying to create a activity recognition model using heatmaps using this reference Human activity recognition model
In the above link they're doing activity recognition by providing some video feed, the videos have been sliced into frames and training the model here in reference link.
But in my case I'm providing the sliced frames(images) directly for the training.
The problem is that when I'm providing my dataset for AI model creation, the input is not matching with expected input.
I'm getting this error
ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 4, 8, 8, 3), found shape=(4, 8, 8, 3)
Here is my code
seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)
CLASSES_LIST = ["Forward", "Backward"]
SEQUENCE_LENGTH = 4
IMAGE_HEIGHT = 8
IMAGE_WIDTH = 8
DATASET_DIR = r"D:\ppl_count.tar (1)\ppl_count\ppl_count\datasets\ir\dataset_test"
Input = (IMAGE_WIDTH, IMAGE_HEIGHT)
def create_dataset():
"""
This function will extract the data of the selected classes and create the required dataset.
Returns:
features: A list containing the extracted frames of the videos.
labels: A list containing the indexes of the classes associated with the videos.
video_files_paths: A list containing the paths of the videos in the disk.
"""
images_dire = r"D:\ppl_count.tar (1)\ppl_count\ppl_count\datasets\ir\test_sample"
dataset_files = create_dataset_files(images_dir=images_dire, datasets_dir=DATASET_DIR,
split_size=100,
num_threads=1,
resize=Input, normalize=True)
return dataset_files
create_dataset()
data1 = numpy.load(r"D:\ppl_count.tar
(1)\ppl_count\ppl_count\datasets\ir\dataset_test\dataset0.npz",
allow_pickle=True)
features_ = data1.f.data
labels_ = data1.f.labels
one_hot_encoded_labels = to_categorical(labels_)
# splits data into train and test sets
features_train, features_test, labels_train, labels_test = train_test_split(features_,
one_hot_encoded_labels,
test_size=0.25,
shuffle=True,
random_state=seed_constant)
print("dataset_creation_success")
def create_convlstm_model():
"""
This function will construct the required convlstm model.
Returns:
model: It is the required constructed convlstm model.
"""
# We will use a Sequential model for model construction
model = Sequential()
# Define the Model Architecture.
# #######################################################################################################################
model.add(ConvLSTM2D(filters=4, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=(SEQUENCE_LENGTH,
IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 1, 1), padding='same', data_format='channels_last'))
model.add(TimeDistributed(Dropout(0.2)))
print(f'1_works')
model.add(ConvLSTM2D(filters=8, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=(SEQUENCE_LENGTH,
IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 1, 1), padding='same', data_format='channels_last'))
model.add(TimeDistributed(Dropout(0.2)))
print(f'2_works')
model.add(ConvLSTM2D(filters=14, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=(SEQUENCE_LENGTH,
IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 1, 1), padding='same', data_format='channels_last'))
model.add(TimeDistributed(Dropout(0.2)))
print(f'3_works')
model.add(ConvLSTM2D(filters=16, kernel_size=(3, 3), activation='tanh', padding='same', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=(SEQUENCE_LENGTH,
IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 1, 1), padding='same', data_format='channels_last'))
model.add(TimeDistributed(Dropout(0.2)))
model.add(Flatten())
model.add(Dense(len(CLASSES_LIST), activation="softmax"))
# #######################################################################################################################
# Display the models summary.
model.summary()
# Return the constructed convlstm model.
return model
convlstm_model = create_convlstm_model()
# Display the success message.
print("Model Created Successfully!")
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, mode='min',
restore_best_weights=True)
# Compile the model and specify loss function, optimizer and metrics values to the model
convlstm_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=
["accuracy"])
# Start training the model.
convlstm_model_training_history = convlstm_model.fit(x=features_train, y=labels_train,
epochs=50, batch_size=4,
shuffle=True, validation_split=0.2,
callbacks=[early_stopping_callback])
**please do ignore some indentations as it's very difficult to post with indentations and provide any solution/reference where I can get answer from. **
tf.keras.layers.ConvLSTM2D expects input of shape 5D but you are giving 4D input.
I could reproduce your issue
import tensorflow as tf
SEQUENCE_LENGTH = 4
IMAGE_HEIGHT = 8
IMAGE_WIDTH = 8
input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH,3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.ConvLSTM2D(filters=4, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=input_shape)(x)
print(y.shape)
Output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-6a18c66320ed> in <module>()
5 x = tf.random.normal(input_shape)
6 y = tf.keras.layers.ConvLSTM2D(filters=4, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
----> 7 recurrent_dropout=0.2, return_sequences=True, input_shape=input_shape)(x)
8 print(y.shape)
2 frames
/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
212 ndim = shape.rank
213 if ndim != spec.ndim:
--> 214 raise ValueError(f'Input {input_index} of layer "{layer_name}" '
215 'is incompatible with the layer: '
216 f'expected ndim={spec.ndim}, found ndim={ndim}. '
ValueError: Input 0 of layer "conv_lstm2d_4" is incompatible with the layer: expected ndim=5, found ndim=4. Full shape received: (4, 8, 8, 3)
Working sample code
import tensorflow as tf
SEQUENCE_LENGTH = 4
IMAGE_HEIGHT = 8
IMAGE_WIDTH = 8
input_shape = (16,SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH,3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.ConvLSTM2D(filters=4, kernel_size=(3, 3), activation='tanh', data_format="channels_last",
recurrent_dropout=0.2, return_sequences=True, input_shape=input_shape)(x)
print(y.shape)
Output
(16, 4, 6, 6, 4)
I'm having a problem implementing a super-resolution model
class SRNet(Model):
def __init__(self, scale=4):
super(SRNet, self).__init__()
self.scale = scale
self.conv1 = Sequential([
layers.Conv2D(filters=64, kernel_size=3,
strides=(1, 1), padding="same", data_format="channels_first"),
layers.ReLU(),
])
self.residualBlocks = Sequential(
[ResidualBlock() for _ in range(16)])
self.convUp = Sequential([
layers.Conv2DTranspose(filters=64, kernel_size=3, strides=(
2, 2), padding="same", data_format="channels_first"),
layers.ReLU(),
layers.Conv2DTranspose(filters=64, kernel_size=3, strides=(
2, 2), padding="same", data_format="channels_first"),
layers.ReLU(),
])
self.reluAfterPixleShuffle = layers.ReLU()
self.convOut = layers.Conv2D(
filters=3, kernel_size=3, strides=(1, 1), padding="same", data_format="channels_first", input_shape=(4, 1440, 2560)) # (kernel, kernel, channel, output)
def call(self, lrCur_hrPrevTran):
lrCur, hrPrevTran = lrCur_hrPrevTran
x = tf.concat([lrCur, hrPrevTran], axis=1)
x = self.conv1(x)
x = self.residualBlocks(x)
x = self.convUp(x)
# pixel shuffle
Subpixel_layer = Lambda(lambda x: tf.nn.depth_to_space(
x, self.scale, data_format="NCHW"))
x = Subpixel_layer(inputs=x)
x = self.reluAfterPixleShuffle(x)
x = self.convOut(x)
return x
Error
/usr/src/app/generator.py:164 call *
x = self.convOut(x)
ValueError: Tensor's shape (3, 3, 64, 3) is not compatible with supplied shape (3, 3, 4, 3)
after reading the error I know that (3, 3, 4, 3) is (kernel size, kernel size, channel, output) mean that only channel of input is not correct
so I printed out the shape of the input
# after pixel shuffle before convOut
print(x.shape)
>>> (1, 4, 1440, 2560) (batch size, channel, height, width)
but the shape of x after pixel shuffle (depth_to_space) is (1, 4, 1440, 2560) the channel value is 4 which is the same as convOut need
question is why the input's channel is changing from 4 to 64 as the error?
I have found a solution
First of all, I'm using checkpoints to save model weight when training
during the implementation and testing of the model, I have changed some of the layers so the input size is changed too, but my weight still remember the input size from the previous checkpoint
so I delete the checkpoints folder and then everything works again
I need to train a 3D_Unet model with (128x128x128) patches of 42 CT scans.
My input data is 128x128x128 for the CT scans and also for masks.
I extended the shape of arrays to (128, 128, 128, 1). Where 1 is the channel.
The problem is how to feed the model with my list of 40 4D-arrays?
How can I use the model.fit() or model.train_on_batch with the correct input shape specified in my Model?
project_name = '3D-Unet Segmentation of Lungs'
img_rows = 128
img_cols = 128
img_depth = 128
# smooth = 1
K.set_image_data_format('channels_last')
#corresponds to inputs with shape:
#(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)
def get_unet():
inputs = Input(shape=(img_depth, img_rows, img_cols, 1))
conv1 = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling3D(pool_size=(2, 2, 2))(conv1)
conv2 = Conv3D(64, (3, 3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv3D(64, (3, 3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling3D(pool_size=(2, 2, 2))(conv2)
....
model = Model(inputs=[inputs], outputs=[conv10])
model.summary()
#plot_model(model, to_file='model.png')
model.compile(optimizer=Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.000000199),
loss='binary_crossentropy', metrics=['accuracy'])
return model
for list of arrays as input
What should I specify in either .train_on_batch() or .fit()?
This is the error I get when using the .train_on_batch option:
ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 42 arrays
model.train_on_batch(train_arrays_list, mask_arrays_list)
This is the error I get when using the .model.fit option after having increased the shape of arrays with axis=0.
UnboundLocalError: local variable 'batch_index' referenced before assignment
model.fit(train_arrays_list[0], mask_arrays_list[0],
batch_size=1,
epochs=50,
verbose=1,
shuffle=True,
validation_split=0.10,
callbacks=[model_checkpoint, csv_logger])
You have to transform your list of numpy arrays of shape (128, 128, 128, 1) into a stacked 5 dimensional numpy array of shape (42, 128, 128, 128, 1). You can do this with: model.fit(np.array(train_arrays_list), np.array(mask_arrays_list), batch_size=1, ...)
My input is 299,299,3
My graphics card is 1070 (8 gigs of ram)
Other Specs: Python 3.6, Keras 2.xx, Tensorflow-backend(1.4), Windows 7
Even batch size of 1 isn't working.
I feel like my card should handle a batch of size one --
Here is my code:
def full_model():
#model layers
input_img = Input(shape=(299, 299, 3))
tower_1 = Conv2D(64, (1, 1,), padding='same', activation='relu')(input_img)
tower_1 = Conv2D(64, (3, 3), padding='same', activation='relu')(tower_1)
tower_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img)
tower_2 = Conv2D(64, (5, 5), padding='same', activation='relu')(tower_2)
concatenated_layer = keras.layers.concatenate([tower_1, tower_2], axis=3)
bottleneck = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(concatenated_layer)
flatten = Flatten()(bottleneck)
dense_1 = Dense(500, activation = 'relu')(flatten)
predictions = Dense(12, activation = 'softmax')(dense_1)
model = Model(inputs= input_img, output = predictions)
SGD =keras.optimizers.SGD(lr=0.1, momentum=0.0, decay=0.0, nesterov=False)
model.compile(optimizer=SGD,
loss='categorical_crossentropy',
metrics=['accuracy'])
return model
hdf5_path =r'C:\Users\Moondra\Desktop\Keras Applications\training.hdf5'
model = full_model()
def run_model( hdf5_path,
epochs = 10,
steps_per_epoch =8,
classes =12,
batch_size =1, model= model ):
for i in range(epochs):
batches = loading_hdf5_files.load_batches(batch_size =1,
hdf5_path=hdf5_path ,
classes = classes)
for i in range(steps_per_epoch):
x,y = next(batches)
#plt.imshow(x[0])
#plt.show()
x = (x/255).astype('float32')
print(x.shape)
data =model.train_on_batch(x,y)
print('loss : {:.5}, accuracy : {:.2%}'.format(*data))
return model
I can't seem to handle even a batch of size one.
Here is the last part of the error:
ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [] and type float
[[Node: conv2d_4/random_uniform/sub = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [] values: 0.0866025388>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
It turns out I have way too many parameters.
After running print(model.summary()), I had a billion plus parameters.
I increased size of MaxPooling and no more problems.