Image understanding - CNN Triplet loss - python

I'm new to NNs and am trying to create a simple NN for image understanding.
I tried using the triplet loss method, but I keep getting errors that make me think I'm missing some fundamental concept.
My code is:
def triplet_loss(x):
    anchor, positive, negative = tf.split(x, 3)

    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), ALPHA)
    loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
    return loss
def build_model(input_shape):
    K.set_image_data_format('channels_last')

    positive_example = Input(shape=input_shape)
    negative_example = Input(shape=input_shape)
    anchor_example = Input(shape=input_shape)

    embedding_network = create_embedding_network(input_shape)

    positive_embedding = embedding_network(positive_example)
    negative_embedding = embedding_network(negative_example)
    anchor_embedding = embedding_network(anchor_example)

    merged_output = concatenate([anchor_embedding, positive_embedding, negative_embedding])
    loss = Lambda(triplet_loss, (1,))(merged_output)

    model = Model(inputs=[anchor_example, positive_example, negative_example],
                  outputs=loss)
    model.compile(loss='mean_absolute_error', optimizer=Adam())
    return model
def create_embedding_network(input_shape):
    input_shape = Input(input_shape)
    x = Conv2D(32, (3, 3))(input_shape)
    x = PReLU()(x)
    x = Conv2D(64, (3, 3))(x)
    x = PReLU()(x)
    x = Flatten()(x)
    x = Dense(10, activation='softmax')(x)
    model = Model(inputs=input_shape, outputs=x)
    return model
Every image is read using:
imageio.imread(imagePath, pilmode="RGB")
And the shape of each image:
(1024, 1024, 3)
Then I use my own triplet method (it just creates sets of anchor, positive and negative examples):
triplets = get_triplets(data)
triplets.shape
The shape is (number of examples, triplet, x_image, y_image, number of channels (RGB)):
(20, 3, 1024, 1024, 3)
Then I use the build_model function:
model = build_model((1024, 1024, 3))
And the problem starts here:
model.fit(triplets, y=np.zeros(len(triplets)), batch_size=1)
When I try to train my model with this line of code, I get an error.
For more details, my code is in this Colab notebook.
The pictures I used can be found in this Drive.
For this to run seamlessly, place this folder under:
My Drive/Colab Notebooks/images/

For anyone else struggling with this:
My problem was actually the dimension of each observation.
By changing the dimension, as suggested in the comments, to
(?, 1024, 1024, 3)
it worked. The Colab notebook has been updated with the solution.
P.S. - I also changed the size of the pictures to 256 * 256 so that the code runs much faster on my PC.
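For completeness, a minimal sketch of what that split can look like, assuming triplets keeps the (N, 3, height, width, channels) layout from above and the three-input model built by build_model (this is not the exact code from the notebook):

import numpy as np

# Split the (N, 3, H, W, C) triplet array into the three model inputs,
# each of shape (N, H, W, C), i.e. (?, 1024, 1024, 3) per input.
anchors = triplets[:, 0]
positives = triplets[:, 1]
negatives = triplets[:, 2]

model = build_model((1024, 1024, 3))
model.fit([anchors, positives, negatives],
          y=np.zeros(len(triplets)),
          batch_size=1)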

Related

TensorFlow: Using None-Values in a Tensor

I am trying to use this DSNT-layer from GitHub:
https://github.com/ashwhall/dsnt/
It seems that the implementation has a problem with placeholders that depend on the input size and the batch size.
My understanding is that the batch size is usually unknown during graph construction, unless one fixes a batch value in the input layer, and only becomes available once training starts.
Based on the batch size, the dsnt layer creates tensors as seen below:
batch_count = tf.shape(norm_heatmap)[0]
height = tf.shape(norm_heatmap)[1]
width = tf.shape(norm_heatmap)[2]
# TODO scalars for the new coord system
scalar_x = ((2 * width) - (width + 1)) / width
scalar_x = tf.cast(scalar_x, tf.float32)
scalar_y = ((2 * height) - (height + 1)) / height
scalar_y = tf.cast(scalar_y, tf.float32)
# Build the DSNT x, y matrices
dsnt_x = tf.tile([[(2 * tf.range(1, width+1) - (width + 1)) / width]], [batch_count, height, 1]) # <-- point of error
dsnt_x = tf.cast(dsnt_x, tf.float32)
dsnt_y = tf.tile([[(2 * tf.range(1, height+1) - (height + 1)) / height]], [batch_count, width, 1])
dsnt_y = tf.cast(tf.transpose(dsnt_y, perm=[0, 2, 1]), tf.float32)
When I run this code, I get the following error message:
raise e.with_traceback(filtered_tb) from None
ValueError: Shape [1,2,3,4,5,...,64] is too large (more than 2**63 - 1 entries) for '{{node Placeholder}} = Placeholder[dtype=DT_INT32, shape=[1,2,3,4,5,..., 64]]()' with input shapes: .
I found answers on Stack Overflow recommending tf.shape to avoid problems with unknown dimensions, but that does not seem to be enough here.
If an input with dimension (None, None, 1) is used, the code runs. It also runs on TensorFlow 2.5.3 or lower.
My question:
How do I use values that are only defined once training has started?
I attached a minimal example, using Python 3.10 and TensorFlow 2.8.
The input is an image of a certain size, e.g. 128x64x1, and the output is the normalized coordinate of the center of mass.
def Minimal_Model(_):
    input_shape = (128, 64, 1)
    X_input = Input(shape=input_shape)
    X_out = Conv2D(filters=1, kernel_size=(1, 1), strides=(1, 1), padding='valid',
                   name="conv", kernel_initializer=he_uniform())(X_input)
    norm_heatmap, coordinates = dsnt.dsnt(X_out)
    model = Model(inputs=X_input, outputs=coordinates, name='Test-DSNT')
    model.compile(optimizer=tensorflow.keras.optimizers.Adam(0.0001),
                  loss=[tf.keras.losses.MeanSquaredError(), tf.keras.losses.MeanSquaredError()],
                  metrics=[tf.keras.metrics.MeanSquaredError()])
    return model
import tensorflow as tf
from models import Minimal_Model
from keras_tuner.tuners import BayesianOptimization
import time
tf.get_logger().setLevel('DEBUG')
MAX_TRIALS = 10
EXECUTION_PER_TRIAL = 1
BATCH_SIZE = 8
EPOCHS = 10
LOG_DIR = 'results-random' + f"{int(time.time())}"
train_images = tf.random.uniform((2000, 64, 128, 1), minval=0, dtype=tf.float32, maxval=1)
test_images = tf.random.uniform((200, 64, 128, 1), minval=0, dtype=tf.float32, maxval=1)
train_labels = tf.random.uniform((2000, 2, 1), minval=0, dtype=tf.float32, maxval=1)
test_labels = tf.random.uniform((200, 2, 1), minval=0, dtype=tf.float32, maxval=1)
tuner = BayesianOptimization(
    Minimal_Model,
    seed=1,
    objective='val_mean_squared_error',
    max_trials=MAX_TRIALS,
    executions_per_trial=EXECUTION_PER_TRIAL,
    directory=LOG_DIR,
    project_name="project"
)
tuner.search(train_images, train_labels, epochs=EPOCHS, batch_size=BATCH_SIZE,
             validation_data=(test_images, test_labels),
             callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_mean_squared_error', restore_best_weights=True,
                                                         patience=3, mode='min')])
# Show a summary of the search
tuner.results_summary(num_trials=1)
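For reference, a common pattern for building such coordinate grids when the batch size is only known at run time is to create the 1-D range from the dynamic width, reshape it to rank 3, and tile it with shape-derived multiples, instead of wrapping the tensor in nested Python lists. A minimal sketch of that pattern (plain TensorFlow; not verified against the DSNT repository or TensorFlow 2.8 specifically):

import tensorflow as tf

def dsnt_x_grid(norm_heatmap):
    # All dimensions come from the runtime shape of the tensor.
    batch = tf.shape(norm_heatmap)[0]
    height = tf.shape(norm_heatmap)[1]
    width = tf.shape(norm_heatmap)[2]

    width_f = tf.cast(width, tf.float32)
    # Normalized x coordinates, one value per column.
    xs = (2.0 * tf.cast(tf.range(1, width + 1), tf.float32) - (width_f + 1.0)) / width_f

    # Reshape to (1, 1, width) and tile to (batch, height, width).
    xs = tf.reshape(xs, (1, 1, -1))
    return tf.tile(xs, tf.stack([batch, height, 1]))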

Issue retrieving ValueError: `decode_predictions` expects a batch of predictions

I have the following code that tries to perform predictions with part of a ResNet model. However, I am getting an error.
def layer_input_shape(Model, layer_index):
    input_shape = np.array(Model.layers[layer_index - 1].output_shape)
    input_shape = np.ndarray.tolist(np.delete(input_shape, 0))
    return input_shape

def resnet50_Model(Model, trainable=True):
    input_shape = layer_input_shape(Model, 1)
    input = tf.keras.layers.Input(shape=input_shape)
    first_layer = Model.layers[0]
    first_layer.trainable = trainable
    out = first_layer(input)
    for i in range(1, 12):
        layer_i = Model.layers[i]
        layer_i.trainable = trainable
        out = layer_i(out)
    out = Conv2D(filters=2, kernel_size=2, strides=(2, 2), activation='relu')(out)
    out = Flatten()(out)
    out = Dense(units=2, activation='softmax')(out)
    result_model = tf.keras.models.Model(inputs=[input], outputs=out)
    return result_model
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
img='/content/elephant.jpg'
img = image.load_img(img, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
preds = resnet_skip_model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])
I get the error below:
ValueError: `decode_predictions` expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 3)
I added a two-unit output Dense layer so that I can predict only two classes, but decode_predictions expects the last Dense layer to have 1000 outputs, so I changed the units from two to 1000:
out = Dense(units=1000,activation='softmax')(out)
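As a side note, decode_predictions is tied to the 1000 ImageNet classes, so with a custom two-class head one would normally skip it and map the softmax output to the custom labels directly. A minimal sketch of that alternative (the class names below are placeholders, not from the original post):

preds = resnet_skip_model.predict(x)     # shape (1, 2) with the two-unit head
class_names = ['class_0', 'class_1']     # placeholder names for the two classes
top = preds[0].argsort()[::-1]           # indices sorted by descending probability
for i in top:
    print(class_names[i], float(preds[0][i]))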

ValueError: cannot reshape array of size 0 into shape (224,512) - plotting heat maps to localize anomalies in images

I've been using VGG16 to create a model that can classify images into two categories, which works perfectly fine. Now I want to create a function to localize anomalies through heat maps as in here: https://towardsdatascience.com/anomaly-detection-in-images-777534980aeb
Unfortunately that's not working for me. The array containing my test image either has the wrong size (ValueError: Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (1, 1, 224, 224, 3), or ValueError: Error when checking input: expected input_1 to have 4 dimensions, but got array with shape (224, 224, 3)), or I get ValueError: cannot reshape array of size 0 into shape (224,512). Here is a code snippet:
[...]
test_image_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
test_data_gen = test_image_generator.flow_from_directory(batch_size=batch_size,
                                                         directory=test_dir,
                                                         shuffle=False,
                                                         target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                         class_mode='binary')

# create the model / import vgg16
vgg_conv = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers except the last 4 layers
for layer in vgg_conv.layers[:-8]:
    layer.trainable = False

# Check the trainable status of the individual layers
for layer in vgg_conv.layers:
    print(layer, layer.trainable)

# modify vgg structure
x = vgg_conv.output
x = GlobalAveragePooling2D()(x)
x = Dense(1, activation="sigmoid")(x)

model = Model(vgg_conv.input, x)
model.compile(loss="binary_crossentropy", optimizer=optimizers.SGD(lr=0.00001, momentum=0.9), metrics=["accuracy"])
model.summary()
[...]
### CREATE FUNCTION TO DRAW ANOMALIES ###
def plot_activation(img):
    pred = model.predict(img[:, :, :, :])
    pred_class = np.argmax(pred)
    weights = model.layers[-1].get_weights()[0]  # weights of the last classification layer
    class_weights = weights[:, pred_class]
    intermediate = Model(model.input, model.get_layer("block5_conv3").output)
    conv_output = intermediate.predict(img)
    conv_output = np.squeeze(conv_output)
    h = int(img.shape[0] / conv_output.shape[0])
    w = int(img.shape[1] / conv_output.shape[1])
    activation_maps = sp.ndimage.zoom(conv_output, (h, w, 1), order=1)
    out = np.dot(activation_maps.reshape((img.shape[0] * img.shape[1], 512)), class_weights).reshape(
        img.shape[0], img.shape[1])
    plt.imshow(img.astype('float32').reshape(img.shape[0], img.shape[1], 3))
    plt.imshow(out, cmap='jet', alpha=0.35)
    plt.title('Crack' if pred_class == 1 else 'No Crack')
    # return out, pred_class

img_width, img_height = 224, 224
img = image.load_img('/Volumes/test_image.jpg', target_size=(img_width, img_height))
img = image.img_to_array(img)
img = np.expand_dims(img, axis=0)
plot_activation(img)
With that code I'm getting the following error message:
Traceback (most recent call last):
File "test.py", line 285, in <module>
plot_activation(img)
File "test.py", line 251, in plot_activation
out = np.dot(activation_maps.reshape((img.shape[0] * img.shape[1], 512)), class_weights).reshape(img.shape[0],
ValueError: cannot reshape array of size 0 into shape (224,512)
I've tried to resize my array but probably haven't done that the right way as I'm only getting different errors (see above). Maybe it's got something to do with how I've been preprocessing the images, but I'm not sure about that one. Can anyone tell me how to fix this?
Additionally, do I need to undo the preprocessing to plot the heat maps correctly? If so, how can I do that?
If there are any questions left, feel free to ask, I'm happy to answer.
EDIT:
This is my adjusted code, which now works fine:
### CREATE FUNCTION TO DRAW ANOMALIES ###
def plot_activation(img):
    pred = model.predict(img[np.newaxis, :, :, :])
    # pred_class = np.argmax(pred)
    pred_class = np.argmax(pred, axis=-1)
    weights = model.layers[-1].get_weights()[0]  # weights of the last classification layer
    class_weights = weights[:, pred_class]
    intermediate = Model(model.input, model.get_layer("block5_conv3").output)
    conv_output = intermediate.predict(img[np.newaxis, :, :, :])
    conv_output = np.squeeze(conv_output)
    h = int(img.shape[0] / conv_output.shape[0])
    w = int(img.shape[1] / conv_output.shape[1])
    activation_maps = sp.ndimage.zoom(conv_output, (h, w, 1), order=1)
    out = np.dot(activation_maps.reshape((img.shape[0] * img.shape[1], 512)), class_weights).reshape(
        img.shape[0], img.shape[1])
    plt.imshow(img.astype('float32').reshape(img.shape[0], img.shape[1], 3))
    plt.imshow(out, cmap='jet', alpha=0.35)
    plt.title('Crack' if pred_class == 1 else 'No Crack')
    plt.show()
    # return out, pred_class

test_images = test_data_gen[0][0][0]
plot_activation(test_images)
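Regarding the side question about undoing the preprocessing before plotting: keras.applications.vgg16.preprocess_input subtracts the ImageNet channel means and converts RGB to BGR, so a displayable copy can be recovered by reversing those two steps. A minimal sketch (assuming the default 'caffe'-style preprocessing; not part of the original code):

import numpy as np

def deprocess_vgg16(x):
    # Reverse vgg16.preprocess_input: add back the BGR channel means,
    # flip the channels back to RGB, and clip to a valid pixel range.
    x = x.copy()
    x[..., 0] += 103.939
    x[..., 1] += 116.779
    x[..., 2] += 123.68
    x = x[..., ::-1]
    return np.clip(x, 0, 255).astype('uint8')

# e.g. plt.imshow(deprocess_vgg16(img)) instead of plotting the raw preprocessed array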

ValueError: Target and input must have the same number of elements. target nelement (50) != input nelement (100)

I'm new to PyTorch, so I tried to learn it by creating a simple dogs vs. cats classification.
The code:
class DogCatClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 2)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        print("1-st: ", x.shape)
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        print("2-nd: ", x.shape)
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        print("3-rd: ", x.shape)
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        print("6-th: ", x.shape)
        x = self.fc2(x)  # bc this is our output layer. No activation here.
        print("7-th: ", x.shape)
        x = F.sigmoid(x)
        print("8-th: ", x.shape)
        return x
I pass a single batch of data (the data shape is (50, 1, 50, 50)):
model = DogCatClassifier()
images, labels = next(iter(train_loader))
preds = model(images)
print(preds)
loss = F.binary_cross_entropy(preds, labels)
My prediction shape is (50, 2), so as I understand it, F.binary_cross_entropy(preds, labels) checks both predictions from a single image, and that's why I get 100 predictions against 50 labels. Coming from TensorFlow, I thought I could just implement the same logic: use sigmoid as the last activation and binary_cross_entropy as the loss function. What I don't understand is how to make this piece of code work.
Your problem arises because you are using binary cross-entropy instead of regular cross-entropy. As the name implies, it checks whether the label is correct or not, so the shape of both tensors (preds and labels in your code) should be the same. Because you are giving the confidence of both classes, the BCE loss function gets confused and the code crashes. You can do one of two things:
1- Change your loss function to F.cross_entropy(preds, labels).
2- Change your code to pick the maximum value as the target:
pred = pred.argmax(dim=1, keepdim=True) # gets the max value
Let me know if this works; if it doesn't, please update the question with the new error.
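A minimal sketch of the first option, assuming the final F.sigmoid call is removed from forward so the model returns raw logits of shape (50, 2), and that labels holds class indices (0 or 1) with shape (50,):

import torch.nn.functional as F

preds = model(images)                          # raw logits, shape (50, 2)
loss = F.cross_entropy(preds, labels.long())   # labels: shape (50,), values 0 or 1
loss.backward()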

Why does my DataGenerator iterate over more data than the size of the dataset and give IndexError: list index out of range?

I'm trying to implement a network with Keras and a TensorFlow back-end, using a transfer-learning model (VGG16). My dataset is a medical-imaging dataset, so instead of a single image I have a series of slices; the dataset is organized in a folder and each series is an np.array() with shape (nb_slices, 512, 512, 3).
My dataset is composed of 1130 training samples and 120 validation samples, so I don't think the data is the problem.
I created a DataGenerator to load my image series into the model without batch-size problems (I used "Training a Keras model from batches of .npy files using generator?" to build my generator class), and I reshaped my volumes to (nb_slices, 224, 224, 3).
Then I used transfer learning and customized a VGG16 network with one more convolution layer, MaxPooling, Flatten, Dense, Dropout and a final Dense layer.
When I start training, there seems to be no problem, but at some point it raises IndexError: list index out of range, and I saw that the DataGenerator iterates past the size of the dataset, but I don't know why...
Which part could cause it ?
Here is my DataGenerator
INPUT_DIM = 224
MAX_PIXEL_VAL = 255
MEAN = 58.09
STDDEV = 49.73

class DataGenerator(keras.utils.Sequence):
    def __init__(self, file_list, labels, data_loc):
        self.listIDs = file_list
        self.labels = labels
        self.data_loc = data_loc
        self.on_epoch_end()

    def __len__(self):
        return int(len(self.listIDs))

    def __getitem__(self, index):
        indexes = self.indexes[index:(index + 1)]
        list_IDS_temp = [self.listIDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDS_temp)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.listIDs))

    def __data_generation(self, list_IDS_temp):
        for ID in list_IDS_temp:
            vol = np.load(self.data_loc + ID + '.npy')
            nb_slices = vol.shape[0]
            pad = int((vol.shape[2] - INPUT_DIM) / 2)
            vol = vol[:, pad:-pad, pad:-pad]
            # standardize
            vol = (vol - np.min(vol)) / (np.max(vol) - np.min(vol)) * MAX_PIXEL_VAL
            # normalize
            vol = (vol - MEAN) / STDDEV
            # convert to RGB
            vol = np.stack((vol,) * 3, axis=3)
            y = np.empty(nb_slices, dtype=int)
            # y = self.labels[ID]
            for i in range(nb_slices):
                y[i] = self.labels[int(ID)]
        return vol, keras.utils.to_categorical(y, num_classes=2)
and my model:
train_set = DataGenerator(df_train['exams'].tolist(), df_train['labels'].tolist(), all_file_loc_train)
valid_set = DataGenerator(df_val['exams'].tolist(), df_val['labels'].tolist(), all_file_loc_val)

model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))  # , input_shape=(224, 224, 3)
layer_dict = dict([(layer.name, layer) for layer in model.layers])

x = layer_dict['block2_pool'].output
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
x = Dense(2, activation='softmax')(x)

custom_model = Model(inputs=model.input, outputs=x)
for layer in custom_model.layers[:7]:
    layer.trainable = False

custom_model.compile(loss='categorical_crossentropy', optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                     metrics=["accuracy"])

results = custom_model.fit_generator(generator=train_set, validation_data=valid_set, epochs=50, verbose=2)
I expected accuracy around 80-90%, but it seems that something goes wrong in my DataGenerator and I don't know what. I need help, please...
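One thing worth checking (a hedged debugging aid, not a definitive answer): __data_generation looks up self.labels[int(ID)], which assumes every exam ID is a valid index into the label list. A quick sanity check over the training lists before building the generators (variable names taken from the code above) would confirm or rule that out:

exam_ids = df_train['exams'].tolist()
labels = df_train['labels'].tolist()

# Flag any exam whose numeric ID falls outside the label list,
# since that lookup would raise IndexError inside __data_generation.
for ID in exam_ids:
    if int(ID) >= len(labels):
        print('exam', ID, 'has no matching label index')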
