Computing a Masking layer and using it a few layers later in Keras - python

I have an architecture that processes padded sequences of fixed length. For whatever reason, passing the mask through some of the intermediate layers doesn't work, so what I want is to:
- Compute the mask right after the Input layer
- Process the input with some other layers
- Apply the mask before it goes into a GRU layer
Something like this:
x = layers.Input(shape=(sequenceLength, inputFeatures))
m = layers.Masking(mask_value=255)(x)
# x = SomeOtherLayers()(x) # some other layers
# Apply initial mask here
x = layers.GRU(units)(x)
Is there an easy way to achieve this? I have tried adding a new mask and overwriting the _keras_mask attribute, but that didn't work:
x = layers.Input(shape=(sequenceLength, inputFeatures), name=name)
m = layers.Masking(mask_value=255)(x)
# x = SomeOtherLayers()(x) # some other layers
x = layers.Masking()(x)
x._keras_mask = m._keras_mask
x = layers.GRU(units)(x)
Maybe I am approaching this the wrong way. Any suggestion is welcome.
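One direct route (a sketch, not from the original answers; it assumes the padding value 255 marks every feature of a padded timestep, and units stands in for the GRU width): in TF2's functional API you can compute the boolean mask from the raw input with plain TF ops and pass it to the GRU's mask argument, skipping the intermediate layers entirely.
x = layers.Input(shape=(sequenceLength, inputFeatures))
mask = tf.reduce_any(tf.not_equal(x, 255), axis=-1)  # True on real timesteps
h = x
# h = SomeOtherLayers()(h)  # some other layers
out = layers.GRU(units)(h, mask=mask)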

What I do is use a custom function to compute the mask:
def get_mask_from_lengths(lengths, max_len=None):
    if max_len is None:
        max_len = tf.reduce_max(lengths)
    ids = tf.range(0, max_len)
    mask = ids < lengths
    return mask
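A quick check of what this produces (an illustration, not from the original answer):
lengths = tf.constant([[5], [3]])
print(get_mask_from_lengths(lengths, max_len=5))
# [[ True  True  True  True  True]
#  [ True  True  True False False]]
Note the broadcasting: ids has shape (max_len,) and lengths has shape (batch, 1), so the comparison yields a (batch, max_len) boolean mask.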
Then I define the model:
sequenceLength = 5
inputFeatures = 1
inputs = tf.keras.layers.Input(shape=(sequenceLength, inputFeatures))
lengths = tf.keras.layers.Input(shape=(1,)) # vector containing the length of each element of the batch
x = tf.keras.layers.Dense(units=3)(inputs) # some other layer
mask = get_mask_from_lengths(lengths=lengths)
output = tf.keras.layers.GRU(units=2)(x, mask=mask)
model = tf.keras.Model(inputs=[inputs, lengths], outputs=output)
model.compile(loss='mse', optimizer='adam')
model.summary()
An example:
x1 = tf.reshape(tf.convert_to_tensor([10, 3, 5, 3, 5]), (1, -1))
x2 = tf.reshape(tf.convert_to_tensor([11, 9, 120, 255, 255]), (1, -1))
input_tensor = tf.concat([x1, x2], axis=0)
length_tensor = tf.reshape([5, 3], (-1, 1)) # first sequence x1 is full and x2 has three elements not equal to the masking value 255 (should create a function to get this tensor from input_tensor)
out_tensor = tf.random.uniform(shape=(2, 2))
model.fit([input_tensor, length_tensor], out_tensor, epochs=2)
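As the comment above notes, the length tensor can also be derived from input_tensor. A possible helper (hypothetical, assuming padding is contiguous at the end of each sequence and uses the masking value 255):
def get_lengths_from_inputs(input_tensor, mask_value=255):
    # count, per sequence, how many entries differ from the padding value
    not_masked = tf.not_equal(input_tensor, mask_value)
    return tf.reshape(tf.reduce_sum(tf.cast(not_masked, tf.int32), axis=1), (-1, 1))
# get_lengths_from_inputs(input_tensor)  ->  [[5], [3]]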

Related

Visualizing the attention map of a multihead attention in ViT

I'm trying to visualize the attention map of my Vision Transformer (ViT) architecture in Keras/TensorFlow. For this I was able to implement the ViT model the following way:
def model():
    input_layer = layers.Input(shape=input_shape)
    #image_patches = create_patches(input_layer)
    #print(input_layer.shape)
    image_patches = Patches(patch_size)(input_layer)
    #print(image_patches.shape)
    encoded_patches = PatchEncoder(num_patch, projection_dim)(image_patches)
    #print(encoded_patches.shape)
    #for i in range(transformer_blocks):
    x1 = layers.LayerNormalization()(encoded_patches)
    x1 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=projection_dim, name='MHA_1')(x1, x1)
    x = layers.Add()([x1, encoded_patches])
    x2 = layers.LayerNormalization()(x)
    x2 = mlp_head(x2, transformer_units)
    encoded_patches = layers.Add()([x2, x])
    x = layers.LayerNormalization()(encoded_patches)
    x = layers.Flatten()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(2)(x)
    model = tf.keras.Model(inputs=input_layer, outputs=x)
    print(model.summary())
    return model
I'm now trying to visualize the attention map based on an input image and my model output. For this I first try to predict the outcome and reshape the weights:
def attention_map(model, image):
    size = model.input_shape[1]
    grid_size = int(np.sqrt(model.layers[4].output_shape[-2] - 1))
    # Prepare the input
    X = preprocess_inputs(cv2.resize(image, (size, size)))#[np.newaxis, :] # type: ignore
    # Get the attention weights from each transformer.
    outputs = [
        l.output[1] for l in model.layers if isinstance(l, layers.MultiHeadAttention)
    ]
    weights = np.array(
        tf.keras.models.Model(inputs=model.inputs, outputs=outputs).predict(X_test)
    )
    print(weights.shape)
    num_layers = weights.shape[0]
    num_heads = weights.shape[1]
    reshaped = weights.reshape(
        (num_layers, num_heads, grid_size ** 2 + 1, grid_size ** 2 + 1)
    )
    # From Appendix D.6 in the paper ...
    # Average the attention weights across all heads.
    reshaped = reshaped.mean(axis=1)
    # From Section 3 in https://arxiv.org/pdf/2005.00928.pdf ...
    # To account for residual connections, we add an identity matrix to the
    # attention matrix and re-normalize the weights.
    reshaped = reshaped + np.eye(reshaped.shape[1])
    reshaped = reshaped / reshaped.sum(axis=(1, 2))[:, np.newaxis, np.newaxis]
    # Recursively multiply the weight matrices
    v = reshaped[-1]
    for n in range(1, len(reshaped)):
        v = np.matmul(v, reshaped[-1 - n])
    # Attention from the output token to the input space.
    mask = v[0, 1:].reshape(grid_size, grid_size)
    mask = cv2.resize(mask / mask.max(), (image.shape[1], image.shape[0]))[
        ..., np.newaxis
    ]
    return (mask * image).astype("uint8")
However, my problem is that reshaping the weight matrix produces a shape mismatch. Can someone give me a hint on why this is occurring? A hint based on the output dimension given by
weights = np.array(
    tf.keras.models.Model(inputs=model.inputs, outputs=outputs).predict(X_test)
)
would also help.
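One thing worth checking (an observation, not a verified fix for this exact model): tf.keras.layers.MultiHeadAttention only returns attention scores when it is called with return_attention_scores=True; otherwise its output is a single tensor and l.output[1] is not an attention map at all. A sketch of wiring that into the block above, reusing the names from the question:
x1 = layers.LayerNormalization()(encoded_patches)
x1, attn_scores = layers.MultiHeadAttention(
    num_heads=num_heads, key_dim=projection_dim, name='MHA_1'
)(x1, x1, return_attention_scores=True)
# attn_scores has shape (batch, num_heads, num_patches, num_patches) and can be
# exposed as an extra model output for visualization.
Note also that the reshape to (grid_size ** 2 + 1, grid_size ** 2 + 1) assumes a class token, while the model above encodes only the image patches.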

How to concatenate a tensor to a keras layer along batch (without specifying batch size)?

I want to concatenate the output from an embedding layer with a custom tensor (myarr / myconst). I can specify everything with a fixed batch size as follows:
import numpy as np
import tensorflow as tf
BATCH_SIZE = 100
myarr = np.ones((10, 5))
myconst = tf.constant(np.tile(myarr, (BATCH_SIZE, 1, 1)))
# Model definition
inputs = tf.keras.layers.Input((10,), batch_size=BATCH_SIZE)
x = tf.keras.layers.Embedding(10, 5)(inputs)
x = tf.keras.layers.Concatenate(axis=1)([x, myconst])
model = tf.keras.models.Model(inputs=inputs, outputs=x)
However, if I don't specify batch size and tile my array, i.e. just the following...
myarr = np.ones((10, 5))
myconst = tf.constant(myarr)
# Model definition
inputs = tf.keras.layers.Input((10,))
x = tf.keras.layers.Embedding(10, 5)(inputs)
x = tf.keras.layers.Concatenate(axis=1)([x, myconst])
model = tf.keras.models.Model(inputs=inputs, outputs=x)
... I get an error specifying that shapes [(None, 10, 5), (10, 5)] can't be concatenated. Is there a way to add this None / batch_size axis to avoid tiling?
Thanks in advance
You want to concatenate a constant of shape (10, 5) to a 3D tensor of shape (batch, 10, 5). To do this, your constant must be 3D as well, so you have to reshape it to (1, 10, 5) and repeat it along axis=0 to match the shape (batch, 10, 5) before concatenating.
We do this inside a Lambda layer:
X = np.random.randint(0,10, (100,10))
Y = np.random.uniform(0,1, (100,20,5))
myarr = np.ones((1, 10, 5)).astype('float32')
myconst = tf.convert_to_tensor(myarr)
def repeat_const(tensor, myconst):
    shapes = tf.shape(tensor)
    return tf.repeat(myconst, shapes[0], axis=0)
inputs = tf.keras.layers.Input((10,))
x = tf.keras.layers.Embedding(10, 5)(inputs)
xx = tf.keras.layers.Lambda(lambda x: repeat_const(x, myconst))(x)
x = tf.keras.layers.Concatenate(axis=1)([x, xx])
model = tf.keras.models.Model(inputs=inputs, outputs=x)
model.compile('adam', 'mse')
model.fit(X, Y, epochs=3)
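A quick shape check (using the model just defined): the embedding output (None, 10, 5) and the repeated constant (None, 10, 5) are concatenated along axis=1, so the output is (batch, 20, 5), matching Y:
print(model.predict(X).shape)  # (100, 20, 5)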

"Invalid argument: indices[0,0,0,0] = 30 is not in [0, 30)"

Error:
InvalidArgumentError: indices[0,0,0,0] = 30 is not in [0, 30)
[[{{node GatherV2}}]] [Op:IteratorGetNext]
History:
I have a custom data loader for a tf.keras based U-Net for semantic segmentation, based on this example. It is written as follows:
def parse_image(img_path: str) -> dict:
    # read image
    image = tf.io.read_file(img_path)
    #image = tfio.experimental.image.decode_tiff(image)
    if xf == "png":
        image = tf.image.decode_png(image, channels = 3)
    else:
        image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.convert_image_dtype(image, tf.uint8)
    #image = image[:, :, :-1]
    # read mask
    mask_path = tf.strings.regex_replace(img_path, "X", "y")
    mask_path = tf.strings.regex_replace(mask_path, "X." + xf, "y." + yf)
    mask = tf.io.read_file(mask_path)
    #mask = tfio.experimental.image.decode_tiff(mask)
    mask = tf.image.decode_png(mask, channels = 1)
    #mask = mask[:, :, :-1]
    mask = tf.where(mask == 255, np.dtype("uint8").type(NoDataValue), mask)
    return {"image": image, "segmentation_mask": mask}
train_dataset = tf.data.Dataset.list_files(
    dir_tls(myear = year, dset = "X") + "/*." + xf, seed = zeed)
train_dataset = train_dataset.map(parse_image)

val_dataset = tf.data.Dataset.list_files(
    dir_tls(myear = year, dset = "X_val") + "/*." + xf, seed = zeed)
val_dataset = val_dataset.map(parse_image)
## data transformations--------------------------------------------------------
@tf.function
def normalise(input_image: tf.Tensor, input_mask: tf.Tensor) -> tuple:
    input_image = tf.cast(input_image, tf.float32) / 255.0
    return input_image, input_mask

@tf.function
def load_image_train(datapoint: dict) -> tuple:
    input_image = tf.image.resize(datapoint["image"], (imgr, imgc))
    input_mask = tf.image.resize(datapoint["segmentation_mask"], (imgr, imgc))
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    input_image, input_mask = normalise(input_image, input_mask)
    return input_image, input_mask

@tf.function
def load_image_test(datapoint: dict) -> tuple:
    input_image = tf.image.resize(datapoint["image"], (imgr, imgc))
    input_mask = tf.image.resize(datapoint["segmentation_mask"], (imgr, imgc))
    input_image, input_mask = normalise(input_image, input_mask)
    return input_image, input_mask
## create datasets-------------------------------------------------------------
buff_size = 1000
dataset = {"train": train_dataset, "val": val_dataset}

#-- Train Dataset --#
dataset["train"] = dataset["train"]\
    .map(load_image_train, num_parallel_calls = tf.data.experimental.AUTOTUNE)
dataset["train"] = dataset["train"].shuffle(buffer_size = buff_size,
                                            seed = zeed)
dataset["train"] = dataset["train"].repeat()
dataset["train"] = dataset["train"].batch(bs)
dataset["train"] = dataset["train"].prefetch(buffer_size = AUTOTUNE)

#-- Validation Dataset --#
dataset["val"] = dataset["val"].map(load_image_test)
dataset["val"] = dataset["val"].repeat()
dataset["val"] = dataset["val"].batch(bs)
dataset["val"] = dataset["val"].prefetch(buffer_size = AUTOTUNE)

print(dataset["train"])
print(dataset["val"])
Now I wanted to use a weighted version of tf.keras.losses.SparseCategoricalCrossentropy for my model and I found this tutorial, which is rather similar to the example above.
However, they also offered a weighted version of the loss, using:
def add_sample_weights(image, label):
    # The weights for each class, with the constraint that:
    #     sum(class_weights) == 1.0
    class_weights = tf.constant([2.0, 2.0, 1.0])
    class_weights = class_weights / tf.reduce_sum(class_weights)
    # Create an image of `sample_weights` by using the label at each pixel as an
    # index into the `class_weights`.
    sample_weights = tf.gather(class_weights, indices=tf.cast(label, tf.int32))
    return image, label, sample_weights
and
weighted_model.fit(
    train_dataset.map(add_sample_weights),
    epochs=1,
    steps_per_epoch=10)
I combined those approaches, since the latter tutorial uses previously loaded data, while I want to read the images from disk (not enough RAM to load them all at once).
This results in the code from the first example (the long code block above), followed by:
def add_sample_weights(image, segmentation_mask):
    class_weights = tf.constant(inv_weights, dtype = tf.float32)
    class_weights = class_weights / tf.reduce_sum(class_weights)
    sample_weights = tf.gather(class_weights,
                               indices = tf.cast(segmentation_mask, tf.int32))
    return image, segmentation_mask, sample_weights
(inv_weights are my weights, an array of 30 float64 values) and
model.fit(dataset["train"].map(add_sample_weights),
          epochs = 45, steps_per_epoch = np.ceil(N_img/bs),
          validation_data = dataset["val"],
          validation_steps = np.ceil(N_val/bs),
          callbacks = cllbs)
When I run
dataset["train"].map(add_sample_weights).element_spec
as in the second example, I get an output that looks reasonable to me (similar to the one in the example):
Out[58]:
(TensorSpec(shape=(None, 512, 512, 3), dtype=tf.float32, name=None),
TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None),
TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None))
However, when I try to fit the model or run something like
a, b, c = dataset["train"].map(add_sample_weights).take(1)
I will receive the error mentioned above.
So far, I have found quite a few questions regarding this error (e.g., a, b, c, d); however, they all talk of "embedding layers" and things I am not aware of using.
Where does this error come from and how can I solve it?
Picture tf.gather as a fancy way to do indexing. The error you get is akin to the following example in python:
>>> my_list = [1,2,3]
>>> my_list[3]
IndexError: list index out of range
If you want to use tf.gather, the range of values of your indices must not exceed the dimension size of the Tensor you are indexing.
In your case, in the call tf.gather(class_weights, indices = tf.cast(segmentation_mask, tf.int32)), with class_weights being a Tensor of dimension (30,), the values of segmentation_mask should be between 0 and 29. As far as I can tell from your data pipeline, segmentation_mask has a range of values between 0 and 255. The fix will be problem dependent.
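One possible direction (a sketch, not a drop-in fix; it assumes every legitimate class id is below 30 and reuses the names from the question) is to clamp the mask into the valid index range before gathering:
def add_sample_weights(image, segmentation_mask):
    class_weights = tf.constant(inv_weights, dtype = tf.float32)
    class_weights = class_weights / tf.reduce_sum(class_weights)
    # clamp stray values (e.g. no-data pixels or interpolation artefacts)
    # into the valid index range [0, 30)
    safe_mask = tf.clip_by_value(tf.cast(segmentation_mask, tf.int32), 0, 29)
    sample_weights = tf.gather(class_weights, indices = safe_mask)
    return image, segmentation_mask, sample_weights
Resizing the masks with tf.image.resize(..., method = "nearest") in load_image_train/load_image_test is also worth considering, since the default bilinear interpolation produces fractional label values.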

How to modify elements of a Keras Tensor object

I am building a Convolutional Neural Network in Keras that receives batches of images with dimensions (None, 256, 256, 1); the output would be batches of size (None, 256, 256, 3). After the final layer's output I want to add a layer that assigns values to some of the pixels in the output based on a value condition on the inputs. Here is what I tried:
The Function
def SetBoundaries(ins):
    xi = ins[0]
    xo = ins[1]
    bnds = np.where(xi[:, :, :, 0] == 0)
    bnds_s, bnds_i, bnds_j = bnds[0], bnds[1], bnds[2]
    xo[bnds_s, bnds_i, bnds_j, 0] = 0
    xo[bnds_s, bnds_i, bnds_j, 1] = 0
    xo[bnds_s, bnds_i, bnds_j, 2] = 0
    return xo
Keras model
def conv_res(inputs):
    x0 = inputs
    ...
    xc = conv_layer(xc, kernel_size=3, stride=1,
                    num_filters=3, name="Final_Conv")
    # apply assignment function
    xc = Lambda(SetBoundaries, name="assign_boundaries")([x0, xc])
    return xc
Finally, the model is built using
def build_model(inputs):
    xres = int(inputs.shape[1])
    yres = int(inputs.shape[2])
    cres = int(inputs.shape[3])
    inputs = Input((xres, yres, cres))
    outputs = UNet.conv_res(inputs)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
However, when I run it I get the error:
NotImplementedError: Cannot convert a symbolic Tensor (assign_boundaries/Equal:0) to a numpy array.
Everything works fine without the Lambda layer. I understand the issue is assigning values to a Tensor object, but how can I achieve what I am after?
Thanks
np.where works with NumPy arrays, but the output from your model is a TensorFlow tensor. Try using tf.where, which does the same thing but for tf.Tensors.
I managed to make it work by changing the function to:
def SetBoundaries(ins):
    xi = ins[0]
    xo = ins[1]
    xin = tf.broadcast_to(xi, tf.shape(xo))
    mask = K.cast(tf.not_equal(xin, 0), dtype="float32")
    xf = layers.Multiply()([mask, xo])
    return xf
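The trick is to express the assignment as a masked multiplication, since symbolic tensors don't support item assignment. An equivalent sketch using tf.where (assuming, as above, that xi broadcasts against xo's shape):
def SetBoundaries(ins):
    xi, xo = ins
    cond = tf.broadcast_to(tf.not_equal(xi, 0), tf.shape(xo))
    # keep xo where the input is non-zero, force 0 elsewhere
    return tf.where(cond, xo, tf.zeros_like(xo))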

Solved: How to combine tf.gradients with tf.data.dataset and keras models

I'm trying to build a workflow that uses tf.data.dataset batches and an iterator. For performance reasons, I am really trying to avoid using the placeholder->feed_dict loop workflow.
The process I'm trying to implement involves grad-cam (which requires the gradient of the loss with respect to the final convolutional layer of a CNN) as an intermediate step, and ideally I'd like to be able to try it out on several Keras pre-trained models, including non-sequential ones like ResNet.
Most implementations of grad-cam that I've found rely on hand-crafting the CNN of interest in TensorFlow. I found one implementation, https://github.com/jacobgil/keras-grad-cam, that is made for Keras models, and following that example, I get:
def safe_norm(x):
    return x / tf.sqrt(tf.reduce_mean(x ** 2) + 1e-8)

vgg_ = VGG19()
dataset = tf.data.Dataset.from_tensor_slices((filenames))
#preprocessing...
it = dataset.make_one_shot_iterator()
files, batch = it.get_next()
conv5_4 = vgg_.layers[-6]
h_k, w_k, c_k = conv5_4.output.shape[1:]
vgg_model = Model(inputs=vgg_.input, outputs=vgg_.output)
conv_model = Model(inputs=vgg_.input, outputs=conv5_4.output)
probs = vgg_model(batch)
predicted_class = tf.argmax(probs, axis=-1)
layer_name = 'block5_conv4'
target_layer = lambda x: target_category_loss(x, predicted_class, n_categories)
x = Lambda(target_layer)(vgg_model.outputs[0])
model = Model(inputs=vgg_model.inputs[0], outputs=x)
loss = K.sum(model.output, axis=-1)
conv_output = [l for l in model.layers if l.name == layer_name][0].output
grads = Lambda(safe_norm)(K.gradients(loss, [conv_output])[0])
gradient_function = K.function([model.input], [conv_output, grads])
output, grads_val = gradient_function([batch])
weights = tf.reduce_mean(grads_val, axis = (1, 2))
cam = tf.ones([batch_size, h_k, w_k], dtype = tf.float32)
cam += tf.reduce_sum(output * tf.reshape(weights, [-1, 1, 1, weights.shape[-1]]), axis=-1)
cam = tf.squeeze(tf.image.resize_images(images=tf.expand_dims(cam, axis=-1), size=(224, 224)))
cam = tf.maximum(cam, 0)
heatmap = cam / tf.reshape(tf.reduce_max(cam, axis=[1, 2]), shape=[-1, 1, 1])
The problem is that gradient_function([batch]) returns a numpy array whose value is determined by the first batch, so that heatmap doesn't change with subsequent evaluations.
I've tried replacing K.function with a Model in various ways, but nothing seems to work. I usually end up either with an error suggesting that grads evaluates to None or that one model or another is expecting a feed_dict and not receiving one.
Is this code salvageable? Is there a better way to do this besides looping through the data several times (once to get all the grad-cams and then again once I have them) or using placeholders and feed_dicts?
Edit:
The same code as above, up to the heatmap computation, followed by:
# other operations on heatmap and batch ...
# ...
output_function = K.function(model.input, [node1, ..., nodeN])
for batch in range(n_batches):
    outputs1, ... , outputsN = output_function(batch)
This gives me the desired outputs for each batch.
Yes, K.function returns numpy arrays because it evaluates the symbolic computation in your graph. What I think you should do is keep everything symbolic up to K.function and, after getting the gradients, perform all computations of the Grad-CAM weights and final saliency map in numpy.
Then you can iterate over your dataset, evaluate gradient_function on each new batch of data, and compute the saliency map.
If you want to keep everything symbolic instead, then you should not use K.function to produce the gradient function. Instead, take the symbolic gradients (the output of K.gradients, without the Lambda) and the convolutional feature maps (conv_output), build the saliency-map computation on top of them, and only then create a single function (using K.function) that takes the model input and outputs the saliency map.
Hope the explanation is enough.
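A minimal sketch of that fully symbolic variant, reusing the names and TF1-style setup from the question (so everything stays in the graph and only one K.function is built):
grads_sym = safe_norm(K.gradients(loss, [conv_output])[0])  # symbolic, no Lambda
weights_sym = tf.reduce_mean(grads_sym, axis=(1, 2))        # GAP over spatial dims
cam_sym = tf.reduce_sum(conv_output * tf.reshape(weights_sym, [-1, 1, 1, int(c_k)]), axis=-1)
cam_sym = tf.maximum(cam_sym, 0)
heatmap_sym = cam_sym / tf.reshape(tf.reduce_max(cam_sym, axis=[1, 2]), [-1, 1, 1])
# one function from the model input to the saliency map; evaluate per batch
saliency_function = K.function([model.input], [heatmap_sym])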
