I want to perform some transformations on the input batch during training. For example, if I have a batch of images of size (number of samples, width, height, channels), I want to replace the 3rd channel with the difference of the first two channels, then resize the images and finally normalize them.
I tried to define a custom layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        # Loaded images
        self.img_tr = []
        for image in input_:
            img_input = resize(image, (267, 400))  # from skimage.transform import resize
            img_diff = (img_input[:, :, 1] / np.max(img_input[:, :, 1])) - ((img_input[:, :, 0] + img_input[:, :, 2]) / np.max(img_input[:, :, 0] + img_input[:, :, 2]))
            img_temp = np.zeros((267, 400, 3))
            img_temp[:, :, 0] = img_input[:, :, 0] / np.max(img_input[:, :, 0])
            img_temp[:, :, 1] = img_input[:, :, 1] / np.max(img_input[:, :, 1])
            img_temp[:, :, 2] = img_diff / np.max(img_diff)
            self.img_tr.append(img_temp)
        self.img_tr = np.asarray(self.img_tr)
        return self.img_tr
Then I used:
input_0 = tf.keras.Input(shape=(None, None, 3))
clayer = CustomLayer()
input_1 = clayer(input_0)
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(7, 7), activation=tf.keras.activations.relu)(input_1)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(units=64, activation=tf.keras.activations.relu)(x)
output = tf.keras.layers.Dense(units=12)(x)

model = tf.keras.Model(inputs=input_0, outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]  # metrics should be a list
)
model.summary()
I get an error that says:
AttributeError: 'Tensor' object has no attribute 'ndim'
I think the issue is related to the fact that my custom layer expects a 4D numpy array, but the input actually has this format:
<KerasTensor: shape=(None, None, None, 3) dtype=float32 (created by layer 'input_20')>
How can I resolve the issue? I cannot find a way to convert KerasTensor to a numpy array inside my custom layer.
Edit
I tried to avoid for loops and numpy so I tried:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        input_ = tf.Variable(input_)
        img_input = tf.image.resize(input_, (267, 400))
        img_diff = (img_input[:, :, :, 1]) - ((img_input[:, :, :, 0] + img_input[:, :, :, 2]))
        img_input[:, :, :, 2] = img_diff
        output_img = tf.image.per_image_standardization(img_input)
        return input_
However, when I use the custom layer in the functional API I get the error:
ValueError: Tensor-typed variable initializers must either be wrapped in an init_scope or callable (e.g., `tf.Variable(lambda : tf.truncated_normal([10, 40]))`) when building functions. Please file a feature request if this restriction inconveniences you.
It seems to have something to do with tf.Variable. Even if I set validate_shape to False, I still get the same error.
Simply removing tf.Variable does the job. Below is the full layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, inp):
        img_input = tf.image.resize(inp, (267, 400))
        img_diff = (img_input[:, :, :, 1]) - ((img_input[:, :, :, 0] + img_input[:, :, :, 2]))
        img_diff = tf.expand_dims(img_diff, -1)
        img_input = tf.keras.layers.Concatenate()([img_input[:, :, :, :-1], img_diff])
        output_img = tf.image.per_image_standardization(img_input)
        return output_img
I used tf.keras.layers.Concatenate to replace the last channel of img_input with img_diff.
Here is the running notebook.
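As a quick sanity check, the layer can be called on a dummy batch (a sketch; the batch shape here is just for illustration):

layer = CustomLayer()
batch = tf.random.uniform((2, 300, 500, 3))  # any spatial size works
out = layer(batch)
print(out.shape)  # (2, 267, 400, 3): resized, last channel replaced, standardized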
Related
How to change the out_features of densenet121 model?
I am using the code below to train the model:
import torch
import torch.nn as nn
from torch.nn import AvgPool2d
from torchvision import models

class Densnet121(nn.Module):
    def __init__(self):
        super(Densnet121, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1)
        self.Densenet_121 = models.densenet121(pretrained=True)
        self.gap = AvgPool2d(kernel_size=2, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(1024)
        self.do1 = nn.Dropout(0.25)
        self.linear = nn.Linear(256, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.do2 = nn.Dropout(0.25)
        self.output = nn.Linear(64 * 64 * 64, 2)
        self.act = nn.ReLU()

    def densenet(self):
        for param in self.Densenet_121.parameters():
            param.requires_grad = False
        self.Densenet_121.classifier = nn.Linear(1024, 1024)
        return self.Densenet_121

    def forward(self, x):
        img = self.act(self.cnn1(x))
        img = self.densenet(img)
        img = self.gap(img)
        img = self.bn1(img)
        img = self.do1(img)
        img = self.linear(img)
        img = self.bn2(img)
        img = self.do2(img)
        img = torch.flatten(img, 1)
        img = self.output(img)
        return img
When training this model, I face the following error:
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 64, 62, 62] to have 3 channels, but got 64 channels instead
Your first conv layer outputs a tensor of shape (b, 64, h, w), while the layer that follows, the densenet model, expects 3 channels. Hence the error that was raised:
"expected input [...] to have 3 channels, but got 64 channels instead"
Unfortunately, this value is hardcoded in the source of the Densenet class, see reference.
One workaround however is to overwrite the first convolutional layer after the densenet has been initialized. Something like this should work:
# First gather the conv layer specs
conv = self.Densenet_121.features.conv0
kwargs = {k: getattr(conv, k) for k in
          ('out_channels', 'stride', 'kernel_size', 'padding')}
# bias on the layer is a tensor (or None), while nn.Conv2d expects a bool
kwargs['bias'] = conv.bias is not None
# overwrite with identical specs but a new in_channels
self.Densenet_121.features.conv0 = nn.Conv2d(in_channels=64, **kwargs)
Alternatively, you can do:
w = model.features.conv0.weight
# keep the kernel's spatial dims (w.shape[2:]), swap in 64 input channels
w.data = torch.rand(len(w), 64, *w.shape[2:])
This replaces the underlying convolution's weight tensor without updating its metadata (e.g. conv.in_channels remains equal to 3), which could have side effects. So I would recommend following the first approach.
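A quick way to verify the first workaround (a sketch; it only exercises the patched feature extractor, not the rest of the question's model):

import torch
import torch.nn as nn
from torchvision import models

densenet = models.densenet121(pretrained=True)
conv = densenet.features.conv0
kwargs = {k: getattr(conv, k) for k in ('out_channels', 'stride', 'kernel_size', 'padding')}
# rebuild conv0 with the same specs but 64 input channels
densenet.features.conv0 = nn.Conv2d(in_channels=64, bias=conv.bias is not None, **kwargs)

feats = densenet.features(torch.rand(1, 64, 224, 224))
print(feats.shape)  # torch.Size([1, 1024, 7, 7])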
I'm trying to load a Keras RNN model which has a custom RNN cell. Here's my model code:
class RLSCell(keras.layers.Layer):
    def __init__(self, units, filters, kernel, **kwargs):
        self.units = filters
        self.state_size = None
        self.conv3d_1 = layers.Conv3D(filters, kernel, activation='relu', padding='SAME')
        self.conv3d_2 = layers.Conv3D(1, kernel, activation='linear', padding='SAME')
        super(RLSCell, self).__init__(**kwargs)

    def build(self, input_shapes):
        self.state_size = tf.TensorShape([input_shapes[1], input_shapes[2], input_shapes[3], input_shapes[4]])

    def call(self, inputs, states):
        prev_output = states[0]
        conc = tf.concat([inputs, prev_output], axis=4)
        conv = self.conv3d_1(conc)
        output = self.conv3d_2(conv)
        return output, [output]

cell = RLSCell(img_size, 32, 3)
layer = layers.RNN(cell)
y = layer(inp, initial_state=x)
For saving the model I used the predefined tf.keras.callbacks.ModelCheckpoint callback.
Here's my code for loading the model:
model = tf.keras.models.load_model(
    'RLS results/checkpoint ep11-loss0.02',
    custom_objects={
        'RLSCell': RLSCell,
        'RNN': layers.RNN(RLSCell(IMG_SIZE, 32, 3))})
No matter whether I include the RNN entry or not, I get the following error:
RuntimeError: Unable to restore object of class 'RNN' likely due to name conflict with built-in Keras class '<class 'keras.layers.recurrent.RNN'>'.
To override the built-in Keras definition of the object, decorate your class with `@keras.utils.register_keras_serializable` and include that file in your program,
or pass your class in a `keras.utils.CustomObjectScope` that wraps this load call
I'm not sure how to tell it that this is the actual Keras RNN class and only the cell is different. Please help.
Thanks in advance.
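The error message itself points at two routes. A minimal sketch of each (assuming the RLSCell body from above; untested against this exact checkpoint):

import tensorflow as tf
from tensorflow import keras

# Route 1: register the custom cell by name, so the loader can resolve it
# without shadowing the built-in RNN class.
@keras.utils.register_keras_serializable(package='custom')
class RLSCell(keras.layers.Layer):
    ...  # body as in the question

# Route 2: map only the cell class (the class itself, not an RNN instance)
# inside a CustomObjectScope wrapping the load call.
with keras.utils.CustomObjectScope({'RLSCell': RLSCell}):
    model = tf.keras.models.load_model('RLS results/checkpoint ep11-loss0.02')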
I'm getting a runtime error when trying to sequentialise my linear layer with a DINO backbone from torch.hub.
import torch
import torch.distributed as dist

class LinearClassifier(torch.nn.Module):
    def __init__(self, dim, num_labels=1000):
        super(LinearClassifier, self).__init__()
        self.num_labels = num_labels
        self.linear = torch.nn.Linear(dim, num_labels)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        # flatten
        x = x.view(x.size(0), -1)
        # linear layer
        return self.linear(x)

dist.init_process_group('gloo', init_method='file:///tmp/somefile', rank=0, world_size=1)

# load backbone
model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')

# Setup linear layer
linear_classifier = LinearClassifier(1536, 1000)
linear_classifier = linear_classifier.cuda()
linear_classifier = torch.nn.parallel.DistributedDataParallel(linear_classifier)
state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_linearweights.pth")['state_dict']
linear_classifier.load_state_dict(state_dict, strict=True)

# Sequentialise
model = torch.nn.Sequential(model, linear_classifier)

x = torch.ones((1, 3, 224, 224))
out = model(x)
print("out:", out)
Here is a print of the last layers of my sequentialised model: (screenshot of the last layers omitted)
It looks like the output of model(x) (as defined by model = torch.hub...) has shape 1 x 384, but your linear_classifier expects something of shape _ x 1536, which is why you get this error. You can adjust the number of inputs by setting
linear_classifier = LinearClassifier(384, 1000)
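A minimal shape check of the corrected pipeline (a sketch: it skips the CUDA/DistributedDataParallel wrapping, and note the published 1536-dim linear weights would no longer load strictly into this smaller head):

backbone = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
classifier = LinearClassifier(384, 1000)
seq = torch.nn.Sequential(backbone, classifier)

out = seq(torch.ones(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])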
I'm trying to follow this GradCam Tutorial with my own model. Here is its architecture:
import tensorflow as tf
from tensorflow import keras as K
import numpy as np

class CNNModel(K.Model):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.base = K.applications.EfficientNetB1(input_shape=(224, 224, 12),
                                                  include_top=False,
                                                  weights=None)
        self.pool = K.layers.GlobalAveragePooling2D()
        self.drop1 = K.layers.Dropout(0.25)
        self.dense1 = K.layers.Dense(16, activation='relu')
        self.drop2 = K.layers.Dropout(0.25)
        self.out = K.layers.Dense(1, activation='sigmoid')

    def call(self, x, training=None, **kwargs):
        x = self.base(x)
        x = self.pool(x)
        x = self.drop1(x)
        x = self.dense1(x)
        x = self.drop2(x)
        x = self.out(x)
        return x

model = CNNModel()
model.build(input_shape=(None, 224, 224, 12))
I need to get the last convolutional layer, so I'm getting the one from the base (EfficientNet) model:
last_conv_layer_name = list(filter(lambda x: isinstance(x, tf.keras.layers.Conv2D), model.base.layers))[-1].name
Then I'm trying to make a two-output model based on that, just like in the tutorial.
grad_model = tf.keras.models.Model(
    [model.base.inputs],
    [model.base.get_layer(last_conv_layer_name).output, model.output]
)
I'm getting:
AttributeError: Layer cnn_model has no inbound nodes
I faced a similar issue with a subclassed model that I wanted to use for Grad-CAM by incorporating it into the functional API. What worked for me back then was to build a separate subclassed model for Grad-CAM and construct the desired multi-output model in __init__:
class CNNModel(K.Model):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.base = K.applications.EfficientNetB1(input_shape=(224, 224, 12),
                                                  include_top=False,
                                                  weights=None)
        # desired model
        self.base = K.Model(
            [self.base.inputs],
            [self.base.get_layer('top_conv').output, self.base.output]
        )
        self.pool = K.layers.GlobalAveragePooling2D()
        self.drop1 = K.layers.Dropout(0.25)
        self.dense1 = K.layers.Dense(16, activation='relu')
        self.drop2 = K.layers.Dropout(0.25)
        self.out = K.layers.Dense(1, activation='sigmoid')

    def call(self, x, training=None, **kwargs):
        x = self.base(x)
        top_conv = x[0]
        x = x[1]
        x = self.pool(x)
        x = self.drop1(x)
        x = self.dense1(x)
        x = self.drop2(x)
        x = self.out(x)
        return top_conv, x

model = CNNModel()
model.build(input_shape=(None, 224, 224, 12))
Passing some data to check.
img_array = np.random.rand(1, 224, 224, 12).astype(np.float32)
(convOutputs, predictions) = model(img_array)
print(convOutputs.shape, predictions.shape)
(1, 7, 7, 1280) (1, 1)
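From here, the two outputs plug into the tutorial's GradientTape step; a rough sketch of that pattern (not the full Grad-CAM code):

with tf.GradientTape() as tape:
    convOutputs, predictions = model(img_array)
    loss = predictions[:, 0]

# gradients of the prediction w.r.t. the last conv feature map
grads = tape.gradient(loss, convOutputs)
print(grads.shape)  # (1, 7, 7, 1280)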
I'm trying to follow this tutorial: https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb. However, when I tried to save the model in order to load it again without training, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`.
I understood from the answer that I need to make the encoder and decoder into classes and customise them (instead of leaving them as functions like the colab tutorial does), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, rate=0.1, **kwargs):
        # super(EncoderLayer, self).__init__()
        super().__init__(**kwargs)
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            # 'vocab_size': self.vocab_size,
            # 'num_layers': self.num_layers,
            # 'units': self.units,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
        return out2
and the same for the decoder layer class. Then I used the same encoder as in the TF documentation:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]

        # adding embedding and position encoding.
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x  # (batch_size, input_seq_len, d_model)
The model-building function is:
def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)
    # mask the future tokens for decoder inputs at the 1st attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)
    # mask the encoder outputs for the 2nd attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)

    enc_outputs = Encoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        input_vocab_size=vocab_size,
    )(inputs=[inputs, enc_padding_mask])

    dec_outputs = Decoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        target_vocab_size=vocab_size,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# the model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
# D_MODEL = tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1

model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)
However, I got this error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused: I don't understand what dff and maximum_position_encoding mean in the documentation, and when I removed them from the encoder and decoder classes I got another error, since positional_encoding takes the maximum position as input and dff is also used inside the class. I am not sure whether I am following the right steps at all.
If you get this error while calling transformer then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you reference (self.d_model etc.) are never defined or assigned in __init__.
It doesn't exist at all for your Encoder class.
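Putting those three points together, a corrected EncoderLayer could look like this (a sketch; MultiHeadAttention and point_wise_feed_forward_network come from the linked tutorial):

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # keep the constructor arguments around for get_config
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config().copy()
        # report `rate` under its real name, not `dropout`
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2

An analogous get_config on Encoder (returning num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding and rate) is needed as well, and the Encoder call inside transformer must pass dff and maximum_position_encoding through.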