I'm trying to implement a CReLU layer in Keras.
One option that seems to work is to use a Lambda layer:
def _crelu(x):
    """Return ``tf.nn.crelu`` of ``x``, concatenating along the last axis."""
    return tf.nn.crelu(x, axis=-1)
def _conv_bn_crelu(x, n_filters, kernel_size):
    """Apply Conv2D, BatchNormalization and a Lambda-wrapped CReLU to ``x``.

    Args:
        x: Input tensor.
        n_filters: Number of convolution filters.
        kernel_size: Convolution kernel size.

    Returns:
        The tensor produced by the three stacked layers.
    """
    convolution = Conv2D(
        filters=n_filters,
        kernel_size=kernel_size,
        strides=(1, 1),
        padding='same',
    )
    normalized = BatchNormalization(axis=-1)(convolution(x))
    return Lambda(_crelu)(normalized)
But I wonder: does the Lambda layer introduce some overhead in the training or inference process?
My second attempt is to create a Keras layer that is a wrapper around tf.nn.crelu:
class CRelu(Layer):
    """Keras layer that applies ``tf.nn.crelu`` along the last axis."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        super().build(input_shape)

    def call(self, x):
        return tf.nn.crelu(x, axis=-1)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with its last entry doubled, as a tuple."""
        *leading_dims, channels = input_shape
        return (*leading_dims, channels * 2)
def _conv_bn_crelu(x, n_filters, kernel_size):
    """Apply Conv2D, BatchNormalization and the custom ``CRelu`` layer to ``x``.

    Args:
        x: Input tensor.
        n_filters: Number of convolution filters.
        kernel_size: Convolution kernel size.

    Returns:
        The tensor produced by the three stacked layers.
    """
    stack = (
        Conv2D(filters=n_filters, kernel_size=kernel_size, strides=(1, 1), padding='same'),
        BatchNormalization(axis=-1),
        CRelu(),
    )
    for layer in stack:
        x = layer(x)
    return x
Which version will be more efficient?
I am also looking for a pure Keras implementation, if that's possible.
I don't think there is a significant difference between the two implementations speed-wise.
The Lambda implementation is actually the simplest, but writing a custom Layer as you have done is usually better, especially with regard to model saving and loading (the get_config method).
But in this case it doesn't matter much, as CReLU is trivial and doesn't require saving and restoring any parameters. You can still store the axis parameter, as in the code below; that way it will be restored automatically when the model is loaded.
class CRelu(Layer):
    """Keras layer wrapping ``tf.nn.crelu`` with a configurable axis.

    Args:
        axis: Axis along which the positive and negative ReLU parts are
            concatenated. Defaults to ``-1``.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, axis=-1, **kwargs):
        super(CRelu, self).__init__(**kwargs)
        self.axis = axis

    def call(self, x):
        return tf.nn.crelu(x, axis=self.axis)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` with the size along ``self.axis`` doubled."""
        output_shape = list(input_shape)
        # Double the configured axis (not always the last one); an unknown
        # (None) dimension stays unknown instead of raising a TypeError.
        if output_shape[self.axis] is not None:
            output_shape[self.axis] = output_shape[self.axis] * 2
        return tuple(output_shape)

    def get_config(self):
        """Return the layer config including ``axis`` so it survives save/load."""
        # get_config takes no extra arguments, and the class is spelled CRelu.
        config = super(CRelu, self).get_config()
        config['axis'] = self.axis
        return config
Related
I am writing a custom layer using Keras that returns a tensor of zeros the first three times it is invoked and does nothing the other times. The code is the following:
class MyLayer(tf.keras.layers.Layer):
    """Layer meant to output zeros for its first three calls, then pass inputs through."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__iteration = 0
        self.__returning_zeros = None

    def build(self, input_shape):
        # Uses the full input shape, batch dimension included.
        self.__returning_zeros = tf.zeros(shape=input_shape, dtype=tf.float32)

    def call(self, inputs):
        self.__iteration += 1
        return self.__returning_zeros if self.__iteration <= 3 else inputs
Unfortunately if I try to build a model using this layer like this
def build_model(input_shape, num_classes):
    """Build a small CNN classifier with ``MyLayer`` after the second convolution.

    Args:
        input_shape: Shape passed to ``keras.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``keras.Model`` from the input layer to the softmax output.
    """
    input_layer = keras.Input(shape=input_shape, name='input')
    stack = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu", name='conv1'),
        layers.MaxPooling2D(pool_size=(2, 2), name='maxpool1'),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu", name='conv2'),
        MyLayer(),
        layers.MaxPooling2D(pool_size=(2, 2), name='maxpool2'),
        layers.Flatten(name='flatten'),
        layers.Dropout(0.5, name='dropout'),
        layers.Dense(num_classes, activation="softmax", name='dense'),
    ]
    tensor = input_layer
    for layer in stack:
        tensor = layer(tensor)
    return keras.Model(inputs=(input_layer,), outputs=tensor)
I get the following error message
File "customlayerkeras.py", line 25, in build
self.__returning_zeros = tf.zeros(shape=input_shape, dtype=tf.float32)
ValueError: Cannot convert a partially known TensorShape (None, 13, 13, 64) to a Tensor.
Where it seems that, despite using the build function as suggested in the documentation I am not able to retrieve the correct shape of the input.
How can I fix this problem?
EDIT:
I was overcomplicating the problem without thinking; the best solution is to just multiply the inputs by zero, like this:
def call(self, inputs):
    """Return ``inputs * 0`` for the first three calls, ``inputs`` afterwards."""
    self.__iteration += 1
    return inputs * 0 if self.__iteration <= 3 else inputs
Pretty sure you don't need the dimension of the batch, so you can do something like this:
class MyLayer(tf.keras.layers.Layer):
    """Layer that zeroes its input for the first three calls, then passes it through."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__iteration = 0
        self.__returning_zeros = None

    def build(self, input_shape):
        # Drop the leading (batch) entry so the zeros tensor has a fully known shape.
        per_sample_shape = input_shape[1:]
        self.__returning_zeros = tf.zeros(shape=per_sample_shape, dtype=tf.float32)

    def call(self, inputs):
        self.__iteration += 1
        if self.__iteration > 3:
            return inputs
        # or like return tf.repeat(self.__returning_zeros[None,...], tf.shape(inputs)[0], axis=0)
        return inputs * self.__returning_zeros
I'm following Quantization aware training comprehensive guide and struggling with QAT for custom layers, working with tf=2.6.0, py=3.9.7.
Below is a toy example of my problem:
I wrote a simple custom layer that implements Conv2D
class MyConv(tf.keras.layers.Layer):
    """Custom layer that delegates to an inner 1x1 ``Conv2D``.

    Args:
        filt: Number of filters of the inner convolution.
        name: Optional layer name.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base class exactly once. The original called it twice,
        # and the second call (without ``name``) re-initialised the layer and
        # replaced the explicit name with an auto-generated one.
        super(MyConv, self).__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        """Return the base config extended with ``filt``."""
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        # The inner convolution is created when the layer is built.
        self.conv = tf.keras.layers.Conv2D(self.filt, 1, padding="same")

    def call(self, input):
        return self.conv(input)
I've created a small model with that layer, then recursively pass over its layers and annotate them using tfmot.quantization.keras.quantize_annotate_layer (each custom layer could have more custom sub-layers that need to be quantized). Then I apply tfmot.quantization.keras.quantize_apply to the annotated model. The resulting model consists of all the quantized layers, except for my custom layer, which has not been quantized.
I'll note that when I'm replacing the custom layer MyConv with the code below, as in the comprehensive guide, the quantization works.
class MyConv(tf.keras.layers.Conv2D):
    """Plain ``Conv2D`` subclass with no overrides."""
    # A subclass must be declared with ``class``; ``def`` with a dotted name
    # in the parameter list is a syntax error.
    pass
Please help me solve this issue. Might be some issue with my QuantizeConfig?
Below is my full code:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
class MyConv(tf.keras.layers.Layer):
    """Custom layer that delegates to an inner, quantize-annotated 1x1 ``Conv2D``.

    Args:
        filt: Number of filters of the inner convolution.
        name: Optional layer name.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base class exactly once. The original called it twice,
        # and the second call (without ``name``) re-initialised the layer and
        # replaced the explicit name with an auto-generated one.
        super(MyConv, self).__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        """Return the base config extended with ``filt``."""
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        # The inner convolution is wrapped with quantize_annotate_layer at build time.
        self.conv = tfmot.quantization.keras.quantize_annotate_layer(
            tf.keras.layers.Conv2D(self.filt, 1, padding="same"))

    def call(self, input):
        return self.conv(input)
def get_toy_model():
    """Build the toy functional model: Conv2D/ReLU, ``MyConv``, Conv2D/ReLU, Conv2D."""
    keras_layers = tf.keras.layers
    inputs = tf.keras.Input((10, 10, 1), name='input')
    x = keras_layers.Conv2D(1, 3, padding="same")(inputs)
    x = keras_layers.ReLU()(x)
    x = MyConv()(x)
    # Every pass starts again from ``x``, so only the last pass's ``y`` feeds ``out``.
    for _ in range(2):
        y = keras_layers.Conv2D(1, 3, padding="same")(x)
        y = keras_layers.ReLU()(y)
    out = keras_layers.Conv2D(1, 3, padding="same")(y)
    return tf.keras.Model(inputs, out, name='toy_Conv2D')
# Short module-level aliases for two tfmot quantizer classes.
LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer
class DefaultCostumeQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """QuantizeConfig that registers no weights or activations, only an output quantizer."""

    def get_weights_and_quantizers(self, layer):
        """Return an empty list: no weights are registered for quantization."""
        return []

    def get_activations_and_quantizers(self, layer):
        """Return an empty list: no activations are registered for quantization."""
        return []

    def set_quantize_weights(self, layer, quantize_weights):
        """Do nothing; there are no quantized weights to set."""
        pass

    def set_quantize_activations(self, layer, quantize_activations):
        """Do nothing; there are no quantized activations to set."""
        pass

    def get_output_quantizers(self, layer):
        """Return a single 8-bit ``MovingAverageQuantizer`` for the layer output."""
        output_quantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=8, per_axis=False, symmetric=False, narrow_range=False)
        return [output_quantizer]

    def get_config(self):
        """Return an empty config dict."""
        return {}
# Walk the attribute values stored on `layer`, recurse into any that are Keras
# layers, and wrap Dense/Conv2D/ReLU/LeakyReLU/Activation instances with
# quantize_annotate_layer using DefaultCostumeQuantizeConfig.
def recursive_depth_layers(layer):
for l in list(layer.__dict__.values()):
# Only attribute values that are Keras layers are descended into.
if isinstance(l, tf.keras.layers.Layer):
# NOTE(review): the value returned by this recursive call is discarded.
recursive_depth_layers(l)
if isinstance(l, (
tf.keras.layers.Dense, tf.keras.layers.Conv2D, tf.keras.layers.ReLU, tf.keras.layers.LeakyReLU, tf.keras.layers.Activation)):
ql = tfmot.quantization.keras.quantize_annotate_layer(l, DefaultCostumeQuantizeConfig())
# The wrapped layer's name is appended to the annotate wrapper's own name.
ql._name += "_" + l.name
# NOTE(review): `ql` is returned but never assigned back onto `layer` here -
# confirm whether the sub-layer attribute is meant to be replaced in place.
return ql
def apply_quantization(layer):
    """Clone function that wraps supported layers with ``quantize_annotate_layer``.

    Built-in Dense/Conv2D/ReLU/LeakyReLU/Activation layers are annotated
    directly. Layers whose class lives in ``__main__`` (custom layers) are
    first passed to ``recursive_depth_layers`` and then annotated. Any other
    layer is returned unchanged.
    """
    builtin_types = (
        tf.keras.layers.Dense,
        tf.keras.layers.Conv2D,
        tf.keras.layers.ReLU,
        tf.keras.layers.LeakyReLU,
        tf.keras.layers.Activation,
    )
    is_builtin = isinstance(layer, builtin_types)
    is_custom = layer.__module__ == "__main__"
    if not (is_builtin or is_custom):
        return layer
    if not is_builtin:
        # custom layer
        recursive_depth_layers(layer)
    annotated = tfmot.quantization.keras.quantize_annotate_layer(layer, DefaultCostumeQuantizeConfig())
    annotated._name += '_' + layer.name
    return annotated
# Build the float model and show it.
model = get_toy_model()
model.summary()

# Clone it, annotating layers for quantization on the way.
annotated_model = tf.keras.models.clone_model(
    model,
    clone_function=apply_quantization,
)
annotated_model.summary()

# Apply quantization with the custom classes registered in the scope.
quantize_scope = tfmot.quantization.keras.quantize_scope
with quantize_scope({
        'DefaultCostumeQuantizeConfig': DefaultCostumeQuantizeConfig,
        'MyConv': MyConv,
}):
    quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)
quant_aware_model._name += "_quant"
quant_aware_model.summary()
quant_aware_model.compile()
I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
Layer)
class basic_conv_stack(Layer):
    """Conv2D ('same' padding) followed by BatchNormalization and ReLU."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.conv1 = Conv2D(filters, kernel_size, strides, padding='same')
        self.bn1 = BatchNormalization()
        self.relu = ReLU()

    def call(self, x):
        return self.relu(self.bn1(self.conv1(x)))
class basic_residual(Layer):
    """Two ``basic_conv_stack`` blocks applied one after the other."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.bcs1 = basic_conv_stack(filters, kernel_size, strides)
        self.bcs2 = basic_conv_stack(filters, kernel_size, strides)

    def call(self, x):
        return self.bcs2(self.bcs1(x))
class basic_module(Layer):
    """Applies four ``basic_residual`` blocks, each instantiated inside ``call``."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        # Stores the class itself plus its constructor arguments, not an instance.
        self.res = basic_residual
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        for _ in range(4):
            # A brand-new basic_residual is constructed on every iteration.
            block = self.res(*self.args)
            x = block(x)
        return x
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
model.summary()
Similarly, if I do the following, I get 1,230 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
model.summary()
However, if I try the basic_module class, I get zero trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
model.summary()
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in the call must be initialized in the class's init for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
    """Chain of four ``basic_residual`` blocks created once in ``__init__``."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.clayers = [
            basic_residual(filters, kernel_size, strides)
            for _ in range(4)
        ]

    def call(self, x):
        # Only the first four entries are applied, in order.
        for block in self.clayers[:4]:
            x = block(x)
        return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.
You have to initialize the class instances with the required parameters, such as filters, kernel_size and strides, inside the predefined basic_module, rather than storing the class itself. Also, note that these hyper-parameters determine the properties of the trainable weights.
# >>> a = basic_module
# >>> a
# __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
    """Quoted version of the module that keeps the ``basic_residual`` class, not instances."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.res = basic_residual  # < --- the class object, not an instance
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        for _ in range(4):
            residual_block = self.res(*self.args)
            x = residual_block(x)
        return x
I want to create a Linear layer computing y = w*x + b, where w is an [M x d] matrix, x is a [d x 1] matrix and b is the bias, an [M x 1] matrix; M is the number of units in the layer and d is the dimension of the vector x.
My code looks like this:
class Linear(keras.layers.Layer):
    """Linear layer computing ``w @ x + b`` for inputs of shape (batch, d, k).

    ``w`` has shape (units, d) and ``b`` has shape (units, 1), so the output
    has shape (batch, units, k).

    Args:
        units: Number of output rows M.
    """

    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units
        print(self.units)

    def build(self, input_shape):
        # input_shape[1] is d, the number of rows of each input matrix.
        self.w = self.add_weight(
            shape=(self.units, input_shape[1]),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units, 1), initializer="random_normal", trainable=True
        )
        print(input_shape[1])

    def call(self, inputs):
        return tf.matmul(self.w, inputs) + self.b

    def compute_output_shape(self, input_shape):
        """Return (batch, units, k) for an input shape (batch, d, k)."""
        # Keras 2 looks for ``compute_output_shape``; without it the reported
        # output shape can fall back to the input shape. The batch dimension
        # must be part of the returned shape.
        return (input_shape[0], self.units, input_shape[2])

    def get_output_shape_for(self, input_shape):
        """Keras 1 name of the shape hook; kept as an alias for old callers."""
        return self.compute_output_shape(input_shape)
And when i create model i use:
# Column-vector input with d = 2, fed through a Linear layer with M = 32 units.
input_tensor1 = Input(shape=(2, 1))
L11 = Linear(units=32)(input_tensor1)
model = Model(
    inputs=input_tensor1,
    outputs=L11,
)
model.summary()
In this code d=2 and M=32.
As I understand it, y = w*x + b should be an [M x 1] matrix, because w is [M x d], x is [d x 1] and b is [M x 1]. But in the model summary the output shape is (None, 2, 1).
Can anyone explain this? Where is my code wrong?
Here's my code
#Here's my custom GRU layer
class GRU(keras.layers.Layer):
    """Minimal GRU layer returning the hidden state after the last time step.

    Inputs are expected as (batch, steps, dims); the output is (batch, units).

    Args:
        units: Size of the hidden state.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, **kwargs):
        super(GRU, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        dims = input_shape[-1]
        self.steps = input_shape[-2]
        # Every gate acts on the concatenation [x, h], hence dims + units rows.
        gate_shape = [dims + self.units, self.units]
        self.wr = self.add_weight(name="wr", shape=gate_shape, initializer="glorot_normal")
        self.br = self.add_weight(name="br", shape=[self.units], initializer="zeros")
        self.wz = self.add_weight(name="wz", shape=gate_shape, initializer="glorot_normal")
        self.bz = self.add_weight(name="bz", shape=[self.units], initializer="zeros")
        self.wn = self.add_weight(name="wn", shape=gate_shape, initializer="glorot_normal")
        self.bn = self.add_weight(name="bn", shape=[self.units], initializer="zeros")
        super(GRU, self).build(input_shape)

    def dense_forward(self, w, b, x, activation):
        """Return ``activation(x @ w + b)``."""
        return activation(x @ w + b)

    def update_step(self, x, h):
        """Compute the next hidden state from one time slice.

        Args:
            x: Input slice of shape (batch, dims).
            h: Current hidden state of shape (batch, units).

        Returns:
            The new hidden state, shape (batch, units).
        """
        x_and_h = tf.concat([x, h], axis=1)
        z = self.dense_forward(self.wz, self.bz, x_and_h, tf.nn.sigmoid)
        r = self.dense_forward(self.wr, self.br, x_and_h, tf.nn.sigmoid)
        n = self.dense_forward(
            self.wn, self.bn, tf.concat([x, r * h], axis=1), tf.nn.tanh)
        return (1 - z) * h + z * n

    def call(self, x):
        if self.steps is None or x.shape[1] != self.steps:
            raise ValueError(
                "GRU expects a fixed number of time steps matching the shape "
                "it was built with; got %r, built with %r"
                % (x.shape[1], self.steps))
        # The hidden state is a local tensor sized from the runtime batch, and
        # the loop is a plain Python range over the static step count. Nothing
        # is stored on ``self`` or created as a tf.Variable inside call, so no
        # tensor escapes an AutoGraph while-loop body (the source of the
        # InaccessibleTensorError).
        h = tf.zeros([tf.shape(x)[0], self.units], dtype=x.dtype)
        for step in range(self.steps):
            h = self.update_step(x[:, step, :], h)
        return h

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([None, self.units])

    def get_config(self):
        config = super(GRU, self).get_config()
        config.update({'units': self.units})
        return config
#Here's my custom dense layer
class Dense(keras.layers.Layer):
    """Fully connected layer computing ``activation(X @ kernel + bias)``.

    Args:
        units: Number of output units.
        activation: Anything accepted by ``keras.activations.get``.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        self.kernel = self.add_weight(
            name="kernel", shape=[batch_input_shape[-1], self.units],
            initializer="glorot_normal")
        self.bias = self.add_weight(
            name="bias", shape=[self.units], initializer="zeros")
        super().build(batch_input_shape)

    def call(self, X):
        # ``@`` is matrix multiplication; a ``#`` here would comment out the
        # rest of the expression and leave the parenthesis unclosed.
        return self.activation(X @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        """Return the input shape with its last dimension replaced by ``units``."""
        # Normalise first so plain lists/tuples work as well as TensorShape.
        leading_dims = tf.TensorShape(batch_input_shape).as_list()[:-1]
        return tf.TensorShape(leading_dims + [self.units])

    def get_config(self):
        base_config = super().get_config()
        return {**base_config, "units": self.units,
                "activation": keras.activations.serialize(self.activation)}
'''
gru = GRU(units=32)
gru.build(input_shape=(None, 128, 2))
gru.compute_output_shape(input_shape=(None, 128, 2))
dense = Dense(1)
dense.build((None, 32, 1))
dense.compute_output_shape(batch_input_shape=tf.TensorShape([None, 32, 1]))
'''
#Here's the model to use my custom GRU for regression
class GRUReg(keras.models.Model):
    """Regression model: the custom ``GRU`` followed by a one-unit ``Dense``."""

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.gru = GRU(units=units)
        self.dense = Dense(1)

    def build(self, input_shape):
        # Build the GRU first, then size the dense layer from the GRU's output shape.
        self.gru.build(input_shape)
        hidden_shape = self.gru.compute_output_shape(input_shape)
        self.dense.build(hidden_shape)

    def call(self, x):
        return self.dense(self.gru(x))
Here's the training codes
model = GRUReg(32)
model.build([None, 128, 2])
# Run one forward pass on dummy data of shape (2, 128, 2).
model(tf.ones((2, 128, 2)))
model.compile(optimizer=keras.optimizers.RMSprop(), loss='mae', metrics=['mae'])
# min_lr must be below the optimizer's starting learning rate (RMSprop's
# default is 0.001), otherwise ReduceLROnPlateau can never lower it.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                              patience=5, min_lr=1e-5)
Then fit
# Train for 100 epochs, holding out 20% of the data for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[reduce_lr],
)
An error occurs in the model.fit line
Here's the Error
:13 call *
x = self.gru(x)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:776 __call__
outputs = base_layer_utils.mark_as_return(outputs, acd)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py:639 mark_as_return
return nest.map_structure(_mark_as_return, outputs)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\nest.py:568 map_structure
structure[0], [func(*x) for x in entries],
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\nest.py:568
structure[0], [func(*x) for x in entries],
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\keras\engine\base_layer_utils.py:625 _mark_as_return
return_tensor = acd.mark_as_return(tensor)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\auto_control_deps.py:167 mark_as_return
tensor = array_ops.identity(tensor)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\util\dispatch.py:180 wrapper
return target(*args, **kwargs)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\ops\array_ops.py:267 identity
ret = gen_array_ops.identity(input, name=name)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py:3829 identity
"Identity", input=input, name=name)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\op_def_library.py:742 _apply_op_helper
attrs=attr_protos, op_def=op_def)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\func_graph.py:591 _create_op_internal
inp = self.capture(inp)
D:\Anaconda3\envs\tf2\lib\site-packages\tensorflow_core\python\framework\func_graph.py:641 capture
% (tensor, tensor.graph, self))
InaccessibleTensorError: The tensor 'Tensor("add_4:0", shape=(2, 32), dtype=float32)' cannot be accessed here: it is defined in another function or code block. Use return values, explicit Python locals or TensorFlow collections to access it. Defined in: FuncGraph(name=gru_reg_6_gru_11_while_body_19141, id=2570529255624); accessed from: FuncGraph(name=keras_graph, id=2566523294088).
I wrote the code based on similar code from a TF2 book, which left me even more baffled.
Then I googled the error but could not find an appropriate solution to it...
My OS:win10
Tensorflow's Version:'2.1.0'
Python: '3.7'
CudaToolKit:10.1
Graphics Card:GTX1660Ti