I'm following Quantization aware training comprehensive guide and struggling with QAT for custom layers, working with tf=2.6.0, py=3.9.7.
Below is a toy example of my problem:
I wrote a simple custom layer that implements Conv2D
class MyConv(tf.keras.layers.Layer):
'''costume conv2d'''
def __init__(self, filt=1, name=None, **kwargs):
super(MyConv, self).__init__(name=name)
self.filt = filt
super(MyConv, self).__init__(**kwargs)
def get_config(self):
config = super().get_config().copy()
config.update({"filt": self.filt})
return config
def build(self, shape):
self.conv = tf.keras.layers.Conv2D(self.filt, 1, padding="same")
def call(self, input):
return self.conv(input)
I've created a small model with that layer, then recursively pass over its layers and annotates them using tfmot.guantization.keras.quantize_annotate_layer (each custom layer could have more custom sub-layers that needs to be quantized). Then I apply tfmot.quantization.keras.quantize_apply to the annotated model. The result model consists of all the quantized layers, except of my custom layer, that had not been quantized.
I'll note that when I'm replacing the custom layer MyConv with the code below, as in the comprehensive guide, the quantization works.
def MyConv(tf.keras.layers.Conv2D):
pass
Please help me solve this issue. Might be some issue with my QuantizeConfig?
Below is my full code:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
class MyConv(tf.keras.layers.Layer):
'''costume conv2d'''
def __init__(self, filt=1, name=None, **kwargs):
super(MyConv, self).__init__(name=name)
self.filt = filt
super(MyConv, self).__init__(**kwargs)
def get_config(self):
config = super().get_config().copy()
config.update({"filt": self.filt})
return config
def build(self, shape):
self.conv = tfmot.quantization.keras.quantize_annotate_layer(tf.keras.layers.Conv2D(self.filt, 1, padding="same"))
def call(self, input):
return self.conv(input)
def get_toy_model():
input = tf.keras.Input((10, 10, 1), name='input')
x = tf.keras.layers.Conv2D(1, 3, padding="same")(input)
x = tf.keras.layers.ReLU()(x)
x = MyConv()(x)
for _ in range(2):
y = tf.keras.layers.Conv2D(1, 3, padding="same")(x)
y = tf.keras.layers.ReLU()(y)
out = tf.keras.layers.Conv2D(1, 3, padding="same")(y)
return tf.keras.Model(input, out, name='toy_Conv2D')
LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer
class DefaultCostumeQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
# Configure how to quantize weights.
def get_weights_and_quantizers(self, layer):
return []
# Configure how to quantize activations.
def get_activations_and_quantizers(self, layer):
return []
def set_quantize_weights(self, layer, quantize_weights):
pass
def set_quantize_activations(self, layer, quantize_activations):
pass
# Configure how to quantize outputs (may be equivalent to activations).
def get_output_quantizers(self, layer):
return [tfmot.quantization.keras.quantizers.MovingAverageQuantizer(num_bits=8, per_axis=False, symmetric=False, narrow_range=False)]
def get_config(self):
return {}
def recursive_depth_layers(layer):
for l in list(layer.__dict__.values()):
if isinstance(l, tf.keras.layers.Layer):
recursive_depth_layers(l)
if isinstance(l, (
tf.keras.layers.Dense, tf.keras.layers.Conv2D, tf.keras.layers.ReLU, tf.keras.layers.LeakyReLU, tf.keras.layers.Activation)):
ql = tfmot.quantization.keras.quantize_annotate_layer(l, DefaultCostumeQuantizeConfig())
ql._name += "_" + l.name
return ql
def apply_quantization(layer):
# regular layer
if isinstance(layer, (tf.keras.layers.Dense, tf.keras.layers.Conv2D, tf.keras.layers.ReLU, tf.keras.layers.LeakyReLU,tf.keras.layers.Activation)):
l = tfmot.quantization.keras.quantize_annotate_layer(layer, DefaultCostumeQuantizeConfig())
l._name += '_' + layer.name
return l
if layer.__module__ == "__main__":
# custom layer
recursive_depth_layers(layer)
l = tfmot.quantization.keras.quantize_annotate_layer(layer, DefaultCostumeQuantizeConfig())
l._name += '_' + layer.name
return l
return layer
model = get_toy_model()
model.summary()
annotated_model = tf.keras.models.clone_model(model, clone_function=apply_quantization)
annotated_model.summary()
quantize_scope = tfmot.quantization.keras.quantize_scope
with quantize_scope({'DefaultCostumeQuantizeConfig': DefaultCostumeQuantizeConfig, 'MyConv': MyConv}):
quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)
quant_aware_model._name += "_quant"
quant_aware_model.summary()
quant_aware_model.compile()
Related
This code runs fine to create a simple feed-forward neural Network. The layer (torch.nn.Linear) is assigned to the class variable by using self.
class MultipleRegression3L(torch.nn.Module):
def __init__(self, num_features):
super(MultipleRegression3L, self).__init__()
self.layer_1 = torch.nn.Linear(num_features, 16)
## more layers
self.relu = torch.nn.ReLU()
def forward(self, inputs):
x = self.relu(self.layer_1(inputs))
x = self.relu(self.layer_2(x))
x = self.relu(self.layer_3(x))
x = self.layer_out(x)
return (x)
def predict(self, test_inputs):
return self.forward(test_inputs)
However, when I tried to store the layer using the list:
class MultipleRegression(torch.nn.Module):
def __init__(self, num_features, params):
super(MultipleRegression, self).__init__()
number_of_layers = 3 if not 'number_of_layers' in params else params['number_of_layers']
number_of_neurons_in_each_layer = [16, 32, 16] if not 'number_of_neurons_in_each_layer' in params else params['number_of_neurons_in_each_layer']
activation_function = "relu" if not 'activation_function' in params else params['activation_function']
self.layers = []
v1 = num_features
for i in range(0, number_of_layers):
v2 = number_of_neurons_in_each_layer[i]
self.layers.append(torch.nn.Linear(v1, v2))
v1 = v2
self.layer_out = torch.nn.Linear(v2, 1)
if activation_function == "relu":
self.act_func = torch.nn.ReLU()
else:
raise Exception("Activation function %s is not supported" % (activation_function))
def forward(self, inputs):
x = self.act_func(self.layers[0](inputs))
for i in range(1, len(self.layers)):
x = self.act_func(self.layers[i](x))
x = self.layer_out(x)
return (x)
The two models do not behave the same way. What can be wrong here?
Pytorch needs to keep the graph of the modules in the model, so using a list does not work. Using self.layers = torch.nn.ModuleList() fixed the problem.
I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
Layer)
class basic_conv_stack(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_conv_stack, self).__init__()
self.conv1 = Conv2D(filters, kernel_size, strides, padding='same')
self.bn1 = BatchNormalization()
self.relu = ReLU()
def call(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
return x
class basic_residual(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_residual, self).__init__()
self.bcs1 = basic_conv_stack(filters, kernel_size, strides)
self.bcs2 = basic_conv_stack(filters, kernel_size, strides)
def call(self, x):
x = self.bcs1(x)
x = self.bcs2(x)
return x
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Similarly, if I do the following, I get 1,230 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
However, if I try the basic_module class, I get zero trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in the call must be initialized in the class's init for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.clayers = [basic_residual(filters, kernel_size, strides) for _ in range(4)]
def call(self, x):
for idx in range(4):
x = self.clayers[idx](x)
return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.
You have to initialize the class instances with the required parameter such as filters, kernel_size, strides to the predefined base_mdoule. Also, note that these hyper-parameters are related to trainable weights properties.
# >>> a = basic_module
# >>> a __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# >>> <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual # < ---
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x
I am trying to implement StyleGAN with TensorFlow version 2 and I have no idea how to do an equalized learning rate. I tried to scale gradients this way:
def equalize_in_list(datalist):
for i in range(len(datalist)):
if (datalist[i] is list):
equalize_in_list(datalist[i])
else:
datalist[i] = datalist[i] * np.sqrt(2)/np.prod(datalist[i].shape)
return datalist
gen_grad = equalize_in_list(gen_grad)
disc_grad = equalize_in_list(disc_grad)
But it doesn't work correctly.
You can just create a custom layer.
class DenseEQ(Dense):
"""
Standard dense layer but includes learning rate equilization
at runtime as per Karras et al. 2017.
Inherits Dense layer and overides the call method.
"""
def __init__(self, **kwargs):
if 'kernel_initializer' in kwargs:
raise Exception("Cannot override kernel_initializer")
super().__init__(kernel_initializer=normal(0,1), **kwargs)
def build(self, input_shape):
super().build(input_shape)
# The number of inputs
n = np.product([int(val) for val in input_shape[1:]])
# He initialisation constant
self.c = np.sqrt(2/n)
def call(self, inputs):
output = K.dot(inputs, self.kernel*self.c) # scale kernel
if self.use_bias:
output = K.bias_add(output, self.bias, data_format='channels_last')
if self.activation is not None:
output = self.activation(output)
return output
And then create a model as you normally would... (But you'll have to specify its arguments explicitly e.g. : units=x. Positionaly arguments will not work)
model_in = Input(shape(12,))
x = DenseEq(name="whatever_1", units=16)(model_in)
x = LeakyRelu(0.2)(x)
x = DenseEq(name="whatever_2", units=1)(model_in)
model_out = LeakyRelu(0.2)(x)
model = Model(model_in, model_out)
You can do the same thing for a convolution.
class Conv2DEQ(Conv2D):
"""
Standard Conv2D layer but includes learning rate equilization
at runtime as per Karras et al. 2017.
Inherits Conv2D layer and overrides the call method, following
https://github.com/keras-team/keras/blob/master/keras/layers/convolutional.py
"""
def __init__(self, **kwargs):
if 'kernel_initializer' in kwargs:
raise Exception("Cannot override kernel_initializer")
super().__init__(kernel_initializer=normal(0,1), **kwargs)
def build(self, input_shape):
super().build(input_shape)
# The number of inputs
n = np.product([int(val) for val in input_shape[1:]])
# He initialisation constant
self.c = np.sqrt(2/n)
def call(self, inputs):
if self.rank == 2:
outputs = K.conv2d(
inputs,
self.kernel*self.c, # scale kernel
strides=self.strides,
padding=self.padding,
data_format=self.data_format,
dilation_rate=self.dilation_rate)
if self.use_bias:
outputs = K.bias_add(
outputs,
self.bias,
data_format=self.data_format)
if self.activation is not None:
return self.activation(outputs)
return outputs
I'm trying to implement CRelu layer in Keras
One option that seems work is to use Lambda layer:
def _crelu(x):
x = tf.nn.crelu(x, axis=-1)
return x
def _conv_bn_crelu(x, n_filters, kernel_size):
x = Conv2D(filters=n_filters, kernel_size=kernel_size, strides=(1, 1), padding='same')(x)
x = BatchNormalization(axis=-1)(x)
x = Lambda(_crelu)(x)
return x
But I wonder is Lamda layer introduce some overhead in training or inference process?
My second attemp is to create keras layer that is wrapper around tf.nn.crelu
class CRelu(Layer):
def __init__(self, **kwargs):
super(CRelu, self).__init__(**kwargs)
def build(self, input_shape):
super(CRelu, self).build(input_shape)
def call(self, x):
x = tf.nn.crelu(x, axis=-1)
return x
def compute_output_shape(self, input_shape):
output_shape = list(input_shape)
output_shape[-1] = output_shape[-1] * 2
output_shape = tuple(output_shape)
return output_shape
def _conv_bn_crelu(x, n_filters, kernel_size):
x = Conv2D(filters=n_filters, kernel_size=kernel_size, strides=(1, 1), padding='same')(x)
x = BatchNormalization(axis=-1)(x)
x = CRelu()(x)
return x
Which version will be more efficient?
Also looking forward for pure Keras implementation, if it's possible.
I don't think there is a significant difference between the two implementations speed-wise.
The Lambda implementation is the simplest actually but writing a custom Layer as you have done usually is better, especially for what regards model saving and loading (get_config method).
But in this case it doesn't matter as the CReLU is trivial and don't require saving and restoring parameters. You can store the axis parameter actually as in the code below. In this way it will be retrieved automatically when the model is loaded.
class CRelu(Layer):
def __init__(self, axis=-1, **kwargs):
self.axis = axis
super(CRelu, self).__init__(**kwargs)
def build(self, input_shape):
super(CRelu, self).build(input_shape)
def call(self, x):
x = tf.nn.crelu(x, axis=self.axis)
return x
def compute_output_shape(self, input_shape):
output_shape = list(input_shape)
output_shape[-1] = output_shape[-1] * 2
output_shape = tuple(output_shape)
return output_shape
def get_config(self, input_shape):
config = {'axis': self.axis, }
base_config = super(CReLU, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
Following this paper on domain adaptation, I am trying to implement the following layer for gradient reversal (written for Keras with the Theano backend, as found in this Keras issue) in Tensorflow, as my model does not run well with Theano.
class GradientReversalLayer(Layer):
""" Reverse a gradient
<feedforward> return input x
<backward> return -lambda * delta
"""
def __init__(self, hp_lambda, **kwargs):
super(GradientReversalLayer, self).__init__(**kwargs)
self.hp_lambda = hp_lambda
self.gr_op = ReverseGradient(self.hp_lambda)
def build(self, input_shape):
self.trainable_weights = []
def call(self, x, mask=None):
return self.gr_op(x)
def get_output_shape_for(self, input_shape):
return input_shape
def get_config(self):
config = {"name": self.__class__.__name__,
"lambda": self.hp_lambda}
base_config = super(GradientReversalLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
The layer performs this operation:
import theano
from keras.engine import Layer
class ReverseGradient(theano.Op):
""" theano operation to reverse the gradients
Introduced in http://arxiv.org/pdf/1409.7495.pdf
"""
view_map = {0: [0]}
__props__ = ('hp_lambda', )
def __init__(self, hp_lambda):
super(ReverseGradient, self).__init__()
self.hp_lambda = hp_lambda
def make_node(self, x):
assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
x = theano.tensor.as_tensor_variable(x)
return theano.Apply(self, [x], [x.type()])
def perform(self, node, inputs, output_storage):
xin, = inputs
xout, = output_storage
xout[0] = xin
def grad(self, input, output_gradients):
return [-self.hp_lambda * output_gradients[0]]
def infer_shape(self, node, i0_shapes):
return i0_shapes
Why can I not use it like this?
If I run my model with the tf backend and with this function written in Theano I get the following error:
theano.tensor.var.AsTensorError: ('Cannot convert Tensor("concatenate_1/concat:0", shape=(?, ?, 128), dtype=float32) to TensorType', <class 'tensorflow.python.framework.ops.Tensor'>)
After calling it like this:
lstm_concat = concatenate([hidden_out_1, hidden_out_2])
lstm_concat = FlipGradientKeras.GradientReversalLayer(0.31)(lstm_concat)
How do I convert this operation to a TF operation?
The documentation about adding a new operation only suggests to implement it in C++.
The ops codes show the general framework, but I'd like to be sure that everything that I'm implementing everything that the Theano op does.
I would assume it would be something on the lines of:
def ReverseGradient(input_tensor, hp_lambda):
with ops.name_scope(name, "ReverseGradient", [input_tensor, hp_lambda]) as name:
input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
But I'm really not sure about the rest.
Thanks in advance!
I solved the problem by expanding on the work done here.
Here's the working code:
import tensorflow as tf
from keras.engine import Layer
import keras.backend as K
def reverse_gradient(X, hp_lambda):
'''Flips the sign of the incoming gradient during training.'''
try:
reverse_gradient.num_calls += 1
except AttributeError:
reverse_gradient.num_calls = 1
grad_name = "GradientReversal%d" % reverse_gradient.num_calls
#tf.RegisterGradient(grad_name)
def _flip_gradients(op, grad):
return [tf.negative(grad) * hp_lambda]
g = K.get_session().graph
with g.gradient_override_map({'Identity': grad_name}):
y = tf.identity(X)
return y
class GradientReversal(Layer):
'''Flip the sign of gradient during training.'''
def __init__(self, hp_lambda, **kwargs):
super(GradientReversal, self).__init__(**kwargs)
self.supports_masking = False
self.hp_lambda = hp_lambda
def build(self, input_shape):
self.trainable_weights = []
def call(self, x, mask=None):
return reverse_gradient(x, self.hp_lambda)
def get_output_shape_for(self, input_shape):
return input_shape
def get_config(self):
config = {}
base_config = super(GradientReversal, self).get_config()
return dict(list(base_config.items()) + list(config.items()))