How to implement current pytorch activation functions with parameters? - python

I am looking for a simple way to use an activation function that already exists in the PyTorch library, but with some sort of parameter applied to its input. For example:
Tanh(x/10)
The only solution I came up with while searching was to implement the custom function completely from scratch. Is there a better or more elegant way to do this?
edit:
I am looking for some way to append to my model the function Tanh(x/10) rather than plain Tanh(x). Here is the relevant code block:
# Build the network: one Linear layer per consecutive pair of sizes in
# self.layers, each optionally followed by an activation.
self.model = nn.Sequential()
for i in range(len(self.layers) - 1):
    self.model.add_module(
        "linear_layer_" + str(i),
        nn.Linear(self.layers[i], self.layers[i + 1]),
    )
    if activations is None:
        # No per-layer specification was given: use Tanh after every layer.
        self.model.add_module("activation_" + str(i), nn.Tanh())
    elif activations[i] == "T":
        self.model.add_module("activation_" + str(i), nn.Tanh())
    elif activations[i] == "R":
        self.model.add_module("activation_" + str(i), nn.ReLU())
    # Any other code means "no activation" for this layer.

Instead of defining it as a specific function, you could inline it in a custom layer.
For instance your solution could look like:
import torch
import torch.nn as nn
class Net(nn.Module):
    """Small classifier whose hidden activation is tanh(x / 10).

    The scaled tanh is written inline in ``forward`` instead of being
    defined as a separate activation module.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(4, 10)
        self.fc2 = nn.Linear(10, 3)
        # An explicit dim avoids the deprecated implicit-dimension choice;
        # -1 normalises over the last axis (the 3 outputs of fc2).
        self.fc3 = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return softmax(fc2(tanh(fc1(x) / 10)))."""
        hidden = torch.tanh(self.fc1(x) / 10)
        return self.fc3(self.fc2(hidden))
where torch.tanh(output/10) is inlined in the forward function of your module.

You can create a layer with the multiplying parameter:
import torch
import torch.nn as nn
class CustomTanh(nn.Module):
    """Tanh activation applied to a scaled input: tanh(multiplier * x)."""

    def __init__(self, multiplier):
        """Store the factor that the input is multiplied by before tanh."""
        # nn.Module has to be initialised first: it sets up the internal
        # state that calling the module relies on.
        super().__init__()
        self.multiplier = multiplier

    def forward(self, x):
        """Return tanh(multiplier * x)."""
        return torch.tanh(self.multiplier * x)
Add it to your models with CustomTanh(1/10) instead of nn.Tanh().

Related

python class call function without indicate function name

I was learning PyTorch and encountered a case where I could not understand what was happening. Here is a class called MLP, with an __init__ function and a forward function. When I pass X as an argument to the MLP instance net, without writing net.forward(X), the forward function seems to be called automatically. Why is this the case?
import torch
from torch import nn
from torch.nn import functional as F
class MLP(nn.Module):
    """Two-layer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()  # nn.Module's params
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Apply the hidden layer, ReLU, then the output layer."""
        return self.out(F.relu(self.hidden(X)))


X = torch.rand(2, 20)
net = MLP()
# Calling the instance (rather than net.forward) is what the question is
# about: nn.Module.__call__ is what ends up invoking forward().
net(X)
"""
output of net(X)
tensor([[ 0.0614, -0.0143, -0.0546, 0.1173, -0.1838, -0.1843, 0.0861, 0.1152,
0.0990, 0.1818],
[-0.0483, -0.0196, 0.0720, 0.1243, 0.0261, -0.2727, -0.0480, 0.1391,
-0.0685, 0.2025]], grad_fn=<AddmmBackward0>)
"""
My initial guess was that forward is the only method in MLP that receives a parameter, but after I added another method that takes the same parameters, calling net(X) still seems to choose the forward function:
class MLP(nn.Module):
    """Same perceptron as before, with an extra method taking the same input."""

    def __init__(self):
        super().__init__()  # nn.Module's params
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward2(self, X):
        """Like forward but without the ReLU; prints a marker when it runs."""
        print("hello")
        return self.out((self.hidden(X)))

    def forward(self, X):
        """Apply the hidden layer, ReLU, then the output layer."""
        return self.out(F.relu(self.hidden(X)))
net = MLP()
# The results printed below show net(X) returning the same tensor as
# net.forward(X); forward2 only runs (and prints "hello") when called by name.
net(X)
net.forward(X)
net.forward2(X)
then I got
>>> net.forward(X)
tensor([[-0.1273, -0.0338, -0.1412, -0.1321, -0.1213, 0.0589, 0.0752, 0.0066,
-0.0057, -0.1374],
[-0.1660, -0.0044, -0.1765, -0.0451, -0.0386, 0.0824, 0.0486, -0.1293,
0.0511, -0.1285]], grad_fn=<AddmmBackward0>)
>>> net.forward2(X)
hello
tensor([[-0.2027, -0.2304, -0.3597, -0.3741, -0.5000, -0.2698, 0.2464, 0.1709,
-0.2262, -0.1462],
[-0.1168, -0.0417, -0.3584, -0.3133, -0.2366, -0.1521, 0.2428, 0.0043,
-0.1296, -0.2021]], grad_fn=<AddmmBackward0>)
>>> net(X)
tensor([[-0.1273, -0.0338, -0.1412, -0.1321, -0.1213, 0.0589, 0.0752, 0.0066,
-0.0057, -0.1374],
[-0.1660, -0.0044, -0.1765, -0.0451, -0.0386, 0.0824, 0.0486, -0.1293,
0.0511, -0.1285]], grad_fn=<AddmmBackward0>)
What did I miss? I would really appreciate any help!

How to define pytorch fullyconnect model more simple and convenient?

I am a beginner with PyTorch, and I want to build a fully connected model using PyTorch;
the model is very simple, like:
def forward(self, x):
    """Run x through fc1 and fc2 (each followed by ReLU), then fc3."""
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    # The last layer's output is returned without an activation.
    return self.fc3(x)
But when I want to add some layers or adjust the hidden layers, I find I have to write lots of redundant code, like:
def forward(self, x):
    """Run x through fc1..fc5 (each followed by ReLU), then fcn."""
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    x = self.relu(self.fc3(x))
    x = self.relu(self.fc4(x))
    x = self.relu(self.fc5(x))
    # Placeholder for however many further hidden layers are added.
    ...
    # The final layer's output is returned without an activation.
    return self.fcn(x)
Besides, if I want to change the number of features of some layer, I also have to change the adjacent layers.
So I want to know a way that is more graceful (maybe more Pythonic, and easier for adjusting hyperparameters).
I tried to write code like:
def __init__(self):
    """Create one Linear layer per consecutive pair of sizes in hidden_num."""
    super().__init__()
    self.hidden_num = [2881, 5500, 2048, 20]  # i just want to change here! to try some new structure
    # NOTE: the layers are kept in a plain Python list, which does not
    # register them as submodules of the model; nn.ModuleList is the
    # container that does.
    self.fc = [
        nn.Linear(n_in, n_out).to(DEVICE)
        for n_in, n_out in zip(self.hidden_num, self.hidden_num[1:])
    ]
    self.relu = nn.ReLU()
def forward(self, x):
    """Apply every layer in self.fc in order, with ReLU between layers."""
    last = len(self.fc) - 1
    for i, layer in enumerate(self.fc):
        x = layer(x)
        # No activation after the final layer.
        if i != last:
            x = self.relu(x)
    return x
But I found this way doesn't work; the model can't be built.
So could anyone tell me how to define a fully connected model like the above?
(So that I can adjust the model's layers just by adjusting the list named hidden_num.)
If you want to keep the same approach then you can use nn.ModuleList to properly register all linear layers inside the module's __init__:
class Model(nn.Module):
    """Fully connected network whose layer widths are given by hidden_num.

    Args:
        hidden_num: sequence of layer sizes; consecutive pairs define the
            (in_features, out_features) of each Linear layer.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        super().__init__()
        # nn.ModuleList (unlike a plain list) registers every Linear layer
        # inside the module.
        self.fc = nn.ModuleList([
            nn.Linear(hidden_num[i], hidden_num[i + 1])
            for i in range(len(hidden_num) - 1)
        ])

    def forward(self, x):
        """Apply each Linear layer in order, with ReLU between layers."""
        last = len(self.fc) - 1
        for i, layer in enumerate(self.fc):
            x = layer(x)
            # No activation after the final layer.
            if i != last:
                x = torch.relu(x)
        return x
However, you may want to handle the logic inside the __init__ function once. One alternative is to use nn.Sequential.
class Model(nn.Module):
    """Fully connected network assembled once, in __init__, as nn.Sequential.

    Args:
        hidden_num: sequence of layer sizes; consecutive pairs define the
            (in_features, out_features) of each Linear layer.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        super().__init__()
        layers = []
        last = len(hidden_num) - 2  # index of the final Linear layer
        for i in range(len(hidden_num) - 1):
            layers.append(nn.Linear(hidden_num[i], hidden_num[i + 1]))
            # ReLU goes between layers, not after the final one.
            if i != last:
                layers.append(nn.ReLU())
        # nn.Sequential takes the modules as separate positional arguments.
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the sequential stack."""
        return self.fc(x)
Ideally, you would inherit from nn.Sequential directly to avoid re-writing the forward function which is unnecessary in this case:
class Model(nn.Sequential):
    """Fully connected network implemented directly as an nn.Sequential.

    Inheriting from nn.Sequential means no forward method has to be written.

    Args:
        hidden_num: sequence of layer sizes; consecutive pairs define the
            (in_features, out_features) of each Linear layer.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        layers = []
        last = len(hidden_num) - 2  # index of the final Linear layer
        for i in range(len(hidden_num) - 1):
            layers.append(nn.Linear(hidden_num[i], hidden_num[i + 1]))
            # ReLU goes between layers, not after the final one.
            if i != last:
                layers.append(nn.ReLU())
        # nn.Sequential takes the modules as separate positional arguments.
        super().__init__(*layers)

Fail to quantize custom layer - Quantization Aware Training

I'm following Quantization aware training comprehensive guide and struggling with QAT for custom layers, working with tf=2.6.0, py=3.9.7.
Below is a toy example of my problem:
I wrote a simple custom layer that implements Conv2D
class MyConv(tf.keras.layers.Layer):
    """Custom layer that wraps a single Conv2D with kernel size 1."""

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base Layer exactly once, forwarding both the name
        # and any extra keyword arguments.
        super().__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        """Return the base config extended with the ``filt`` argument."""
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        # The inner convolution is created when the layer is built.
        self.conv = tf.keras.layers.Conv2D(self.filt, 1, padding="same")

    def call(self, input):
        return self.conv(input)
I've created a small model with that layer, then recursively passed over its layers and annotated them using tfmot.quantization.keras.quantize_annotate_layer (each custom layer could have more custom sub-layers that need to be quantized). Then I apply tfmot.quantization.keras.quantize_apply to the annotated model. The resulting model consists of all the quantized layers, except for my custom layer, which has not been quantized.
I'll note that when I'm replacing the custom layer MyConv with the code below, as in the comprehensive guide, the quantization works.
class MyConv(tf.keras.layers.Conv2D):
    """Conv2D subclass with no overrides."""
    pass
Please help me solve this issue. Might be some issue with my QuantizeConfig?
Below is my full code:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
class MyConv(tf.keras.layers.Layer):
    """Custom layer wrapping a quantize-annotated Conv2D with kernel size 1."""

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base Layer exactly once, forwarding both the name
        # and any extra keyword arguments.
        super().__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        """Return the base config extended with the ``filt`` argument."""
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        # The inner convolution is annotated for quantization at build time.
        self.conv = tfmot.quantization.keras.quantize_annotate_layer(
            tf.keras.layers.Conv2D(self.filt, 1, padding="same")
        )

    def call(self, input):
        return self.conv(input)
def get_toy_model():
    """Build the small functional Keras model used to reproduce the problem."""
    inputs = tf.keras.Input((10, 10, 1), name='input')
    x = tf.keras.layers.Conv2D(1, 3, padding="same")(inputs)
    x = tf.keras.layers.ReLU()(x)
    x = MyConv()(x)
    # NOTE(review): each iteration starts again from `x`, so only the branch
    # built in the last iteration feeds `out` -- confirm whether chaining
    # the iterations was intended.
    for _ in range(2):
        y = tf.keras.layers.Conv2D(1, 3, padding="same")(x)
        y = tf.keras.layers.ReLU()(y)
    out = tf.keras.layers.Conv2D(1, 3, padding="same")(y)
    return tf.keras.Model(inputs, out, name='toy_Conv2D')
LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer


class DefaultCostumeQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """QuantizeConfig that declares an output quantizer only.

    No weights and no activations are listed for quantization; the layer's
    output gets an 8-bit MovingAverageQuantizer.
    """

    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
        return []

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
        return []

    def set_quantize_weights(self, layer, quantize_weights):
        # Nothing to set: no weights are declared above.
        pass

    def set_quantize_activations(self, layer, quantize_activations):
        # Nothing to set: no activations are declared above.
        pass

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
        return [
            MovingAverageQuantizer(
                num_bits=8, per_axis=False, symmetric=False, narrow_range=False
            )
        ]

    def get_config(self):
        return {}
def recursive_depth_layers(layer):
    """Walk a layer's attributes and annotate the first quantizable sub-layer.

    Every attribute that is a Keras layer is visited recursively. The first
    attribute whose type is in the quantizable set is wrapped with
    ``quantize_annotate_layer`` and returned; if none matches, the function
    returns None.

    NOTE(review): the annotated wrapper is only returned, never assigned
    back onto ``layer``, and the recursive call below discards the value --
    confirm whether the sub-layer was meant to be replaced in place.
    """
    quantizable = (
        tf.keras.layers.Dense,
        tf.keras.layers.Conv2D,
        tf.keras.layers.ReLU,
        tf.keras.layers.LeakyReLU,
        tf.keras.layers.Activation,
    )
    for attr in list(layer.__dict__.values()):
        if isinstance(attr, tf.keras.layers.Layer):
            recursive_depth_layers(attr)
            if isinstance(attr, quantizable):
                ql = tfmot.quantization.keras.quantize_annotate_layer(
                    attr, DefaultCostumeQuantizeConfig()
                )
                ql._name += "_" + attr.name
                return ql
def apply_quantization(layer):
    """Clone function: return ``layer`` annotated for quantization if eligible.

    Built-in Dense/Conv2D/ReLU/LeakyReLU/Activation layers and layers whose
    class is defined in ``__main__`` are wrapped with
    ``quantize_annotate_layer``; any other layer is returned unchanged.
    """
    regular_types = (
        tf.keras.layers.Dense,
        tf.keras.layers.Conv2D,
        tf.keras.layers.ReLU,
        tf.keras.layers.LeakyReLU,
        tf.keras.layers.Activation,
    )
    # regular layer
    if isinstance(layer, regular_types):
        annotated = tfmot.quantization.keras.quantize_annotate_layer(
            layer, DefaultCostumeQuantizeConfig()
        )
        annotated._name += '_' + layer.name
        return annotated
    if layer.__module__ == "__main__":
        # custom layer
        # The return value of the recursive walk is not used here.
        recursive_depth_layers(layer)
        annotated = tfmot.quantization.keras.quantize_annotate_layer(
            layer, DefaultCostumeQuantizeConfig()
        )
        annotated._name += '_' + layer.name
        return annotated
    return layer
# Build the float model and show its layers.
model = get_toy_model()
model.summary()

# Clone the model, annotating each eligible layer for quantization.
annotated_model = tf.keras.models.clone_model(model, clone_function=apply_quantization)
annotated_model.summary()

# The custom config and layer classes are passed to quantize_scope by name
# for the duration of quantize_apply.
quantize_scope = tfmot.quantization.keras.quantize_scope
with quantize_scope({'DefaultCostumeQuantizeConfig': DefaultCostumeQuantizeConfig, 'MyConv': MyConv}):
    quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)

quant_aware_model._name += "_quant"
quant_aware_model.summary()
quant_aware_model.compile()

Functions within Keras 'Recurrent' layer

I am trying to follow an implementation of an attention decoder
from keras.layers.recurrent import Recurrent
...
class AttentionDecoder(Recurrent):
...
#######################################################
# The functionality of init, build and call is clear
#######################################################
def __init__(self, units, output_dim,
activation='tanh',
...
def build(self, input_shape):
...
def call(self, x):
self.x_seq = x
...
return super(AttentionDecoder, self).call(x)
##################################################################
# What is the purpose of 'get_initial_state' and 'step' functions
# Do these functions override the Recurrent base class functions?
##################################################################
def get_initial_state(self, inputs):
# apply the matrix on the first time step to get the initial s0.
s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))
# from keras.layers.recurrent to initialize a vector of (batchsize,
# output_dim)
y0 = K.zeros_like(inputs) # (samples, timesteps, input_dims)
y0 = K.sum(y0, axis=(1, 2)) # (samples, )
y0 = K.expand_dims(y0) # (samples, 1)
y0 = K.tile(y0, [1, self.output_dim])
return [y0, s0]
def step(self, x, states):
ytm, stm = states
...
return yt, [yt, st]
The AttentionDecoder class is inherited from Recurrent, an abstract base class for recurrent layers ( Recurrent#keras.layers.recurrent, documented here ).
How do the get_initial_state and step functions work within the class (who calls them, when, etc.)? If these functions are related to the base class Recurrent, where can I find the relevant documentation?

Embed trainable bijector into Keras model

I am trying to implement normalizing flows embedded in a Keras model. In all examples I can find, such as the documentation of MAF, the bijectors which constitute the normalizing flows are embedded into a TransformedDistribution and exposed directly for training etc.
I am trying to embed this TransformedDistribution in a keras Model to match the architecture of other models I have which are inheriting from keras Model.
Unfortunately all my attempts (see code) so far fail at transferring the trainable variables inside the transformed distribution to the keras Model.
I have tried to make the bijector inherit from tf.keras.layers.Layer, which did not change anything.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors
class Flow(tfb.Bijector, tf.Module):
    """
    tf.Module to register trainable_variables

    Identity bijector (forward and inverse return their input) that owns a
    single trainable variable ``u`` of shape [1, d].
    """

    def __init__(self, d, init_sigma=0.1, **kwargs):
        super().__init__(
            dtype=tf.float32,
            forward_min_event_ndims=0,
            inverse_min_event_ndims=0,
            **kwargs
        )
        # Shape of the flow goes from Rd to Rd
        self.d = d
        # Weights/Variables initializer
        self.init_sigma = init_sigma
        w_init = tf.random_normal_initializer(stddev=self.init_sigma)
        # Variables
        self.u = tf.Variable(
            w_init(shape=[1, self.d], dtype=tf.float32),
            dtype=tf.float32,
            name='u',
            trainable=True,
        )

    def _forward(self, x):
        return x

    def _inverse(self, y):
        return y
class Flows(tf.keras.Model):
    """Keras model wrapping a TransformedDistribution over a chain of Flows.

    Args:
        d: dimension passed to every Flow bijector.
        shape: shape of the zero ``loc`` of the base MultivariateNormalDiag.
        n_flows: number of Flow bijectors in the chain.
    """

    def __init__(self, d=2, shape=(100, 2), n_flows=10):
        super().__init__()
        # Parameters
        self.d = d
        self.shape = shape
        self.n_flows = n_flows
        # Base distribution - MF = Multivariate normal diag
        base_distribution = tfd.MultivariateNormalDiag(
            loc=tf.zeros(shape=shape, dtype=tf.float32)
        )
        # Flows as chain of bijector
        flows = [Flow(self.d, name=f"flow_{n + 1}") for n in range(n_flows)]
        bijector = tfb.Chain(list(reversed(flows)))
        self.flow = tfd.TransformedDistribution(
            distribution=base_distribution,
            bijector=bijector
        )

    def call(self, *inputs):
        """Push the inputs forward through the bijector chain."""
        return self.flow.bijector.forward(*inputs)

    def log_prob(self, *inputs):
        """Return the transformed distribution's log-probability."""
        return self.flow.log_prob(*inputs)

    def sample(self, num):
        """Draw ``num`` samples from the transformed distribution."""
        return self.flow.sample(num)
# Instantiate the model with its default arguments.
q = Flows()
# Call to instantiate variables
q(tf.zeros(q.shape))
# Prints no trainable params (this is the problem being reported)
print(q.summary())
# Prints expected trainable params
print(q.flow.trainable_variables)
Any idea if this is even possible? Thanks!
I bumped into this issue as well. It seems to be caused by the incompatibility issues between TFP and TF 2.0 (a couple relevant issues https://github.com/tensorflow/probability/issues/355 and https://github.com/tensorflow/probability/issues/946).
As a workaround, you need to add the (trainable) variables of your transformed distribution / bijector as an attribute to your Keras Model:
class Flows(tf.keras.Model):
    """Keras model wrapping a TransformedDistribution over a chain of Flows.

    The distribution's variables are additionally stored as an attribute of
    the model, as a workaround so that the model reports them.

    Args:
        d: dimension passed to every Flow bijector.
        shape: shape of the zero ``loc`` of the base MultivariateNormalDiag.
        n_flows: number of Flow bijectors in the chain.
    """

    def __init__(self, d=2, shape=(100, 2), n_flows=10):
        super().__init__()
        # Parameters
        self.d = d
        self.shape = shape
        self.n_flows = n_flows
        # Base distribution - MF = Multivariate normal diag
        base_distribution = tfd.MultivariateNormalDiag(
            loc=tf.zeros(shape=shape, dtype=tf.float32)
        )
        # Flows as chain of bijector
        flows = [Flow(self.d, name=f"flow_{n + 1}") for n in range(n_flows)]
        bijector = tfb.Chain(list(reversed(flows)))
        self.flow = tfd.TransformedDistribution(
            distribution=base_distribution,
            bijector=bijector
        )
        # issue: https://github.com/tensorflow/probability/issues/355, https://github.com/tensorflow/probability/issues/946
        # need to add bijector's trainable variables as an attribute (name does not matter)
        # otherwise this layer has zero trainable variables
        self._variables = self.flow.variables  # https://github.com/tensorflow/probability/issues/355

    def call(self, *inputs):
        """Push the inputs forward through the bijector chain."""
        return self.flow.bijector.forward(*inputs)

    def log_prob(self, *inputs):
        """Return the transformed distribution's log-probability."""
        return self.flow.log_prob(*inputs)

    def sample(self, num):
        """Draw ``num`` samples from the transformed distribution."""
        return self.flow.sample(num)
After adding this your model should have trainable variables and weights to optimize.

Categories