2x nested Tensorflow custom layers results in zero trainable parameters - python

I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
class basic_conv_stack(Layer):
    """Conv2D -> BatchNormalization -> ReLU, applied in that order.

    Args:
        filters: first positional argument handed to ``Conv2D``.
        kernel_size: second positional argument handed to ``Conv2D``.
        strides: third positional argument handed to ``Conv2D``.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        # Sub-layers are built once here so every call reuses the same objects.
        self.conv1 = Conv2D(filters, kernel_size, strides, padding='same')
        self.bn1 = BatchNormalization()
        self.relu = ReLU()

    def call(self, x):
        """Return ``relu(bn1(conv1(x)))``."""
        return self.relu(self.bn1(self.conv1(x)))
class basic_residual(Layer):
    """Two ``basic_conv_stack`` layers applied one after the other.

    Both stacks are constructed with the same ``filters``, ``kernel_size`` and
    ``strides``.

    NOTE(review): despite the name, ``call`` performs no skip connection or
    addition; it only chains the two stacks.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.bcs1 = basic_conv_stack(filters, kernel_size, strides)
        self.bcs2 = basic_conv_stack(filters, kernel_size, strides)

    def call(self, x):
        """Feed ``x`` through ``bcs1`` and then ``bcs2``."""
        return self.bcs2(self.bcs1(x))
class basic_module(Layer):
    """Question's version, reported to show zero trainable parameters.

    ``__init__`` keeps the ``basic_residual`` *class* plus its constructor
    arguments; no sub-layer instance exists until ``call`` runs.
    """

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.res = basic_residual  # the class itself, not an instance
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        """Pass ``x`` through four ``basic_residual`` layers built on the spot."""
        layer_cls, ctor_args = self.res, self.args
        for _ in range(4):
            # A brand-new layer object is constructed on every iteration.
            x = layer_cls(*ctor_args)(x)
        return x
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Similarly, if I do the following, I get 1,230 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
However, if I try the basic_module class, I get zero trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in the call must be initialized in the class's init for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
    """Four ``basic_residual`` layers, created in ``__init__`` and chained in ``call``."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        # Instantiate all four sub-layers up front and keep them on the layer.
        self.clayers = [
            basic_residual(filters, kernel_size, strides) for _ in range(4)
        ]

    def call(self, x):
        """Apply the stored layers to ``x`` in list order."""
        for layer in self.clayers:
            x = layer(x)
        return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.

You have to instantiate the layer classes with their required parameters (filters, kernel_size, strides) rather than storing the bare class, as the question's basic_module does with basic_residual. Also, note that these hyper-parameters determine the properties of the trainable weights.
# >>> a = basic_module
# >>> a
# __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
    """Annotated copy of the question's layer; the arrow marks the line at issue."""

    def __init__(self, filters, kernel_size, strides):
        super().__init__()
        self.res = basic_residual # < ---
        self.args = (filters, kernel_size, strides)

    def call(self, x):
        """Build a ``basic_residual`` from the stored args four times, chaining the outputs."""
        out = x
        for _ in range(4):
            residual = self.res(*self.args)  # instance only exists inside call
            out = residual(out)
        return out


Get input shape with Keras custom layer

I am writing a custom Keras layer that returns a tensor of zeros the first three times it is invoked and returns its input unchanged afterwards. The code is the following:
class MyLayer(tf.keras.layers.Layer):
    """Return a stored zeros tensor for the first three calls, then the inputs.

    This is the version posted in the question: ``build`` passes the complete
    ``input_shape`` to ``tf.zeros``, which is the call shown in the traceback
    for the partially known shape ``(None, 13, 13, 64)``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__iteration = 0  # number of times call() has run
        self.__returning_zeros = None  # filled in by build()

    def build(self, input_shape):
        """Create the zeros tensor from the full ``input_shape``."""
        self.__returning_zeros = tf.zeros(shape=input_shape, dtype=tf.float32)

    def call(self, inputs):
        """Count the invocation and pick between the zeros tensor and ``inputs``."""
        self.__iteration += 1
        still_zeroing = self.__iteration <= 3
        return self.__returning_zeros if still_zeroing else inputs
Unfortunately if I try to build a model using this layer like this
def build_model(input_shape, num_classes):
    """Assemble the functional Keras model used to exercise ``MyLayer``.

    Layer order: conv1 (32 filters) -> maxpool1 -> conv2 (64 filters) ->
    MyLayer -> maxpool2 -> flatten -> dropout (0.5) -> dense softmax.

    Args:
        input_shape: passed to ``keras.Input`` as ``shape``.
        num_classes: number of units of the final ``Dense`` layer.

    Returns:
        A ``keras.Model`` mapping the input layer to the dense output.
    """
    input_layer = keras.Input(shape=input_shape, name='input')

    x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu", name='conv1')(input_layer)
    x = layers.MaxPooling2D(pool_size=(2, 2), name='maxpool1')(x)
    x = layers.Conv2D(64, kernel_size=(3, 3), activation="relu", name='conv2')(x)
    x = MyLayer()(x)
    x = layers.MaxPooling2D(pool_size=(2, 2), name='maxpool2')(x)
    x = layers.Flatten(name='flatten')(x)
    x = layers.Dropout(0.5, name='dropout')(x)
    x = layers.Dense(num_classes, activation="softmax", name='dense')(x)

    return keras.Model(inputs=(input_layer,), outputs=x)
I get the following error message
File "customlayerkeras.py", line 25, in build
self.__returning_zeros = tf.zeros(shape=input_shape, dtype=tf.float32)
ValueError: Cannot convert a partially known TensorShape (None, 13, 13, 64) to a Tensor.
Where it seems that, despite using the build function as suggested in the documentation I am not able to retrieve the correct shape of the input.
How can I fix this problem?
I was overcomplicating the problem; the simplest solution is to just multiply the inputs by zero, like this:
def call(self, inputs):
    """Return ``inputs*0`` for the first three invocations, ``inputs`` afterwards."""
    self.__iteration += 1
    # Multiplying by zero keeps whatever shape the incoming tensor has.
    return inputs*0 if self.__iteration <= 3 else inputs
Pretty sure you don't need the dimension of the batch, so you can do something like this:
class MyLayer(tf.keras.layers.Layer):
    """Zero out the inputs for the first three calls, then pass them through.

    The zeros tensor is built from ``input_shape[1:]`` (leading dimension
    dropped) and multiplied with the inputs while zeroing is active.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__iteration = 0
        self.__returning_zeros = None

    def build(self, input_shape):
        """Create the zeros tensor from every dimension except the first."""
        per_sample_shape = input_shape[1:]
        self.__returning_zeros = tf.zeros(shape=per_sample_shape, dtype=tf.float32)

    def call(self, inputs):
        """Count the call; multiply by zeros on calls 1-3, otherwise return ``inputs``."""
        self.__iteration += 1
        if self.__iteration > 3:
            return inputs
        # or like return tf.repeat(self.__returning_zeros[None,...], tf.shape(inputs)[0], axis=0)
        return inputs * self.__returning_zeros

How can I add reshape layer in nn.Sequential?

So I'm implementing Generator of a GAN and I need the architecture as shown as below:
The problem is that when I try to reshape the output of the Linear layer after BatchNorm and ReLU (shown as Dense in the figure, since it was drawn for TensorFlow), it throws the error: TypeError: reshape(): argument 'input' (position 1) must be Tensor, not int
I understand the error but I can't find its solution.
Is there any other way to reshape within nn.Sequential instead of calling torch explicitly?
# Generator as posted in the question. This listing is an excerpt: none of the
# nn.Sequential( calls below is closed, so it is not runnable as shown.
class Generator(nn.Module):
def __init__(self, z_dim=100, im_chan=1, hidden_dim=64, rdim=9216):
super(Generator, self).__init__()
self.z_dim = z_dim
self.gen = nn.Sequential(
nn.Linear(z_dim, rdim),
# Line flagged by the asker: torch.reshape is called here with the int rdim
# as its first argument, which matches the TypeError quoted in the question.
----> torch.reshape(rdim, (6,6,256)),
self.make_gen_block(rdim, hidden_dim*2),
# Builds one block around a ConvTranspose2d; only that layer is visible in
# each branch of this excerpt.
def make_gen_block(self, input_channels, output_channels, kernel_size=1, stride=2, final_layer=False):
if not final_layer:
return nn.Sequential(
nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
return nn.Sequential(
nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
# Views the noise as (len(noise), <z dim>, 1, 1).
# NOTE(review): this reads self.zdim, but __init__ assigns self.z_dim - confirm.
def unsqueeze_noise(self, noise):
return noise.view(len(noise), self.zdim, 1, 1)
# Reshapes the noise with unsqueeze_noise, then runs it through self.gen.
def forward(self, noise):
x = self.unsqueeze_noise(noise)
return self.gen(x)
def get_noise(n_samples, z_dim, device='cpu'):
    """Draw an ``(n_samples, z_dim)`` tensor with ``torch.randn`` on ``device``."""
    shape = (n_samples, z_dim)
    return torch.randn(*shape, device=device)
#Testing the Gen arch
gen = Generator()
num_test = 100
#test the hidden block
test_hidden_noise = get_noise(num_test, gen.z_dim)
test_hidden_block = gen.make_gen_block(6, 6, kernel_size=1,stride=2)
test_uns_noise = gen.unsqueeze_noise(test_hidden_noise)
hidden_output = test_hidden_block(test_uns_noise)
In nn.Sequential, torch.nn.Unflatten() can help you achieve reshape operation.
For nn.Linear, the input shape is (N, *, H_{in}) and the output shape is (N, *, H_{out}). Note that the feature dimension is last, so unsqueeze_noise() is not useful here.
Based on the network structure, the arguments passed to make_gen_block are wrong.
I have checked the following code:
import torch
from torch import nn
class Generator(nn.Module):
    """GAN generator: Linear -> BatchNorm1d -> Unflatten -> three transposed-conv blocks.

    The flat output of the linear layer is reshaped to ``(256, 6, 6)`` with
    ``nn.Unflatten`` so the whole pipeline fits in one ``nn.Sequential``.
    Each block uses ``kernel_size=2`` with the default ``stride=2``, doubling
    the spatial size: 6 -> 12 -> 24 -> 48.

    Args:
        z_dim: length of the input noise vector.
        im_chan: number of channels of the generated image.
        hidden_dim: base channel width of the transposed-conv blocks.
        rdim: width of the linear layer; it must equal ``256 * 6 * 6`` (the
            default 9216) for the hard-coded ``Unflatten`` target to apply.
    """

    def __init__(self, z_dim=100, im_chan=1, hidden_dim=64, rdim=9216):
        super().__init__()
        self.z_dim = z_dim
        self.gen = nn.Sequential(
            nn.Linear(z_dim, rdim),
            nn.BatchNorm1d(rdim, momentum=0.9),  # use BN1d
            nn.Unflatten(1, (256, 6, 6)),
            self.make_gen_block(256, hidden_dim * 2, kernel_size=2),  # note arguments
            self.make_gen_block(hidden_dim * 2, hidden_dim, kernel_size=2),  # note kernel_size
            self.make_gen_block(hidden_dim, im_chan, kernel_size=2, final_layer=True),  # note kernel_size
        )

    def make_gen_block(self, input_channels, output_channels, kernel_size=1, stride=2, final_layer=False):
        """Return an ``nn.Sequential`` wrapping one ``ConvTranspose2d``.

        NOTE(review): the listing this was restored from shows only the
        transposed convolution in both branches; any normalisation or
        activation layers that followed it appear to have been lost. Confirm
        against the original answer before relying on ``final_layer``.
        """
        if not final_layer:
            return nn.Sequential(
                nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
            )
        return nn.Sequential(
            nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
        )

    def forward(self, x):
        """Map a ``(batch, z_dim)`` noise tensor through ``self.gen``."""
        return self.gen(x)
def get_noise(n_samples, z_dim, device='cpu'):
    """Return standard-normal noise of shape ``(n_samples, z_dim)`` placed on ``device``."""
    noise = torch.randn(n_samples, z_dim, device=device)
    return noise
gen = Generator()
num_test = 100
input_noise = get_noise(num_test, gen.z_dim)
output = gen(input_noise)
assert output.shape == (num_test, 1, 48, 48)

Changing the output of a convolutional layer to a tuple of tensors

For processing video frames, I use the squeeze and excitation block for weighting the channels of a convolutional layer.
I want to combine (using torch.stack) the channels (feature maps) of a convolutional layer with the weighted channels produced by the squeeze and excitation block mentioned above. However, when I call torch.stack(x, weighted_channels), where x holds the convolutional layer's channels, I get the error TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor.
# Module from the question (excerpt). Its forward() holds the torch.stack call
# that raises the TypeError the question describes.
class conv(nn.Module):
def __init__(self, in_channel, out_channel, out_sigmoid=False):
super(conv, self).__init__()
# The constructor arguments are not used in the lines shown here: the deconv
# stack is built with literal 512 -> 256 channels and the SE block with c=256.
self.deconv = self._deconv(in_channel=512, out_channel=256, num_conv=3)
self.upsample = Upsample(scale_factor=2, mode='bilinear')
self.SEBlock = SE_Block(c=256)
# Builds one BasicConv2d followed by (num_conv - 1) _SepConv2d layers and wraps
# them in nn.Sequential.
# NOTE(review): `layers` is appended to without being defined in this excerpt;
# presumably a `layers = []` line was lost - confirm.
def _deconv(self, in_channel, out_channel, num_conv=2, kernel_size=3, stride=1, padding=1):
layers.append(BasicConv2d(in_channel, out_channel,kernel_size=kernel_size, stride=stride, padding=padding))
for i in range(1, num_conv):
layers.append(_SepConv2d(out_channel, out_channel,kernel_size=kernel_size, stride=stride, padding=padding))
return nn.Sequential(*layers)
def forward(self, x):
x = self.upsample(x)
# torch.stack receives the tensor x as `tensors` and the SE output as the
# second positional argument instead of a single sequence of tensors.
# The result is bound to `stack`, but the method returns x.
stack = torch.stack(x, self.SEBlock(x,c=256))
return x
class BasicConv2d(nn.Module):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and ``ReLU``.

    Args:
        in_planes: input channel count of the convolution.
        out_planes: output channel count of the convolution and width of the
            batch norm.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding (default 0).
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
        self.conv = nn.Conv2d(in_planes, out_planes, **conv_kwargs)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-3, momentum=0.001, affine=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        return self.relu(self.bn(self.conv(x)))
class _SepConv2d(nn.Module):
    """Grouped conv + BN + ReLU, then a 1x1 conv + BN + ReLU.

    The first convolution uses ``groups=in_planes`` with the caller's kernel
    size, stride and padding; the second is a 1x1 convolution with stride 1
    and no padding. Neither convolution has a bias.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super().__init__()
        # "_s" stage: grouped convolution with the caller's geometry.
        self.conv_s = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=kernel_size, stride=stride, padding=padding,
            bias=False, groups=in_planes,
        )
        self.bn_s = nn.BatchNorm2d(out_planes)
        self.relu_s = nn.ReLU()
        # "_t" stage: 1x1 convolution across channels.
        self.conv_t = nn.Conv2d(
            out_planes, out_planes,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn_t = nn.BatchNorm2d(out_planes)
        self.relu_t = nn.ReLU()

    def forward(self, x):
        """Run the ``_s`` stage followed by the ``_t`` stage."""
        stage_s = self.relu_s(self.bn_s(self.conv_s(x)))
        return self.relu_t(self.bn_t(self.conv_t(stage_s)))
class SE_Block(nn.Module):
    """credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4

    Squeeze-and-excitation block: global-average-pool each channel, pass the
    pooled vector through a two-layer bottleneck, and rescale the input
    channels by the resulting per-channel gates.

    Args:
        c: number of channels of the input feature map.
        r: reduction ratio of the bottleneck (hidden width is ``c // r``).
    """

    def __init__(self, c, r=16):
        # nn.Module must be initialised before sub-modules can be assigned.
        super().__init__()
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        # The ReLU and Sigmoid follow the credited reference implementation;
        # the Sigmoid keeps every gate in (0, 1).
        self.excitation = nn.Sequential(
            nn.Linear(c, c // r, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c // r, c, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise; the output has the same shape as ``x``."""
        bs, c, _, _ = x.shape
        y = self.squeeze(x).view(bs, c)  # (bs, c, 1, 1) -> (bs, c)
        y = self.excitation(y).view(bs, c, 1, 1)
        return x * y.expand_as(x)
I checked the two arguments of torch.stack, and both are the same size.
See https://pytorch.org/docs/stable/generated/torch.stack.html.
torch.stack(tensors, dim=0, *, out=None) → Tensor
tensors (sequence of Tensors) – sequence of tensors to concatenate
A sequence of tensors can be a tuple like (tensor1, tensor2, tensor3) or a list [tensor1, tensor2, tensor3]. What you did was pass x, which is a single tensor rather than a sequence of tensors, as the tensors argument, and weighted_channels as the dim parameter.
So as noted in the comments either
torch.stack((x, weighted_channels)) or torch.stack([x, weighted_channels]) should work.
Keep in mind that the same applies to all functions that take an arbitrary number of tensors and operate on them, e.g. torch.cat and the other stack functions such as vstack and hstack.

How can I make a trainable parameter in keras?

thanks for looking my question.
For example.
The final output is the sum of two matrix A and B,like this:
output = keras.layers.add([A, B])
Now,I want to build a new parameter x to change the output.
I want to make newoutput = Ax+B(1-x)
and x is a trainable parameter in my network.
what should I do?
please help me ~ thanks very much!
edit(part of code ):
conv1 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(input)
drop1 = Dropout(0.5)(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(drop1)
conv2 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(pool1)
conv2 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv2)
drop2 = Dropout(0.5)(conv2)
up1 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(drop2))
#the line I want to change:
merge = add([drop2,up1])
#this layer is simply add drop2 and up1 layer.now I want to add a trainable parameter x to adjust the weight of thoese two layers.
I tried to use the code below, but I still have some questions:
1.how can I use my own layer?
merge = Mylayer()(drop2,up1)
or otherway?
2. What is the meaning of out_dim?
The inputs are all 3-dimensional matrices, so what does out_dim mean in that case?
thank you...T.T
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
from keras.layers import add
class MyLayer(Layer):
    """Merge two inputs as ``_x*A + (1-_x)*B`` using one backend variable ``_x``.

    Expects ``call`` to receive a pair ``(A, B)``; the output shape is
    reported as the shape of the first input.
    """

    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Mixing coefficient starts at 0.5 and is assigned by hand as the
        # layer's trainable-weights list.
        self._x = K.variable(0.5)
        self.trainable_weights = [self._x]
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        first, second = x
        weighted_first = self._x*first
        weighted_second = (1-self._x)*second
        return add([weighted_first, weighted_second])

    def compute_output_shape(self, input_shape):
        return input_shape[0]
You have to create a custom class which inherits from Layer and create the trainable parameter using self.add_weight(...). You can find an example of this here and there.
For your example, the layer would somehow look like this:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
# First version from the answer: a layer with two trainable matrices _A and _B
# that returns dot(x, _A) + dot(1 - x, _B).
class MyLayer(Layer):
def __init__(self, output_dim, **kwargs):
# output_dim is the second dimension of both weight matrices (see build) and of
# the reported output shape.
self.output_dim = output_dim
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
# NOTE(review): both add_weight( calls below are cut off in this listing (no
# closing parenthesis); any arguments after `shape` are not visible here.
self._A = self.add_weight(name='A',
shape=(input_shape[1], self.output_dim),
self._B = self.add_weight(name='B',
shape=(input_shape[1], self.output_dim),
super(MyLayer, self).build(input_shape) # Be sure to call this at the end
def call(self, x):
return K.dot(x, self._A) + K.dot(1-x, self._B)
# Output shape is (first input dimension, output_dim).
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
Edit: Just based on the names I (wrongly) assumed that x is the layers input and you want to optimize A and B. But, as you stated, you want to optimize x. For this, you can do something like this:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
class MyLayer(Layer):
    """Blend two same-shaped inputs with one trainable weight: ``x*A + (1-x)*B``.

    Call it on a list of two tensors, e.g. ``MyLayer()([A, B])``. The output
    shape is reported as the shape of the first input.
    """

    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer. A single-element
        # weight broadcasts against inputs of any rank.
        self._x = self.add_weight(name='x',
                                  shape=(1,),
                                  initializer='uniform',
                                  trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        A, B = x
        # Element-wise scaling, not K.dot: the weight is a scalar mixing
        # coefficient, so no matrix product is involved.
        return self._x * A + (1 - self._x) * B

    def compute_output_shape(self, input_shape):
        return input_shape[0]
Edit2: You can call this layer using
merge = Mylayer()([drop2,up1])

How to do fully connected batch norm in PyTorch?

torch.nn has classes BatchNorm1d, BatchNorm2d, BatchNorm3d, but it doesn't have a fully connected BatchNorm class? What is the standard way of doing normal Batch Norm in PyTorch?
Ok. I figured it out. BatchNorm1d can also handle Rank-2 tensors, thus it is possible to use BatchNorm1d for the normal fully-connected case.
So for example:
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer MLP with ``BatchNorm1d`` applied after each hidden ReLU.

    Args:
        num_inputs: size of the input feature vector.
        action_space: number of outputs of the final linear layer.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space
        self.linear1 = nn.Linear(num_inputs, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Map a ``(batch, num_inputs)`` tensor to ``(batch, action_space)`` outputs."""
        # Reached through ``nn`` because the snippet never imports ``F``.
        relu = nn.functional.relu
        x = self.bn1(relu(self.linear1(inputs)))
        x = self.bn2(relu(self.linear2(x)))
        return self.linear3(x)
The BatchNorm1d normally comes before the ReLU, and the bias is redundant, so
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer MLP with ``BatchNorm1d`` placed before each hidden ReLU.

    The two hidden linear layers have no bias, since the batch norm that
    follows each of them makes it redundant.

    Args:
        num_inputs: size of the input feature vector.
        action_space: number of outputs of the final linear layer.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        # Zero-argument super(): the earlier ``super(Policy2, self)`` named a
        # class that does not exist and raised NameError on construction.
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space
        self.linear1 = nn.Linear(num_inputs, hidden_size1, bias=False)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Map a ``(batch, num_inputs)`` tensor to ``(batch, action_space)`` outputs."""
        # Reached through ``nn`` because the snippet never imports ``F``.
        relu = nn.functional.relu
        x = relu(self.bn1(self.linear1(inputs)))
        x = relu(self.bn2(self.linear2(x)))
        return self.linear3(x)
