How can I add reshape layer in nn.Sequential? - python

So I'm implementing Generator of a GAN and I need the architecture as shown as below:
The problem is that when I try to reshape the output of the Linear layer after BatchNorm and ReLU (shown as Dense in the figure, since the original uses TensorFlow), it throws this error: TypeError: reshape(): argument 'input' (position 1) must be Tensor, not int
I understand the error but I can't find its solution.
Is there any other way to reshape within nn.Sequential instead of calling torch explicitly?
class Generator(nn.Module):
def __init__(self, z_dim=100, im_chan=1, hidden_dim=64, rdim=9216):
super(Generator, self).__init__()
self.z_dim = z_dim
self.gen = nn.Sequential(
nn.Linear(z_dim, rdim),
nn.BatchNorm2d(rdim,momentum=0.9),
nn.ReLU(inplace=True),
----> torch.reshape(rdim, (6,6,256)),
self.make_gen_block(rdim, hidden_dim*2),
self.make_gen_block(hidden_dim*2,hidden_dim),
self.make_gen_block(hidden_dim,im_chan,final_layer=True),
)
def make_gen_block(self, input_channels, output_channels, kernel_size=1, stride=2, final_layer=False):
if not final_layer:
return nn.Sequential(
nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
nn.BatchNorm2d(output_channels),
nn.ReLU(inplace=True)
)
else:
return nn.Sequential(
nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
nn.Tanh()
)
def unsqueeze_noise(self, noise):
return noise.view(len(noise), self.zdim, 1, 1)
def forward(self, noise):
x = self.unsqueeze_noise(noise)
return self.gen(x)
def get_noise(n_samples, z_dim, device='cpu'):
return torch.randn(n_samples, z_dim, device=device)
#Testing the Gen arch
gen = Generator()
num_test = 100
#test the hidden block
test_hidden_noise = get_noise(num_test, gen.z_dim)
test_hidden_block = gen.make_gen_block(6, 6, kernel_size=1,stride=2)
test_uns_noise = gen.unsqueeze_noise(test_hidden_noise)
hidden_output = test_hidden_block(test_uns_noise)

In nn.Sequential, torch.nn.Unflatten() can help you achieve reshape operation.
For nn.Linear, the input shape is (N, *, H_{in}) and the output shape is (N, *, H_{out}). Note that the feature dimension is last, so the noise should be passed as a 2-D (N, z_dim) batch; unsqueeze_noise() is not useful here.
Based on the network structure, the arguments passed to make_gen_block are wrong.
I have checked the following code:
import torch
from torch import nn
class Generator(nn.Module):
    """GAN generator: dense projection of the noise, reshape, three upsampling blocks.

    The noise is fed to ``nn.Linear`` as a 2-D ``(N, z_dim)`` batch, projected
    to ``rdim`` features, unflattened to ``(rdim // 36, 6, 6)`` feature maps and
    then passed through three transposed-convolution blocks with
    ``kernel_size=2, stride=2`` (spatial size 6 -> 12 -> 24 -> 48).

    Args:
        z_dim: length of each input noise vector.
        im_chan: number of channels of the generated image.
        hidden_dim: base channel width of the transposed-convolution blocks.
        rdim: output width of the dense projection; must be a positive
            multiple of 36 so it can be reshaped to 6x6 feature maps.

    Raises:
        ValueError: if ``rdim`` cannot be reshaped to ``(C, 6, 6)``.
    """

    def __init__(self, z_dim=100, im_chan=1, hidden_dim=64, rdim=9216):
        super(Generator, self).__init__()
        # Derive the channel count from rdim instead of hard-coding 256, so a
        # non-default rdim either works or fails here with a clear message
        # rather than deep inside forward().
        base_chan, leftover = divmod(rdim, 6 * 6)
        if base_chan < 1 or leftover:
            raise ValueError(
                f"rdim must be a positive multiple of 36 to reshape to (C, 6, 6), got {rdim}"
            )
        self.z_dim = z_dim
        self.gen = nn.Sequential(
            nn.Linear(z_dim, rdim),
            # The Linear output is 2-D (N, rdim), hence BatchNorm1d rather than 2d.
            nn.BatchNorm1d(rdim, momentum=0.9),
            nn.ReLU(inplace=True),
            # Reshape inside nn.Sequential: (N, rdim) -> (N, C, 6, 6).
            nn.Unflatten(1, (base_chan, 6, 6)),
            self.make_gen_block(base_chan, hidden_dim * 2, kernel_size=2),
            self.make_gen_block(hidden_dim * 2, hidden_dim, kernel_size=2),
            self.make_gen_block(hidden_dim, im_chan, kernel_size=2, final_layer=True),
        )

    def make_gen_block(self, input_channels, output_channels, kernel_size=1, stride=2, final_layer=False):
        """Build one upsampling stage.

        Returns ``ConvTranspose2d -> Tanh`` when ``final_layer`` is true,
        otherwise ``ConvTranspose2d -> BatchNorm2d -> ReLU``.
        """
        deconv = nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride)
        if final_layer:
            return nn.Sequential(deconv, nn.Tanh())
        return nn.Sequential(
            deconv,
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Map a ``(N, z_dim)`` noise batch to generated images."""
        return self.gen(x)
def get_noise(n_samples, z_dim, device='cpu'):
    """Return a ``(n_samples, z_dim)`` tensor of standard-normal noise on ``device``."""
    return torch.randn(n_samples, z_dim, device=device)
gen = Generator()
num_test = 100
input_noise = get_noise(num_test, gen.z_dim)
output = gen(input_noise)
assert output.shape == (num_test, 1, 48, 48)

Related

Changing the output of a convolutional layer to a tuple of tensors

For processing video frames, I use the squeeze and excitation block for weighting the channels of a convolutional layer.
I want to combine (using torch.stack) the channels (feature maps) of a convolutional layer with the weighted channels (produced by the squeeze-and-excitation block mentioned above). But when I call torch.stack(x, weighted_channels), the first argument, x (the convolutional layer's channels), triggers this error: TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor.
class conv(nn.Module):
def __init__(self, in_channel, out_channel, out_sigmoid=False):
super(conv, self).__init__()
self.deconv = self._deconv(in_channel=512, out_channel=256, num_conv=3)
self.upsample = Upsample(scale_factor=2, mode='bilinear')
self.SEBlock = SE_Block(c=256)
def _deconv(self, in_channel, out_channel, num_conv=2, kernel_size=3, stride=1, padding=1):
layers=[]
layers.append(BasicConv2d(in_channel, out_channel,kernel_size=kernel_size, stride=stride, padding=padding))
for i in range(1, num_conv):
layers.append(_SepConv2d(out_channel, out_channel,kernel_size=kernel_size, stride=stride, padding=padding))
return nn.Sequential(*layers)
def forward(self, x):
x=self.deconv(x)
x = self.upsample(x)
stack = torch.stack(x, self.SEBlock(x,c=256))
return x
class BasicConv2d(nn.Module):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and ``ReLU``.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: zero padding added by the convolution (default 0).
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-3, momentum=0.001, affine=True)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution, batch normalisation and ReLU in that order."""
        return self.relu(self.bn(self.conv(x)))
class _SepConv2d(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
super(_SepConv2d, self).__init__()
self.conv_s = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, bias=False, groups=in_planes)
self.bn_s = nn.BatchNorm2d(out_planes)
self.relu_s = nn.ReLU()
self.conv_t = nn.Conv2d(out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
self.bn_t = nn.BatchNorm2d(out_planes)
self.relu_t = nn.ReLU()
def forward(self, x):
x = self.conv_s(x)
x = self.bn_s(x)
x = self.relu_s(x)
x = self.conv_t(x)
x = self.bn_t(x)
x = self.relu_t(x)
return x
class SE_Block(nn.Module):
    """Squeeze-and-excitation block that rescales each channel of its input.

    credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4

    Args:
        c: number of channels of the input feature map.
        r: reduction ratio of the bottleneck (hidden width is ``c // r``).
    """

    def __init__(self, c, r=16):
        super().__init__()
        # Squeeze: global average pool each channel down to a single value.
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        # Excitation: bottleneck MLP producing one sigmoid gate per channel.
        self.excitation = nn.Sequential(
            nn.Linear(c, c // r, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c // r, c, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` multiplied channel-wise by its learned gates (same shape as ``x``)."""
        bs, c, _, _ = x.shape
        pooled = self.squeeze(x).view(bs, c)
        gates = self.excitation(pooled).view(bs, c, 1, 1)
        return x * gates.expand_as(x)
I checked two arguments of torch.stack but the both are of the same size.
See https://pytorch.org/docs/stable/generated/torch.stack.html.
torch.stack(tensors, dim=0, *, out=None) → Tensor
tensors (sequence of Tensors) – sequence of tensors to concatenate
A sequence of tensors can be a tuple like (tensor1, tensor2, tensor3) or a list [tensor1, tensor2, tensor3]. What you did was pass x, which is a single tensor rather than a sequence of tensors, as the first argument, and weighted_channels as the dim parameter.
So as noted in the comments either
torch.stack((x, weighted_channels)) or torch.stack([x, weighted_channels]) should work.
Keep in mind that this is the same for all functions which take an arbitrary number of tensors and do something with them, e.g. torch.cat and the other stack functions such as vstack and hstack.

2x nested Tensorflow custom layers results in zero trainable parameters

I am creating a series of custom Tensorflow (version 2.4.1) layers and am running into a problem where the model summary shows zero trainable parameters. Below is a series of examples showing how everything is fine until I add in the last custom layer.
Here are the imports and custom classes:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (BatchNormalization, Conv2D, Input, ReLU,
Layer)
class basic_conv_stack(Layer):
    """Custom Keras layer: Conv2D (padding='same') -> BatchNormalization -> ReLU.

    Args:
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
    """

    def __init__(self, filters, kernel_size, strides):
        super(basic_conv_stack, self).__init__()
        # Sub-layers are created here, once, and stored on the instance.
        self.conv1 = Conv2D(filters, kernel_size, strides, padding='same')
        self.bn1 = BatchNormalization()
        self.relu = ReLU()

    def call(self, x):
        """Apply the convolution, batch normalisation and ReLU in order."""
        return self.relu(self.bn1(self.conv1(x)))
class basic_residual(Layer):
    """Custom Keras layer that chains two ``basic_conv_stack`` layers.

    Args:
        filters: number of filters passed to both conv stacks.
        kernel_size: kernel size passed to both conv stacks.
        strides: strides passed to both conv stacks.
    """

    def __init__(self, filters, kernel_size, strides):
        super(basic_residual, self).__init__()
        # Both stacks are instantiated here and stored on the instance.
        self.bcs1 = basic_conv_stack(filters, kernel_size, strides)
        self.bcs2 = basic_conv_stack(filters, kernel_size, strides)

    def call(self, x):
        """Run the input through the first stack, then the second."""
        return self.bcs2(self.bcs1(x))
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x
Now, if I do the following, everything works out ok and I get 300 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_conv_stack(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Similarly, if I do the following, I get 1,230 trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_residual(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
However, if I try the basic_module class, I get zero trainable parameters:
input_layer = Input((128, 128, 3))
conv = basic_module(10, 3, 1)(input_layer)
model = Model(input_layer, conv)
print (model.summary())
Does anyone know why this is happening?
Edit to add:
I discovered that the layers used in the call must be initialized in the class's init for things to work properly. So if I change the basic module to this:
class basic_module(Layer):
    """Custom Keras layer that applies four ``basic_residual`` layers in sequence.

    The four sub-layers are instantiated once in ``__init__`` and stored on
    the instance, rather than being constructed inside ``call``.

    Args:
        filters: number of filters passed to every sub-layer.
        kernel_size: kernel size passed to every sub-layer.
        strides: strides passed to every sub-layer.
    """

    def __init__(self, filters, kernel_size, strides):
        super(basic_module, self).__init__()
        self.clayers = [basic_residual(filters, kernel_size, strides) for _ in range(4)]

    def call(self, x):
        """Feed the input through each stored sub-layer in order."""
        for layer in self.clayers:
            x = layer(x)
        return x
Everything works fine. I don't know why this is the case, so I'll leave this question open in case someone can answer the why of this question.
You have to initialize the class instances with the required parameters (such as filters, kernel_size, strides) in the predefined basic_module. Also, note that these hyper-parameters are related to the properties of the trainable weights.
# >>> a = basic_module
# >>> a
# __main__.basic_module
# >>> a = basic_module(10, 3, 1)
# >>> a
# <__main__.basic_module at 0x7f6123eed510>
class basic_module(Layer):
def __init__(self, filters, kernel_size, strides):
super(basic_module, self).__init__()
self.res = basic_residual # < ---
self.args = (filters, kernel_size, strides)
def call(self, x):
for _ in range(4):
x = self.res(*self.args)(x)
return x

Object has no attribute even though function is defined [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 1 year ago.
Improve this question
The error message that I get is:
'Network' object has no attribute '_block'
This is part of the code that I use:
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class Network(nn.Module):
def __init__(self, channels_img, features_d, num_classes, img_size):
super(Network, self).__init__()
self.critic = nn.Sequential(
self._block(
in_channels = features_d,
out_channels = features_d * 2,
kernel_size = (4, 4),
)
)
def _block(self, in_channels, out_channels, kernel_size, stride, padding):
return nn.Conv2d(
in_channels = in_channels,
out_channels = out_channels,
kernel_size = kernel_size,
)
def forward(self, x, labels):
return self.critic(x)
channels_img = 1
features_d = 16
model = Network(
channels_img = channels_img,
features_d = features_d,
num_classes = 10,
img_size = 28
).to(device)
(Please note that this is only a minimal working example, and that there is much more code. However, this code already leads to the described error message..)
Looks like you are missing self parameters in the function.
Try like this:
def _block(self, in_channels, out_channels, kernel_size):
[...]
Explanation:
The self parameter is a reference to the current instance of the
class, and is used to access variables that belongs to the class.
It does not have to be named self , you can call it whatever you like,
but it has to be the first parameter of any function in the class.
Edit:
As MisterMiyagi pointed out, although the missing self is a problem in itself, it would cause a different error, something like:
TypeError: _foo() takes 3 positional arguments but 4 were given
Edit #2
Since the original question was posted, the question was updated.
With this code there is an indentation issue.
The _block method is inside the __init__ method (look at the indentation):
class Network(nn.Module):
    def __init__(self, channels_img, features_d, num_classes, img_size):
        super(Network, self).__init__()
        self.critic = nn.Sequential(
            self._block(
                in_channels = features_d,
                out_channels = features_d * 2,
                kernel_size = (4, 4),
            )
        )

        # NOTE: this `def` is indented one level too deep. It is a local
        # function of __init__, not a method of Network, so the
        # `self._block(...)` lookup above fails with
        # "'Network' object has no attribute '_block'".
        def _block(self, in_channels, out_channels, kernel_size, stride, padding):
            ...
With fixed indentation the code works:
import torch
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class Network(nn.Module):
    """Minimal critic network: a single convolution wrapped in ``nn.Sequential``.

    Args:
        channels_img: accepted but not used by this minimal example.
        features_d: input channel count of the convolution; the output has
            ``features_d * 2`` channels.
        num_classes: accepted but not used by this minimal example.
        img_size: accepted but not used by this minimal example.
    """

    def __init__(self, channels_img, features_d, num_classes, img_size):
        super(Network, self).__init__()
        self.critic = nn.Sequential(
            self._block(
                in_channels=features_d,
                out_channels=features_d * 2,
                kernel_size=(4, 4),
                stride=1,
                padding=0,
            )
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        """Build one ``nn.Conv2d`` layer with the given geometry.

        ``stride`` and ``padding`` are forwarded to the convolution; ``None``
        is still accepted for either and falls back to 1 and 0 respectively.
        """
        return nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1 if stride is None else stride,
            padding=0 if padding is None else padding,
        )

    def forward(self, x, labels):
        """Run ``x`` through the critic; ``labels`` is accepted but not used here."""
        return self.critic(x)
channels_img = 1
features_d = 16
model = Network(
channels_img = channels_img,
features_d = features_d,
num_classes = 10,
img_size = 28
).to(device)
print('Code ran!')
Output:
$ python3 test_so.py
Code ran!
There is another problem with the code you updated:
_block is expecting two additional arguments that you did not add in the method call in __init__, which gives this error: TypeError: _block() missing 2 required positional arguments: 'stride' and 'padding'. I added those arguments to make the example work;
I added a print statement in the end to show that the code ran;
In summary, be careful with your indentation.
I see that you're not passing the "self" parameter to the function, hence it doesn't recognize it as a method.
You should do def _block(self, ...args):

Understanding the code in pyTorch

I am having problems with understanding the following part of the code from ResNet architecture. The full code is available at https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/deep_residual_network/main-gpu.py . I am not very familiar with Python.
# Residual Block
class ResidualBlock(nn.Module):
    """Two conv3x3 + BatchNorm stages with a skip connection added before the final ReLU.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            back as the residual; ``None`` means the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + residual)``."""
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Compare against None explicitly instead of relying on the
        # truthiness of a Module.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        return self.relu(out)
# ResNet Module
class ResNet(nn.Module):
    """Small ResNet assembled from a caller-supplied residual block class.

    Args:
        block: class (or factory) called as
            ``block(in_channels, out_channels, stride, downsample)`` to
            build each residual block.
        layers: sequence giving how many blocks each stage contains.
        num_classes: size of the final linear layer's output (default 10).
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; make_layer() reads and updates it.
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        # NOTE(review): layers[0] is reused for layer2 and layers[2] is never
        # read -- confirm whether layers[1] / layers[2] were intended.
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and the optional downsample
        path; the remaining blocks map ``out_channels`` to ``out_channels``.
        Updates ``self.in_channels`` to ``out_channels``.
        """
        downsample = None
        # The skip path needs its own conv + BN whenever the first block
        # changes the stride or the channel count.
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        for _ in range(1, blocks):
            stage.append(block(out_channels, out_channels))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Stem conv/BN/ReLU, three stages, average pool, flatten, linear layer."""
        out = self.relu(self.bn(self.conv(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        return self.fc(out)
resnet = ResNet(ResidualBlock, [3, 3, 3])
My main question is why should we pass 'block' every time? In the function
def make_layer(self, block, out_channels, blocks, stride=1):
instead of passing 'block' why cant we create an instance of 'ResidualBlock' and append it with layers as follows?
block = ResidualBlock(self.in_channels, out_channels, stride, downsample)
layers.append(block)
The ResNet module is designed to be generic, so that it can create networks with arbitrary blocks. So, if you do not pass the block which you want to create you'll have to write the name of the block explicitly like below.
# Residual Block
class ResidualBlock(nn.Module):
    """Two conv3x3 + BatchNorm stages with a skip connection added before the final ReLU.

    Args:
        in_channels: channel count of the input.
        out_channels: channel count produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input before it is added
            back as the residual; ``None`` means the input is added unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + residual)``."""
        residual = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Compare against None explicitly instead of relying on the
        # truthiness of a Module.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        return self.relu(out)
# ResNet Module
class ResNet(nn.Module):
    """Small ResNet hard-wired to ``ResidualBlock`` (no ``block`` parameter).

    Args:
        layers: sequence giving how many blocks each stage contains.
        num_classes: size of the final linear layer's output (default 10).
    """

    def __init__(self, layers, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count; make_layer() reads and updates it.
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(16, layers[0])
        # NOTE(review): layers[0] is reused for layer2 and layers[2] is never
        # read -- confirm whether layers[1] / layers[2] were intended.
        self.layer2 = self.make_layer(32, layers[0], 2)
        self.layer3 = self.make_layer(64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` ``ResidualBlock`` instances as an ``nn.Sequential``.

        Only the first block receives ``stride`` and the optional downsample
        path; the remaining blocks map ``out_channels`` to ``out_channels``.
        Updates ``self.in_channels`` to ``out_channels``.
        """
        downsample = None
        # The skip path needs its own conv + BN whenever the first block
        # changes the stride or the channel count.
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        # The block type is named explicitly here: this is the change that
        # ties the network to ResidualBlock.
        stage = [ResidualBlock(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        for _ in range(1, blocks):
            stage.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Stem conv/BN/ReLU, three stages, average pool, flatten, linear layer."""
        out = self.relu(self.bn(self.conv(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        return self.fc(out)
resnet = ResNet([3, 3, 3])
This reduces the capability of your ResNet module and binds it with only the ResidualBlock. Now, if you create some other type of block (say ResidualBlock2), you will need to create another Resnet2 module specifically for that. So, it's better to create a generic ResNet module which takes in the block parameter, so that it can be used with different types of blocks.
A trivial python example to clarify
Suppose you want to create a function that can apply a mathematical operation on a list and returns its output. So, you might create something like below
def exp(inp_list):
    """Return a new list holding ``math.exp(num)`` for every ``num`` in ``inp_list``."""
    out_list = []
    for num in inp_list:
        out_list.append(math.exp(num))
    return out_list
def floor(inp_list):
    """Return a new list holding ``math.floor(num)`` for every ``num`` in ``inp_list``."""
    out_list = []
    for num in inp_list:
        out_list.append(math.floor(num))
    return out_list
Here, we are doing an exponent and a floor operation on some input list. But, we can do a better job by defining a generic function to do the same as
def apply_func(fn, inp_list):
    """Return a new list holding ``fn(num)`` for every ``num`` in ``inp_list``.

    Generic counterpart of the one-operation helpers: the operation to apply
    is passed in as ``fn`` instead of being fixed inside the function.
    """
    out_list = []
    for num in inp_list:
        out_list.append(fn(num))
    return out_list
and now call this apply_func as apply_func(math.exp, inp_list) for exponential and as apply_func(math.floor, inp_list) for floor function. Also this opens up possibility for any kind of operation.
Note: It's not a practical example as you can always use map or list comprehension for achieving the same thing. But, it demonstrates the use clearly.

How to do fully connected batch norm in PyTorch?

torch.nn has classes BatchNorm1d, BatchNorm2d, BatchNorm3d, but it doesn't have a fully connected BatchNorm class? What is the standard way of doing normal Batch Norm in PyTorch?
Ok. I figured it out. BatchNorm1d can also handle Rank-2 tensors, thus it is possible to use BatchNorm1d for the normal fully-connected case.
So for example:
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer fully connected network with ``BatchNorm1d`` after each hidden ReLU.

    ``BatchNorm1d`` is applied directly to the rank-2 ``(N, features)``
    outputs of the linear layers.

    Args:
        num_inputs: size of each input vector.
        action_space: number of outputs of the final linear layer.
        hidden_size1: width of the first hidden layer (default 256).
        hidden_size2: width of the second hidden layer (default 128).
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space
        self.linear1 = nn.Linear(num_inputs, hidden_size1)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Apply linear -> ReLU -> BatchNorm1d twice, then the output linear layer."""
        # nn.functional is reached through the `nn` alias the snippet already
        # imports; the bare name `F` is never imported here.
        x = inputs
        x = self.bn1(nn.functional.relu(self.linear1(x)))
        x = self.bn2(nn.functional.relu(self.linear2(x)))
        out = self.linear3(x)
        return out
The BatchNorm1d normally comes before the ReLU, and the bias is redundant, so
import torch.nn as nn
class Policy(nn.Module):
    """Three-layer fully connected network with ``BatchNorm1d`` before each hidden ReLU.

    The two hidden linear layers are created without a bias term because
    each is immediately followed by batch normalisation.

    Args:
        num_inputs: size of each input vector.
        action_space: number of outputs of the final linear layer.
        hidden_size1: width of the first hidden layer (default 256).
        hidden_size2: width of the second hidden layer (default 128).
    """

    def __init__(self, num_inputs, action_space, hidden_size1=256, hidden_size2=128):
        # Zero-argument super(): the previous super(Policy2, self) named a
        # class that does not exist and raised NameError on construction.
        super().__init__()
        self.action_space = action_space
        num_outputs = action_space
        self.linear1 = nn.Linear(num_inputs, hidden_size1, bias=False)
        self.linear2 = nn.Linear(hidden_size1, hidden_size2, bias=False)
        self.linear3 = nn.Linear(hidden_size2, num_outputs)
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)

    def forward(self, inputs):
        """Apply linear -> BatchNorm1d -> ReLU twice, then the output linear layer."""
        # nn.functional is reached through the `nn` alias the snippet already
        # imports; the bare name `F` is never imported here.
        x = inputs
        x = nn.functional.relu(self.bn1(self.linear1(x)))
        x = nn.functional.relu(self.bn2(self.linear2(x)))
        out = self.linear3(x)
        return out

Categories