How to change the out_features of densenet121 model?
I am using the code below to train the model:
import torch
import torch.nn as nn
from torch.nn import AvgPool2d
from torch.nn.modules.dropout import Dropout
from torchvision import models

class Densnet121(nn.Module):
    def __init__(self):
        super(Densnet121, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1)
        self.Densenet_121 = models.densenet121(pretrained=True)
        self.gap = AvgPool2d(kernel_size=2, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(1024)
        self.do1 = nn.Dropout(0.25)
        self.linear = nn.Linear(256, 256)
        self.bn2 = nn.BatchNorm2d(256)
        self.do2 = nn.Dropout(0.25)
        self.output = nn.Linear(64 * 64 * 64, 2)
        self.act = nn.ReLU()

    def densenet(self):
        for param in self.Densenet_121.parameters():
            param.requires_grad = False
        self.Densenet_121.classifier = nn.Linear(1024, 1024)
        return self.Densenet_121

    def forward(self, x):
        img = self.act(self.cnn1(x))
        img = self.densenet(img)
        img = self.gap(img)
        img = self.bn1(img)
        img = self.do1(img)
        img = self.linear(img)
        img = self.bn2(img)
        img = self.do2(img)
        img = torch.flatten(img, 1)
        img = self.output(img)
        return img
When training this model, I face the following error:
RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 64, 62, 62] to have 3 channels, but got 64 channels instead
Your first conv layer outputs a tensor of shape (b, 64, h, w), while the layer that follows it, the DenseNet, expects an input with 3 channels. Hence the error that was raised:
"expected input [...] to have 3 channels, but got 64 channels instead"
Unfortunately, this value is hard-coded in the source of the DenseNet class; see the reference.
One workaround, however, is to overwrite the first convolutional layer after the DenseNet has been initialized. Something like this should work:
# First gather the conv layer specs
conv = model.features.conv0   # `model` is the initialized densenet121 (self.Densenet_121 in your class)
kwargs = {k: getattr(conv, k) for k in
          ('out_channels', 'stride', 'kernel_size', 'padding')}
# overwrite it with identical specs but a new in_channels
# (note: `bias` must be passed as a bool, not the original Parameter)
model.features.conv0 = nn.Conv2d(in_channels=64, bias=conv.bias is not None, **kwargs)
Alternatively, you can do:
w = model.features.conv0.weight
w.data = torch.rand(len(w), 64, *w.shape[2:])
This replaces the underlying convolution weight without updating the layer's metadata (e.g. conv.in_channels remains equal to 3), which could have side effects, so I would recommend following the first approach.
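To illustrate the metadata mismatch, here is a quick check (a minimal sketch, assuming the weight swap above has been applied to model):

conv = model.features.conv0
print(conv.in_channels)    # still reports 3
print(conv.weight.shape)   # torch.Size([64, 64, 7, 7]) after the swap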
Related
My input data and target data are different types of data with different dimensions: the input data is 1D and the target data is 2D. I don’t know how to modify the model to fit this situation.
I have built an Encoder/Decoder model with tokenized MIDI data as input, and coordinate data in CSV format as output.
The input dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 1)
The target dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 102)
The Encoder/Decoder model architecture I found:
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self,
                 input_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.linear = nn.Linear(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers,
                           dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.dropout(F.relu(self.linear(x)))
        output, (hidden, cell) = self.rnn(embedded)
        return hidden, cell

class Decoder(nn.Module):
    def __init__(self,
                 output_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout = dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        embedded = self.dropout(F.relu(self.embedding(x)))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.linear(output.squeeze(0))
        return prediction, hidden, cell

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        batch_size = x.shape[1]
        target_len = y.shape[0]
        outputs = torch.zeros(y.shape).to(self.device)
        hidden, cell = self.encoder(x)
        decoder_input = x[-1, :, :]
        for i in range(target_len):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[i] if teacher_forcing else output
        return outputs
I set the input data and target data length to 900, as they need to have the same sequence_length before I convert them to tensors:
tokenized_data shape: (22, n)
target_data shape: (22, m, 102)
↓
tokenized_data shape: (22, 900)
target_data shape: (22, 900, 102)
input_tensor = torch.Tensor(input_data)
target_tensor = torch.Tensor(target_data)
torch.Size([22, 900])
torch.Size([22, 900, 102])
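For reference, here is one possible way to bring every sequence to a fixed length of 900 before the tensor conversion shown above. This is only a sketch, and the fix_length helper is hypothetical, since the post does not show how the truncation/padding was actually done:

import numpy as np

SEQ_LEN = 900

def fix_length(seq, seq_len=SEQ_LEN):
    # Truncate a sequence to seq_len steps, zero-padding it if it is shorter.
    seq = np.asarray(seq)[:seq_len]
    pad = [(0, seq_len - seq.shape[0])] + [(0, 0)] * (seq.ndim - 1)
    return np.pad(seq, pad)

input_data = np.stack([fix_length(s) for s in tokenized_data])    # (22, 900)
target_data = np.stack([fix_length(s) for s in target_data])      # (22, 900, 102)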
The parameters and model are listed below:
source = input_tensor.to(device)
target = target_tensor.to(device)
input_size = 900 # I am not sure if this is correct
output_size = (900,102) # I am not sure if this is correct
print('Input : {} Output : {}'.format(input_size, output_size))
embed_size = 256
hidden_size = 512
num_layers = 3
num_iteration = 100
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
encoder = Encoder(input_size, hidden_size, embed_size, num_layers, ENC_DROPOUT)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)
I then tried to train the model with:
model = trainModel(model, source, target, num_iteration)
The above results in the error message shown below:
TypeError: empty(): argument 'size' must be tuple of ints, but found element of type tuple at pos 2
I revised output_size = [900,102] to output_size = 900, but I got the error below:
IndexError: too many indices for tensor of dimension 2
I am having problems because the Encoder/Decoder model can’t deal with the input and target tensors having different dimensions. Any help or advice on how to create an Encoder/Decoder model that takes input and target tensors of different dimensions would be greatly appreciated.
Thank you in advance for your input.
I am trying to reproduce the multi-input neural network from this tutorial. The article uses PyTorch Lightning, while I want to use plain PyTorch, so I am adapting it to my case. Basically, I created my dataloaders and my network:
# Define loaders
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
train_loader = DataLoader(train_set, batch_size=64, num_workers=2, drop_last=True, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, num_workers=2, drop_last=False, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, num_workers=2, drop_last=False, shuffle=False)
def conv_block(input_size, output_size):
    block = nn.Sequential(
        nn.Conv2d(input_size, output_size, (3, 3)), nn.BatchNorm2d(output_size), nn.ReLU(), nn.MaxPool2d((2, 2)),
    )
    return block
class SimpleCNN(nn.Module):
    # Constructor
    def __init__(self):
        # Call parent constructor
        super().__init__()
        self.conv1 = conv_block(3, 16)
        self.conv2 = conv_block(16, 32)
        self.conv3 = conv_block(32, 64)
        self.ln1 = nn.Linear(64 * 26 * 26, 16)
        self.relu = nn.ReLU()
        self.batchnorm = nn.BatchNorm1d(16)
        self.dropout = nn.Dropout2d(0.5)
        self.ln2 = nn.Linear(16, 5)
        self.ln4 = nn.Linear(5, 10)
        self.ln5 = nn.Linear(10, 10)
        self.ln6 = nn.Linear(10, 5)
        self.ln7 = nn.Linear(10, 1)

    # Forward
    def forward(self, img, tab):
        img = self.conv1(img)
        img = self.conv2(img)
        img = self.conv3(img)
        img = img.reshape(img.shape[0], -1)
        img = self.ln1(img)
        img = self.relu(img)
        img = self.batchnorm(img)
        img = self.dropout(img)
        img = self.ln2(img)
        img = self.relu(img)
        tab = self.ln4(tab)
        tab = self.relu(tab)
        tab = self.ln5(tab)
        tab = self.relu(tab)
        tab = self.ln6(tab)
        tab = self.relu(tab)
        x = torch.cat((img, tab), dim=1)
        x = self.relu(x)
        return self.ln7(x)
Now I try to pass a sample to my network to check that it works correctly before I start training:
# Create the model
model = SimpleCNN()
img_x, tab_x, label_x = train_set[0]
print(img_x.shape, tab_x, label_x)
img_x
img_x = img_x.unsqueeze(dim=0)
output = model(img_x, tab_x)
output.shape
But here, I have this error: ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 16]) at this line:
----> 4 output = model(img_x, tab_x)
........
---> 37 img = self.batchnorm(img)
I know that this is probably because I am passing only one element to my network (I am not sure). However, if I remove that line from my neural network, basically changing the forward method in this way:
def forward(self, img, tab):
    img = self.conv1(img)
    img = self.conv2(img)
    img = self.conv3(img)
    img = img.reshape(img.shape[0], -1)
    img = self.ln1(img)
    img = self.relu(img)
    img = self.dropout(img)
    img = self.ln2(img)
    img = self.relu(img)
    tab = self.ln4(tab)
    tab = self.relu(tab)
    tab = self.ln5(tab)
    tab = self.relu(tab)
    tab = self.ln6(tab)
    tab = self.relu(tab)
    x = torch.cat((img, tab), dim=1)
    x = self.relu(x)
    return self.ln7(x)
then I get this other error: RuntimeError: Tensors must have same number of dimensions: got 2 and 1, this time referring to this line: ---> 49 x = torch.cat((img, tab), dim=1)
Edit: I solved the second error (RuntimeError: Tensors must have same number of dimensions: got 2 and 1): basically I also needed to unsqueeze tab_x in the input:
model = SimpleCNN()
img_x, tab_x, label_x = train_set[0]
img_x = img_x.unsqueeze(dim=0)
tab_x = tab_x.unsqueeze(dim=0)
output = model(img_x, tab_x)
output.shape
and my output shape is torch.Size([1, 1]). However, I am still not able to fix the first error (the one in the title).
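The first error can be reproduced in isolation; here is a minimal sketch (assuming only standard PyTorch, not the model above) showing that nn.BatchNorm1d needs more than one sample per channel while in training mode:

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(16)

bn.train()
# bn(torch.randn(1, 16))      # raises: Expected more than 1 value per channel when training
out = bn(torch.randn(2, 16))  # works: batch statistics can be computed from two samples

bn.eval()
out = bn(torch.randn(1, 16))  # also works: running statistics are used instead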
Edit for #Edwin: I am printing the shapes in this way:
def forward(self, img, tab):
    img = self.conv1(img)
    print(img.shape)
    img = self.conv2(img)
    print(img.shape)
    img = self.conv3(img)
    print(img.shape)
    img = img.reshape(img.shape[0], -1)
    print(img.shape)
    img = self.ln1(img)
    print(img.shape)
    ...
output:
torch.Size([1, 16, 111, 111])
torch.Size([1, 32, 54, 54])
torch.Size([1, 64, 26, 26])
torch.Size([1, 43264])
torch.Size([1, 16])
I’m getting a runtime error when trying to sequentialise my linear layer with a DINO backbone from torch.hub.
import torch
import torch.distributed as dist
class LinearClassifier(torch.nn.Module):
    def __init__(self, dim, num_labels=1000):
        super(LinearClassifier, self).__init__()
        self.num_labels = num_labels
        self.linear = torch.nn.Linear(dim, num_labels)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        # flatten
        x = x.view(x.size(0), -1)
        # linear layer
        return self.linear(x)
dist.init_process_group('gloo', init_method='file:///tmp/somefile', rank=0, world_size=1)
# load backbone
model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
#Setup linear layer
linear_classifier = LinearClassifier(1536, 1000)
linear_classifier = linear_classifier.cuda()
linear_classifier = torch.nn.parallel.DistributedDataParallel(linear_classifier)
state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_linearweights.pth")['state_dict']
linear_classifier.load_state_dict(state_dict, strict=True)
#Sequentialise
model = torch.nn.Sequential(model,
linear_classifier)
x = torch.ones((1, 3, 224, 224))
out = model(x)
print("out: " + out)
Here is the print of the last layers of my sequentialised model (screenshot of the last layers omitted).
It looks like the output of model(x) (as defined by model = torch.hub...) has shape 1 x 384, but your linear_classifier expects something of shape _ x 1536, which is why you get this error. So you can just adjust that number of inputs by setting
linear_classifier = LinearClassifier(384, 1000)
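To confirm the backbone's feature size, a quick sanity check along these lines (a minimal sketch) should print the 384-dimensional output:

import torch

backbone = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
backbone.eval()
with torch.no_grad():
    feats = backbone(torch.ones(1, 3, 224, 224))
print(feats.shape)   # expected: torch.Size([1, 384])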
I want to perform some transformations on the input batch during training. For example, if I have a batch of images of size (number of samples, width, height, channels), I want to replace the 3rd channel with the difference of the first two channels, then resize the images and finally normalize them.
I tried to define a custom layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        # Loaded images
        self.img_tr = []
        for image in input_:
            img_input = resize(image, (267,400))  # from skimage.transform import resize
            img_diff = (img_input[:,:,1]/np.max(img_input[:,:,1]))-((img_input[:,:,0]+img_input[:,:,2])/np.max(img_input[:,:,0]+img_input[:,:,2]))
            img_temp = np.zeros((267,400,3))
            img_temp[:,:,0] = img_input[:,:,0]/np.max(img_input[:,:,0])
            img_temp[:,:,1] = img_input[:,:,1]/np.max(img_input[:,:,1])
            img_temp[:,:,2] = img_diff/np.max(img_diff)
            self.img_tr.append(img_temp)
        self.img_tr = np.asarray(self.img_tr)
        return self.img_tr
Then I used:
input_0 = tf.keras.Input(shape = (None,None,3))
clayer = CustomLayer()
input_1 = clayer(input_0)
x = tf.keras.layers.Conv2D(filters = 16, kernel_size = (7,7), activation = tf.keras.activations.relu)(input_1)
x = tf.keras.layers.MaxPool2D(pool_size = (2,2))(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(units = 64, activation = tf.keras.activations.relu)(x)
output = tf.keras.layers.Dense(units = 12)(x)
model = tf.keras.Model(inputs = input_0, outputs = output)
model.compile(
    optimizer = tf.keras.optimizers.Adam(),
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    metrics = tf.keras.metrics.SparseCategoricalAccuracy()
)
model.summary()
I get an error that says:
AttributeError: 'Tensor' object has no attribute 'ndim'
I think the issue is related to the fact that my custom layer expects a 4D NumPy array, but the input has this format:
<KerasTensor: shape=(None, None, None, 3) dtype=float32 (created by layer 'input_20')>
How can I resolve the issue? I cannot find a way to convert a KerasTensor to a NumPy array inside my custom layer.
Edit
I tried to avoid for loops and NumPy, so I tried:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        input_ = tf.Variable(input_)
        img_input = tf.image.resize(input_, (267,400))
        img_diff = (img_input[:,:,:,1])-((img_input[:,:,:,0]+img_input[:,:,:,2]))
        img_input[:,:,:,2] = img_diff
        output_img = tf.image.per_image_standardization(img_input)
        return input_
However, when I use the custom layer in the functional API I get the error:
ValueError: Tensor-typed variable initializers must either be wrapped in an init_scope or callable (e.g., `tf.Variable(lambda : tf.truncated_normal([10, 40]))`) when building functions. Please file a feature request if this restriction inconveniences you.
It seems to have something to do with tf.Variable. Even if I set validate_shape to False, I still get the same error.
Simply removing tf.Variable does the job. Below is the full layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, inp):
        img_input = tf.image.resize(inp, (267,400))
        img_diff = (img_input[:,:,:,1])-((img_input[:,:,:,0]+img_input[:,:,:,2]))
        img_diff = tf.expand_dims(img_diff, -1)
        img_input = tf.keras.layers.Concatenate()([img_input[:,:,:,:-1], img_diff])
        output_img = tf.image.per_image_standardization(img_input)
        return output_img
I used tf.keras.layers.Concatenate to replace the last channel of img_input with img_diff.
Here is the running notebook.
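As a quick check of the layer above (a minimal sketch with a made-up input size), feeding a random batch through it should yield the resized three-channel output:

import tensorflow as tf

layer = CustomLayer()                        # the fixed layer defined above
dummy = tf.random.uniform((2, 128, 192, 3))  # hypothetical batch of two RGB images
out = layer(dummy)
print(out.shape)                             # expected: (2, 267, 400, 3)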
We are in the process of training an AE on CIFAR10 images. We used the following architecture:
import torch
import torch.nn as nn

class OurAE(nn.Module):
    def __init__(self, in_channels, z_channels):
        super(OurAE, self).__init__()
        self.tot_diff = None
        self.in_channels = in_channels
        curr_channels = in_channels

        encoder = []
        channels = [3, 16, 64] + [z_channels]
        for out_channels in channels:
            encoder += [
                nn.Conv2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        self.encoder = nn.Sequential(*encoder)

        curr_channels = z_channels
        decoder = []
        channels = [64, 16, 3] + [in_channels]
        for out_channels in channels:
            decoder += [
                nn.ConvTranspose2d(in_channels=curr_channels, out_channels=out_channels, kernel_size=4, padding=1, stride=2),
                nn.ReLU()
            ]
            curr_channels = out_channels
        decoder = decoder[:-1]  # removing the ReLU layer
        decoder.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*decoder)

    def forward(self, x):
        return self.decoder(self.encoder(x))
We are not sure why, but we always get black and white images.
We tried replacing the Sigmoid with a ReLU in the last layer, but to no avail.
These are the loss function and the optimizer that we used:
optimizer = torch.optim.Adam(classifier.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
Here is an example of an input and output of the AE after training (images not shown).
I had the same problem. I changed the loss function to 'mae'. That cleared the issue for me.
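In PyTorch terms, 'mae' corresponds to nn.L1Loss applied to the reconstruction rather than CrossEntropyLoss on class targets. A minimal sketch of what that training step might look like, assuming the OurAE class above (the variable names and hyperparameters here are illustrative):

import torch
import torch.nn as nn

model = OurAE(in_channels=3, z_channels=128)   # z_channels chosen for illustration
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.L1Loss()                        # mean absolute error ('mae')

x = torch.rand(8, 3, 32, 32)                   # dummy CIFAR10-sized batch
reconstruction = model(x)
loss = criterion(reconstruction, x)            # compare the reconstruction to the input
optimizer.zero_grad()
loss.backward()
optimizer.step()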