I'm trying to follow a Cyclegan tutorial here:https://www.youtube.com/watch?v=4LktBHGCNfw. It also has the Github to his code. At first I thought I had a typo, but then I tried copying and pasting his code and still the results are only black images. Is anybody else able to copy the code and get actual images?
Generator model
class ConvBlock(nn.Module):
def __init__(self, in_channels, out_channels, down=True, use_act=True, **kwargs):
super().__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_channels, out_channels, padding_mode="reflect", **kwargs)
if down
else nn.ConvTranspose2d(in_channels, out_channels, **kwargs),
nn.InstanceNorm2d(out_channels),
nn.ReLU(inplace=True) if use_act else nn.Identity()
)
def forward(self, x):
return self.conv(x)
class ResidualBlock(nn.Module):
def __init__(self, channels):
super().__init__()
self.block = nn.Sequential(
ConvBlock(channels, channels, kernel_size=3, padding=1),
ConvBlock(channels, channels, use_act=False, kernel_size=3, padding=1),
)
def forward(self, x):
return x + self.block(x)
class Generator(nn.Module):
def __init__(self, img_channels, num_features = 64, num_residuals=9):
super().__init__()
self.initial = nn.Sequential(
nn.Conv2d(img_channels, num_features, kernel_size=7, stride=1, padding=3, padding_mode="reflect"),
nn.InstanceNorm2d(num_features),
nn.ReLU(inplace=True),
)
self.down_blocks = nn.ModuleList(
[
ConvBlock(num_features, num_features*2, kernel_size=3, stride=2, padding=1),
ConvBlock(num_features*2, num_features*4, kernel_size=3, stride=2, padding=1),
]
)
self.res_blocks = nn.Sequential(
*[ResidualBlock(num_features*4) for _ in range(num_residuals)]
)
self.up_blocks = nn.ModuleList(
[
ConvBlock(num_features*4, num_features*2, down=False, kernel_size=3, stride=2, padding=1, output_padding=1),
ConvBlock(num_features*2, num_features*1, down=False, kernel_size=3, stride=2, padding=1, output_padding=1),
]
)
self.last = nn.Conv2d(num_features*1, img_channels, kernel_size=7, stride=1, padding=3, padding_mode="reflect")
def forward(self, x):
x = self.initial(x)
for layer in self.down_blocks:
x = layer(x)
x = self.res_blocks(x)
for layer in self.up_blocks:
x = layer(x)
return torch.tanh(self.last(x))
Discriminator Model
class Block(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super().__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_channels, out_channels, 4, stride, 1, bias=True, padding_mode="reflect"),
nn.InstanceNorm2d(out_channels),
nn.LeakyReLU(0.2, inplace=True),
)
def forward(self, x):
return self.conv(x)
class Discriminator(nn.Module):
def __init__(self, in_channels=3, features=[64, 128, 256, 512]):
super().__init__()
self.initial = nn.Sequential(
nn.Conv2d(
in_channels,
features[0],
kernel_size=4,
stride=2,
padding=1,
padding_mode="reflect",
),
nn.LeakyReLU(0.2, inplace=True),
)
layers = []
in_channels = features[0]
for feature in features[1:]:
layers.append(Block(in_channels, feature, stride=1 if feature==features[-1] else 2))
in_channels = feature
layers.append(nn.Conv2d(in_channels, 1, kernel_size=4, stride=1, padding=1, padding_mode="reflect"))
self.model = nn.Sequential(*layers)
def forward(self, x):
x = self.initial(x)
return torch.sigmoid(self.model(x))
Train:
def train_fn(disc_H, disc_Z, gen_Z, gen_H, loader, opt_disc, opt_gen, l1, mse, d_scaler, g_scaler):
H_reals = 0
H_fakes = 0
loop = tqdm(loader, leave=True)
for idx, (zebra, horse) in enumerate(loop):
zebra = zebra.to(config.DEVICE)
horse = horse.to(config.DEVICE)
# Train Discriminators H and Z
with torch.cuda.amp.autocast():
fake_horse = gen_H(zebra)
D_H_real = disc_H(horse)
D_H_fake = disc_H(fake_horse.detach())
H_reals += D_H_real.mean().item()
H_fakes += D_H_fake.mean().item()
D_H_real_loss = mse(D_H_real, torch.ones_like(D_H_real))
D_H_fake_loss = mse(D_H_fake, torch.zeros_like(D_H_fake))
D_H_loss = D_H_real_loss + D_H_fake_loss
fake_zebra = gen_Z(horse)
D_Z_real = disc_Z(zebra)
D_Z_fake = disc_Z(fake_zebra.detach())
D_Z_real_loss = mse(D_Z_real, torch.ones_like(D_Z_real))
D_Z_fake_loss = mse(D_Z_fake, torch.zeros_like(D_Z_fake))
D_Z_loss = D_Z_real_loss + D_Z_fake_loss
# put it togethor
D_loss = (D_H_loss + D_Z_loss)/2
opt_disc.zero_grad()
d_scaler.scale(D_loss).backward()
d_scaler.step(opt_disc)
d_scaler.update()
# Train Generators H and Z
with torch.cuda.amp.autocast():
# adversarial loss for both generators
D_H_fake = disc_H(fake_horse)
D_Z_fake = disc_Z(fake_zebra)
loss_G_H = mse(D_H_fake, torch.ones_like(D_H_fake))
loss_G_Z = mse(D_Z_fake, torch.ones_like(D_Z_fake))
# cycle loss
cycle_zebra = gen_Z(fake_horse)
cycle_horse = gen_H(fake_zebra)
cycle_zebra_loss = l1(zebra, cycle_zebra)
cycle_horse_loss = l1(horse, cycle_horse)
# identity loss (remove these for efficiency if you set lambda_identity=0)
identity_zebra = gen_Z(zebra)
identity_horse = gen_H(horse)
identity_zebra_loss = l1(zebra, identity_zebra)
identity_horse_loss = l1(horse, identity_horse)
# add all togethor
G_loss = (
loss_G_Z
+ loss_G_H
+ cycle_zebra_loss * config.LAMBDA_CYCLE
+ cycle_horse_loss * config.LAMBDA_CYCLE
+ identity_horse_loss * config.LAMBDA_IDENTITY
+ identity_zebra_loss * config.LAMBDA_IDENTITY
)
opt_gen.zero_grad()
g_scaler.scale(G_loss).backward()
g_scaler.step(opt_gen)
g_scaler.update()
if idx % 200 == 0:
save_image(fake_horse*0.5+0.5, f"saved_images/horse_{idx}.png")
save_image(fake_zebra*0.5+0.5, f"saved_images/zebra_{idx}.png")
#loop.set_postfix(H_real=H_reals/(idx+1), H_fake=H_fakes/(idx+1))
def main():
disc_H = Discriminator(in_channels=3).to(config.DEVICE)
disc_Z = Discriminator(in_channels=3).to(config.DEVICE)
gen_Z = Generator(img_channels=3, num_residuals=9).to(config.DEVICE)
gen_H = Generator(img_channels=3, num_residuals=9).to(config.DEVICE)
opt_disc = optim.Adam(
list(disc_H.parameters()) + list(disc_Z.parameters()),
lr=config.LEARNING_RATE,
betas=(0.5, 0.999),
)
opt_gen = optim.Adam(
list(gen_Z.parameters()) + list(gen_H.parameters()),
lr=config.LEARNING_RATE,
betas=(0.5, 0.999),
)
L1 = nn.L1Loss()
mse = nn.MSELoss()
if config.LOAD_MODEL:
load_checkpoint(
config.CHECKPOINT_GEN_H, gen_H, opt_gen, config.LEARNING_RATE,
)
load_checkpoint(
config.CHECKPOINT_GEN_Z, gen_Z, opt_gen, config.LEARNING_RATE,
)
load_checkpoint(
config.CHECKPOINT_CRITIC_H, disc_H, opt_disc, config.LEARNING_RATE,
)
load_checkpoint(
config.CHECKPOINT_CRITIC_Z, disc_Z, opt_disc, config.LEARNING_RATE,
)
dataset = HorseZebraDataset(
root_horse=config.TRAIN_DIR+"/horses", root_zebra=config.TRAIN_DIR+"/zebras", transform=config.transforms
)
#val_dataset = HorseZebraDataset(
#root_horse="cyclegan_test/horse1", root_zebra="cyclegan_test/zebra1", transform=config.transforms
#)
#val_loader = DataLoader(
#val_dataset,
#batch_size=1,
#shuffle=False,
#pin_memory=True,
#)
loader = DataLoader(
dataset,
batch_size=config.BATCH_SIZE,
shuffle=True,
#num_workers=config.NUM_WORKERS,
pin_memory=True
)
g_scaler = torch.cuda.amp.GradScaler()
d_scaler = torch.cuda.amp.GradScaler()
for epoch in range(config.NUM_EPOCHS):
train_fn(disc_H, disc_Z, gen_Z, gen_H, loader, opt_disc, opt_gen, L1, mse, d_scaler, g_scaler)
if config.SAVE_MODEL:
save_checkpoint(gen_H, opt_gen, filename=config.CHECKPOINT_GEN_H)
save_checkpoint(gen_Z, opt_gen, filename=config.CHECKPOINT_GEN_Z)
save_checkpoint(disc_H, opt_disc, filename=config.CHECKPOINT_CRITIC_H)
save_checkpoint(disc_Z, opt_disc, filename=config.CHECKPOINT_CRITIC_Z)
Related
I am trying to implement deeplabv3plus in pytorch. This is my code for creating deeplabv3plus head. But I am getting an Index error.
low_level_feature = self.project( feature['low_level'] )
IndexError: too many indices for tensor of dimension 4
def _segm_model(name, backbone_name, num_classes, pretrained_backbone=True):
aspp_dilate = [6, 12, 18]
backbone = resnet.__dict__[backbone_name](
pretrained=pretrained_backbone,
replace_stride_with_dilation=[False, False, True])
inplanes = 2048
low_level_planes = 256
if name == 'deeplabv3plus':
return_layers = {'layer4': 'out', 'layer1': 'low_level'}
classifier = DeepLabHeadV3Plus(inplanes, low_level_planes, num_classes, aspp_dilate)
backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
model = deeplabv3.DeepLabV3(backbone, classifier)
return model
class DeepLabHeadV3Plus(nn.Module):
def __init__(self, in_channels, low_level_channels, num_classes, aspp_dilate=[12, 24, 36]):
super(DeepLabHeadV3Plus, self).__init__()
self.project = nn.Sequential(
nn.Conv2d(low_level_channels, 48, 1, bias=False),
nn.BatchNorm2d(48),
nn.ReLU(inplace=True),
)
self.aspp = ASPP(in_channels, aspp_dilate)
self.classifier = nn.Sequential(
nn.Conv2d(304, 256, 3, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.ReLU(inplace=True),
nn.Conv2d(256, num_classes, 1)
)
self._init_weight()
def forward(self, feature):
low_level_feature = self.project( feature['low_level'] )
output_feature = self.aspp(feature['out'])
output_feature = F.interpolate(output_feature, size=low_level_feature.shape[2:], mode='bilinear', align_corners=False)
return self.classifier( torch.cat( [ low_level_feature, output_feature ], dim=1 ) )
def _init_weight(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
I get the following error for a GAN model I am using to perform image colorization. It uses the LAB color space as is common in image colorization. The generator generates the a ad b channels for a given L channel. The discriminator is fed all three channels after concatenation.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [64, 64, 128, 128]], which is output 0 of ReluBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
I believe the error is due to the skip connections but I cannot quite put my finger on it. Any help would be appreciated!
Here is the model:
class NetGen(nn.Module):
'''Generator'''
def __init__(self):
super(NetGen, self).__init__()
self.conv1 = nn.Conv2d(1, 64, 3, stride=2, padding=1, bias=False)
self.bnorm1 = nn.BatchNorm2d(64)
self.relu1 = nn.LeakyReLU(0.1)
self.conv2 = nn.Conv2d(64, 128, 3, stride=2, padding=1, bias=False)
self.bnorm2 = nn.BatchNorm2d(128)
self.relu2 = nn.LeakyReLU(0.1)
self.conv3 = nn.Conv2d(128, 256, 3, stride=2, padding=1, bias=False)
self.bnorm3 = nn.BatchNorm2d(256)
self.relu3 = nn.LeakyReLU(0.1)
self.conv4 = nn.Conv2d(256, 512, 3, stride=2, padding=1, bias=False)
self.bnorm4 = nn.BatchNorm2d(512)
self.relu4 = nn.LeakyReLU(0.1)
self.conv5 = nn.Conv2d(512, 512, 3, stride=2, padding=1, bias=False)
self.bnorm5 = nn.BatchNorm2d(512)
self.relu5 = nn.LeakyReLU(0.1)
self.deconv6 = nn.ConvTranspose2d(512, 512, 3, stride=2, padding=1, output_padding=1, bias=False)
self.bnorm6 = nn.BatchNorm2d(512)
self.relu6 = nn.ReLU()
self.deconv7 = nn.ConvTranspose2d(512, 256, 3, stride=2, padding=1, output_padding=1, bias=False)
self.bnorm7 = nn.BatchNorm2d(256)
self.relu7 = nn.ReLU()
self.deconv8 = nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1, bias=False)
self.bnorm8 = nn.BatchNorm2d(128)
self.relu8 = nn.ReLU()
self.deconv9 = nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1, bias=False)
self.bnorm9 = nn.BatchNorm2d(64)
self.relu9 = nn.ReLU()
self.deconv10 = nn.ConvTranspose2d(64, 2, 3, stride=2, padding=1, output_padding=1, bias=False)
self.tanh = nn.Tanh()
def forward(self, x):
h = x
h = self.conv1(h)
h = self.bnorm1(h)
h = self.relu1(h)
pool1 = h
h = self.conv2(h)
h = self.bnorm2(h)
h = self.relu2(h)
pool2 = h
h = self.conv3(h)
h = self.bnorm3(h)
h = self.relu3(h)
pool3 = h
h = self.conv4(h)
h = self.bnorm4(h)
h = self.relu4(h)
pool4 = h
h = self.conv5(h)
h = self.bnorm5(h)
h = self.relu5(h)
h = self.deconv6(h)
h = self.bnorm6(h)
h = self.relu6(h)
h += pool4
h = self.deconv7(h)
h = self.bnorm7(h)
h = self.relu7(h)
h += pool3
h = self.deconv8(h)
h = self.bnorm8(h)
h = self.relu8(h)
h += pool2
h = self.deconv9(h)
h = self.bnorm9(h)
h = self.relu9(h)
h += pool1
h = self.deconv10(h)
h = self.tanh(h)
return h
class NetDis(nn.Module):
'''Discriminator'''
def __init__(self):
super(NetDis, self).__init__()
self.main = nn.Sequential(
nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.LeakyReLU(0.1),
nn.Conv2d(64, 128, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(128),
nn.LeakyReLU(0.1),
nn.Conv2d(128, 256, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(256),
nn.LeakyReLU(0.1),
nn.Conv2d(256, 512, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
nn.Conv2d(512, 512, 3, stride=2, padding=1, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
nn.Conv2d(512, 512, 8, stride=1, padding=0, bias=False),
nn.BatchNorm2d(512),
nn.LeakyReLU(0.1),
nn.Conv2d(512, 1, 1, stride=1, padding=0, bias=False),
nn.Sigmoid()
)
def forward(self, x):
return self.main(x)
Here is the weight init function:
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
Here is the training and validation code:
class Trainer:
def __init__(self, epochs, batch_size, learning_rate, num_workers):
self.epochs = epochs
self.batch_size = batch_size
self.learning_rate = learning_rate
self.num_workers = num_workers
self.train_paths = train_paths
self.val_paths = val_paths
self.real_label = 1
self.fake_label = 0
def train(self):
train_dataset = ColorizeData(paths=self.train_paths)
train_dataloader = DataLoader(train_dataset, batch_size=self.batch_size, num_workers=self.num_workers,pin_memory=True, drop_last = True)
# Model
model_G = NetGen().to(device)
model_D = NetDis().to(device)
model_G.apply(weights_init)
model_D.apply(weights_init)
optimizer_G = torch.optim.Adam(model_G.parameters(),
lr=self.learning_rate, betas=(0.5, 0.999),
eps=1e-8, weight_decay=0)
optimizer_D = torch.optim.Adam(model_D.parameters(),
lr=self.learning_rate, betas=(0.5, 0.999),
eps=1e-8, weight_decay=0)
criterion = nn.BCELoss()
L1 = nn.L1Loss()
model_G.train()
model_D.train()
# train loop
for epoch in range(self.epochs):
print("Starting Training Epoch " + str(epoch + 1))
for i, data in enumerate(tqdm(train_dataloader)):
inputs, input_ab, input_l = data
inputs = inputs.to(device)
input_ab = input_ab.to(device)
input_l = input_l.to(device)
model_D.zero_grad()
label = torch.full((self.batch_size,), self.real_label, dtype=torch.float, device=device)
output = model_D(torch.cat([input_l, input_ab], dim=1))
errD_real = criterion(torch.squeeze(output), label)
errD_real.backward()
fake = model_G(input_l)
label.fill_(self.fake_label)
output = model_D(torch.cat([input_l, fake.detach()], dim=1))
errD_fake = criterion(torch.squeeze(output), label)
errD_fake.backward()
errD = errD_real + errD_fake
optimizer_D.step()
model_G.zero_grad()
label.fill_(self.real_label)
output = model_D(torch.cat([input_l, fake], dim=1))
errG = criterion(torch.squeeze(output), label)
errG_L1 = L1(fake.view(fake.size(0),-1), input_ab.view(input_ab.size(0),-1))
errG = errG + 100 * errG_L1
errG.backward()
optimizer_G.step()
print(f'Training: Epoch {epoch + 1} \t\t Discriminator Loss: {\
errD / len(train_dataloader)} \t\t Generator Loss: {\
errG / len(train_dataloader)}')
if (epoch + 1) % 1 == 0:
errD_val, errG_val, val_len = self.validate(model_D, model_G, criterion, L1)
print(f'Validation: Epoch {epoch + 1} \t\t Discriminator Loss: {\
errD_val / val_len} \t\t Generator Loss: {\
errG_val / val_len}')
torch.save(model_G.state_dict(), '../Results/Model_GAN/Generator/saved_model_' + str(epoch + 1) + '.pth')
torch.save(model_D.state_dict(), '../Results/Model_GAN/Discriminator/saved_model_' + str(epoch + 1) + '.pth')
def validate(self, model_D, model_G, criterion, L1):
model_G.eval()
model_D.eval()
with torch.no_grad():
valid_loss = 0.0
val_dataset = ColorizeData(paths=self.val_paths)
val_dataloader = DataLoader(val_dataset, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True, drop_last = True)
for i, data in enumerate(val_dataloader):
inputs, input_ab, input_l = data
inputs = inputs.to(device)
input_ab = input_ab.to(device)
input_l = input_l.to(device)
label = torch.full((self.batch_size,), self.real_label, dtype=torch.float, device=device)
output = model_D(torch.cat([input_l, input_ab], dim=1))
errD_real = criterion(torch.squeeze(output), label)
fake = model_G(input_l)
label.fill_(self.fake_label)
output = model_D(torch.cat([input_l, fake.detach()], dim=1))
errD_fake = criterion(torch.squeeze(output), label)
errD = errD_real + errD_fake
label.fill_(self.real_label)
output = model_D(torch.cat([input_l, fake], dim=1))
errG = criterion(torch.squeeze(output), label)
errG_L1 = L1(fake.view(fake.size(0),-1), input_ab.view(input_ab.size(0),-1))
errG = errG + 100 * errG_L1
return errD, errG, len(val_dataloader)
EDIT
As suggested by #manaclan here is the code I use to run the pipeline:
trainer = Trainer(epochs = 100, batch_size = 64, learning_rate = 0.0002, num_workers = 2)
trainer.train()
Here is the data loader:
class ColorizeData(Dataset):
def __init__(self, paths):
self.input_transform = T.Compose([T.ToTensor(),
T.Resize(size=(256,256)),
T.Grayscale(),
T.Normalize((0.5), (0.5))
])
self.lab_transform = T.Compose([T.ToTensor(),
T.Resize(size=(256,256)),
T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
self.paths = paths
def __len__(self) -> int:
return len(self.paths)
def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
image = Image.open(self.paths[index]).convert("RGB")
input_image = self.input_transform(image)
image_lab = rgb2lab(image)
image_lab = self.lab_transform(image_lab)
image_l = image_lab[0, :, :]
image_ab = image_lab[1:3, :, :]
return (input_image.float(), image_ab.float(), image_l.float().reshape(1, 256, 256))
Here are the imports:
from typing import Tuple
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torch
import numpy as np
import os
import torch.nn as nn
import torchvision.models as models
import torchvision
import torch.nn.functional as functional
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from PIL import Image
import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
from skimage.color import lab2rgb, rgb2lab, rgb2gray
from skimage import io
from torchvision.transforms.functional import resize
To reproduce the error, just use any dataset of color images.
I have the following code to get my train, test, and validation images from the folder "Dataset":
path = "../Dataset/"
paths = np.array(glob.glob(path + "/*.jpg"))
rand_indices = np.random.permutation(len(paths)) # Number of images in dataset
train_indices, val_indices, test_indices = rand_indices[:3600], rand_indices[3600:4000], rand_indices[4000:]
train_paths = paths[train_indices]
val_paths = paths[val_indices]
test_paths = paths[test_indices]
NOTE: I am using Google Colab, maybe this might be a potential problem? Also, I am using torch version 1.10.0+cu111.
I did use a sequential model without skip connections for the generator before this, and I did not have this error then.
So apparently, the problem is the inplace skip connection written as h += poolX. Writing this update out of place as h = h + poolX fixed it. h is needed for gradient calculation in some layers, so inplace modification will mess it up.
Maybe try to use the output of the layers directly for the skip connections, like this:
def forward(self, x):
h = x
h = self.conv1(h)
h = self.bnorm1(h)
h1 = self.relu1(h)
h = self.conv2(h1)
h = self.bnorm2(h)
h2 = self.relu2(h)
h = self.conv3(h2)
h = self.bnorm3(h)
h3 = self.relu3(h)
h = self.conv4(h3)
h = self.bnorm4(h)
h4 = self.relu4(h)
h = self.conv5(h4)
h = self.bnorm5(h)
h = self.relu5(h)
h = self.deconv6(h5)
h = self.bnorm6(h)
h = self.relu6(h)
h += h4
h = self.deconv7(h)
h = self.bnorm7(h)
h = self.relu7(h)
h += h3
h = self.deconv8(h)
h = self.bnorm8(h)
h = self.relu8(h)
h += h2
h = self.deconv9(h)
h = self.bnorm9(h)
h = self.relu9(h)
h += h1
h = self.deconv10(h)
h = self.tanh(h)
return h
I am loading an already trained dataset. My dataset is a 3D image and i want to test that model now. So I created a different function but is showing an error at. i tried to save using state_dict() first but it showed an error, so I saved the whole model.
My = MyNet(data)
My = torch.load("model_par.pt")
The error is:
File "main_SUDF.py", line 238, in <module>
My = MyNet(data)
File "main_SUDF.py", line 148, in __init__
self.conv1 = nn.Conv2d(input_dim, nChannel, kernel_size=3, stride=1, padding=1)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 410, in __init__
super(Conv2d, self).__init__(
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 50, in __init__
if in_channels % groups != 0:
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
and my model is:
class MyNet(nn.Module):
def __init__(self, input_dim):
super(MyNet, self).__init__()
self.conv1 = nn.Conv2d(input_dim, nChannel, kernel_size=3, stride=1, padding=1)
self.bn1 = nn.BatchNorm2d(nChannel)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = []
self.bn2 = []
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
for i in range(nConv - 1):
self.conv2.append(nn.Conv2d(nChannel, nChannel, kernel_size=3, stride=1, padding=1))
self.bn2.append(nn.BatchNorm2d(nChannel))
self.conv3 = nn.Conv2d(nChannel, nChannel, kernel_size=1, stride=1, padding=0)
self.bn3 = nn.BatchNorm2d(nChannel)
self.UB1 = nn.UpsamplingBilinear2d(scale_factor=2)
self.deconv = nn.ConvTranspose2d(nChannel, nChannel, kernel_size=3, stride=1, padding=1)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.bn1(x)
x = self.pool1(x)
for i in range(nConv - 1):
x = self.conv2[i](x)
x = F.relu(x)
x = self.bn2[i](x)
if i == 0:
x = self.pool2(x)
x = self.conv3(x)
# x = F.relu(x)
x = self.bn3(x)
x = self.UB1(x)
x = self.deconv(x)
x = F.relu(x)
x = self.bn3(x)
x = self.UB1(x)
x = self.deconv(x)
x = F.relu(x)
x = self.bn3(x)
return x
I'm trying to train a lenet model in pytorch, The ideia is to put images of any size in it, so I started doing with nn.AdaptiveAvgPool2d but the error comes as
mat1 dim 1 must match mat2 dim 0
Here is my code
class LeNet5(nn.Module):
def __init__(self, num_classes=10):
super(LeNet5, self).__init__()
self.conv_1 = nn.Conv2d(
in_channels=1, out_channels=32, kernel_size=5, bias=False
)
self.relu_1 = nn.ReLU(inplace=True)
self.maxpool_1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv_2 = nn.Conv2d(
in_channels=32, out_channels=256, kernel_size=5, bias=False
)
self.relu_2 = nn.ReLU(inplace=True)
self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)
self.flatten = nn.Flatten()
self.fc_1 = nn.Linear(in_features=4096, out_features=120, bias=False)
self.fc_2 = nn.Linear(in_features=120, out_features=84)
self.fc_3 = nn.Linear(in_features=84, out_features=num_classes)
def forward(self, input):
conv_1_output = self.conv_1(input)
relu_1_output = self.relu_1(conv_1_output)
maxpool_1_output = self.maxpool_1(relu_1_output)
conv_2_output = self.conv_2(maxpool_1_output)
relu_2_output = self.relu_2(conv_2_output)
maxpool_2_output = self.maxpool_2(relu_2_output)
flatten_output = self.flatten((self.avgpool(maxpool_2_output).view(maxpool_2_output.shape[0], -1)))
fc_1_output = self.fc_1(flatten_output)
fc_2_output = self.fc_2(fc_1_output)
fc_3_output = self.fc_3(fc_2_output)
return fc_3_output
if you read the theory on AdaptiveAvgPool2d, this is what it says " we specify the output size And the stride and kernel-size are automatically selected to adapt to the needs"
More info available here
Hence Your spatial dimension is reduced by AdaptiveAvgPool2d and not the depth of feature maps.
So, the spatial dimension will be 1x1 and depth will still be 256 , making your
self.fc_1 = nn.Linear(in_features=256, out_features=120, bias=False) and not self.fc_1 = nn.Linear(in_features=4096, out_features=120, bias=False)
We are trying to write a multi-step decay function in Tensorflow using tf.train.piecewise_constant() as suggested here. Tensorflow documentation here states that:
"When eager execution is enabled, this function returns a function which in turn returns the decayed learning rate Tensor"
However, when we tried running the code, it returned a TypeError.
It returns the same error even when lr() is used.
import tensorflow as tf
tf.enable_eager_execution()
import numpy as np
def conv3x3(out_planes, data_format ='channels_last', stride=1, padding='same', dilation=1, name = None,use_bias = False):
"""3x3 convolution with padding"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 3,data_format= data_format,
strides=(stride, stride), padding='same', use_bias=use_bias,
dilation_rate = (dilation,dilation) , kernel_initializer=tf.initializers.he_normal(),name = name)
def conv1x1(out_planes,data_format ='channels_last', padding = 'same', stride=1):
"""1x1 convolution"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 1, strides=(stride, stride),data_format= data_format,
padding=padding, use_bias=False, kernel_initializer=tf.initializers.he_normal())
class BasicBlock(tf.keras.Model):
expansion = 1
def __init__(self, planes=1, stride=1, data_format= 'channels_last', downsample=None, dilation=(1, 1), residual=True, key=None, stage = None):
super(BasicBlock, self).__init__()
self.data_format = data_format
bn_axis = 1 if self.data_format == 'channels_first' else 3
self.conv1 = conv3x3(out_planes= planes, stride = stride, padding='same' ,
data_format = self.data_format, dilation=dilation[0], name = '{}_{}_conv0'.format(key,stage))
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis, name = '{}_{}_BN0'.format(key,stage))
self.conv2 = conv3x3(out_planes =planes, padding='same',
data_format = self.data_format, dilation=dilation[0],name = '{}_{}_conv1'.format(key,stage))
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis,name = '{}_{}_BN1'.format(key,stage))
self.downsample = downsample
self.relu = tf.keras.layers.ReLU(name = '{}_{}_Relu'.format(key,stage))
self.stride = stride
self.residual = residual
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
residual = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(inputs)
if self.residual:
out += residual
out = self.relu(out)
return out
class Bottleneck(tf.keras.Model):
expansion = 4
def __init__(self, planes, stride=1, data_format = 'channels_last',downsample=None,dilation=(1, 1)):
super(Bottleneck, self).__init__()
bn_axis = 1 if data_format == 'channels_first' else 3
self.conv1 = conv1x1(planes, data_format = data_format)
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.relu = tf.keras.layers.ReLU()
self.conv2 = conv3x3(planes, stride, padding= 'same', bias=False, data_format = data_format, dilation=dilation[1])
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.conv3 =conv1x1( planes * 4, data_format = data_format, )
self.bn3 = tf.keras.layers.BatchNormalization(axis=bn_axis) # nn.BatchNorm2d(planes * self.expansion)
self.downsample = downsample
self.stride = stride
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
base_config['conv3'] = self.conv3.get_config()
base_config['bn3'] = self.bn3.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
identity = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out,training = training)
out = tf.nn.relu(out)
out = self.conv3(out)
out = self.bn3(out,training = training)
if self.downsample is not None:
identity = self.downsample(inputs)
out += identity
out = self.relu(out)
return out
class pooling (tf.keras.Model):
def __init__(self, pool_size, stride = None, data_format='channels_last'):
super(pooling, self).__init__()
self.pool_size = pool_size
self.data_format = data_format
if stride is None:
self.stride =self.pool_size
else:
self.stride = stride
def call(self, inputs):
return tf.layers.average_pooling2d(inputs, strides =self.stride, pool_size = self.pool_size, data_format = self.data_format)
class DRN(tf.keras.Model):
def __init__(self, block, layers, data_format='channels_last', num_classes=7,channels=(16, 32, 64, 128, 256, 512, 512, 512),
out_map=False, out_middle=False, pool_size=28, arch='D'):
super(DRN, self).__init__()
self.inplanes = channels[0]
self.out_map = out_map
self.out_dim = channels[-1]
self.out_middle = out_middle
self.arch = arch
self.poolsize = pool_size
self.data_format = data_format
self.bn_axis = 1 if data_format == 'channels_first' else 3
self.conv0 = tf.keras.layers.Conv2D(filters=channels[0], kernel_size=7, strides=1, padding='same',
use_bias=False, data_format = self.data_format, kernel_initializer=tf.initializers.he_normal(), name ='L0_conv0' )
self.bn0 = tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='L0_BN0')
self.relu0 = tf.keras.layers.ReLU(name ='L0_Relu0')
if arch == 'C':
self.layer1 = self._make_layer(block = BasicBlock, planes = channels[0], blocks = layers[0], stride=1, data_format = self.data_format, key='CL1')
self.layer2 = self._make_layer(block = BasicBlock, planes = channels[1], blocks = layers[1], stride=2, data_format = self.data_format, key='CL2')
elif arch == 'D':
self.layer1 = self._make_conv_layers(channels = channels[0],convs = layers[0], stride=1, data_format = self.data_format, key='DL1')
self.layer2 = self._make_conv_layers(channels = channels[1],convs = layers[1], stride=2, data_format = self.data_format, key='DL2')
self.layer3 = self._make_layer(block = block, planes = channels[2], blocks = layers[2], stride=2, data_format = self.data_format, key='L3')
self.layer4 = self._make_layer(block = block, planes = channels[3], blocks = layers[3], stride=2, data_format = self.data_format, key='L4')
self.layer5 = self._make_layer(block = block, planes = channels[4], blocks = layers[4], dilation=2, new_level=False, data_format = self.data_format, key='L5')
self.layer6 = None if layers[5] == 0 else self._make_layer(block, channels[5], layers[5], dilation=4, new_level=False, data_format = self.data_format, key='L6')
if arch == 'C':
self.layer7 = None if layers[6] == 0 else self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, new_level=False, residual=False, data_format = self.data_format, key='CL7')
self.layer8 = None if layers[7] == 0 else self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, new_level=False, residual=False, data_format = self.data_format, key='CL8')
elif arch == 'D':
self.layer7 = None if layers[6] == 0 else self._make_conv_layers(channels[6], layers[6], dilation=2, data_format = self.data_format, key='DL7')
self.layer8 = None if layers[7] == 0 else self._make_conv_layers(channels[7], layers[7], dilation=1, data_format = self.data_format, key='DL8')
if num_classes > 0:
self.avgpool = tf.keras.layers.GlobalAveragePooling2D(data_format = self.data_format)
self.fc = tf.keras.layers.Dense(units=num_classes)
def _make_layer(self, block, planes, blocks, stride=1,dilation=1, new_level=True, data_format = 'channels_last', residual=True, key=None):
assert dilation == 1 or dilation % 2 == 0
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = tf.keras.Sequential([conv1x1(out_planes = planes * block.expansion,stride = stride, data_format = data_format),
tf.keras.layers.BatchNormalization(axis=self.bn_axis)], name = 'downsample')
#
layers = []
layers.append(block(planes= planes, stride = stride, downsample = downsample, dilation=(1, 1) if dilation == 1 else (
dilation // 2 if new_level else dilation, dilation), data_format=data_format, residual=residual, key = key, stage = '0'))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(planes, residual=residual,dilation=(dilation, dilation), data_format=data_format, key = key, stage = i))
return tf.keras.Sequential(layers, name = key)
def _make_conv_layers(self, channels, convs, stride=1, dilation=1 ,data_format = 'channels_last', key = None):
modules = []
for i in range(convs):
modules.extend([
conv3x3(out_planes= channels, stride=stride if i == 0 else 1,
padding= 'same' , use_bias=False, dilation=dilation, data_format = data_format,name ='{}_{}_Conv'.format(key,i)),
tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='{}_{}_BN'.format(key,i)),
tf.keras.layers.ReLU(name ='{}_{}_Relu'.format(key,i))])
self.inplanes = channels
return tf.keras.Sequential(modules,name=key)
def call(self, x, training=None):
x = self.conv0(x)
x = self.bn0(x,training = training)
x = self.relu0(x)
x = self.layer1(x,training = training)
x = self.layer2(x,training = training)
x = self.layer3(x,training = training)
x = self.layer4(x,training = training)
x = self.layer5(x,training = training)
if self.layer6 is not None:
x = self.layer6(x,training = training)
if self.layer7 is not None:
x = self.layer7(x)
if self.layer8 is not None:
x = self.layer8(x)
if self.out_map:
x = self.fc(x)
else:
x = self.avgpool(x)
x = self.fc(x)
return x
def loss(logits, labels):
return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
def make_scheduler(policy, init_lr, n_step_epoch, global_step):
total_steps= n_step_epoch * 10 #10 epochs
milestones = policy.split('_')
milestones.pop(0)
milestones = list(map(lambda x: int(x), milestones))
boundaries = np.multiply(milestones,n_step_epoch)
values = [init_lr] + [init_lr/(0.1**-i) for i in range(1,len(milestones)+1)]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
return learning_rate
def train(model, optimizer, step_counter ):
"""Trains model on `dataset` using `optimizer`."""
for (batch, i) in enumerate(range(10)):
print('Training Loop {}'.format(i))
images = tf.random.uniform((4, 224, 224,3))
labels = tf.constant(np.random.randint(4, size=4))
with tf.contrib.summary.record_summaries_every_n_global_steps(10, global_step=step_counter):
with tf.GradientTape() as tape:
logits = model(images, training=True)
loss_value = loss(logits, labels)
grads = tape.gradient(loss_value, model.variables)
optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
def test(model):
"""Perform an evaluation of `model` on the examples from `dataset`."""
for i in (range(10)):
images = tf.random.uniform((4, 225, 225,3))
logits = model(images, training=False)
print(logits)
def main():
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C',num_classes = 4)
device = '/gpu:0'
step_counter = tf.train.get_or_create_global_step()
lr = make_scheduler(policy='multistep_2_5',init_lr=0.1,n_step_epoch = 10,global_step= step_counter)
optimizer = tf.train.MomentumOptimizer(lr,momentum=0.5)
with tf.device(device):
for _ in range(10):
train(model, optimizer,step_counter)
print(optimizer._lr_t)
test(model)
if __name__ == '__main__':
main()
File "", line 1, in
runfile('/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py', wdir='/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug')
File "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 709, in runfile
execfile(filename, namespace)
File "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 311, in
main()
File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 305, in main
train(model, optimizer,step_counter)
File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 284, in train
optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", line 598, in apply_gradients
self._prepare()
File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/momentum.py", line 87, in _prepare
learning_rate = learning_rate()
File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py", line 171, in decayed_lr
boundaries = ops.convert_n_to_tensor(boundaries)
File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1273, in convert_n_to_tensor
as_ref=False)
File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in internal_convert_n_to_tensor
raise TypeError("values must be a list.")
TypeError: values must be a list.
The code works as expected when we provide a constant learning rate. Is there something that we are missing?