No performance improvement when using a quantized model in PyTorch

I have trained a model in PyTorch with the float data type. I want to improve my inference time by converting this model to a quantized model. I have used the torch.quantization.convert API to convert my model's weights to the uint8 data type. However, when I use this model for inference, I do not get any performance improvement. Am I doing something wrong here?
The Unet Model code:
import math
import torch
import torch.nn as nn

def gen_initialization(m):
    if type(m) == nn.Conv2d:
        sh = m.weight.shape
        nn.init.normal_(m.weight, std=math.sqrt(2.0 / (sh[0]*sh[2]*sh[3])))
        nn.init.constant_(m.bias, 0)
    elif type(m) == nn.BatchNorm2d:
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

class TripleConv(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(TripleConv, self).__init__()
        mid_ch = (in_ch + out_ch) // 2
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(num_features=mid_ch),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(mid_ch, mid_ch, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(num_features=mid_ch),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(mid_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(num_features=out_ch),
            nn.LeakyReLU(negative_slope=0.1)
        )
        self.conv.apply(gen_initialization)

    def forward(self, x):
        return self.conv(x)

class Down(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(Down, self).__init__()
        self.triple_conv = TripleConv(in_ch, out_ch)
        self.avg_pool_conv = nn.AvgPool2d(2, 2)
        self.in_ch = in_ch
        self.out_ch = out_ch

    def forward(self, x):
        self.cache = self.triple_conv(x)
        pad = torch.zeros(x.shape[0], self.out_ch - self.in_ch, x.shape[2], x.shape[3], device=x.device)
        x = torch.cat((x, pad), dim=1)
        self.cache += x
        return self.avg_pool_conv(self.cache)

class Center(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(Center, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(num_features=out_ch),
            nn.LeakyReLU(negative_slope=0.1, inplace=True)
        )
        self.conv.apply(gen_initialization)

    def forward(self, x):
        return self.conv(x)

class Up(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(Up, self).__init__()
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear',
                                    align_corners=True)
        self.triple_conv = TripleConv(in_ch, out_ch)

    def forward(self, x, cache):
        x = self.upsample(x)
        x = torch.cat((x, cache), dim=1)
        x = self.triple_conv(x)
        return x

class UNet(nn.Module):
    def __init__(self, in_ch, first_ch=None):
        super(UNet, self).__init__()
        if not first_ch:
            first_ch = 32
        self.down1 = Down(in_ch, first_ch)
        self.down2 = Down(first_ch, first_ch*2)
        self.down3 = Down(first_ch*2, first_ch*4)
        self.down4 = Down(first_ch*4, first_ch*8)
        self.center = Center(first_ch*8, first_ch*8)
        self.up4 = Up(first_ch*8*2, first_ch*4)
        self.up3 = Up(first_ch*4*2, first_ch*2)
        self.up2 = Up(first_ch*2*2, first_ch)
        self.up1 = Up(first_ch*2, first_ch)
        self.output = nn.Conv2d(first_ch, in_ch, kernel_size=3, stride=1,
                                padding=1, bias=True)
        self.output.apply(gen_initialization)

    def forward(self, x):
        x = self.down1(x)
        x = self.down2(x)
        x = self.down3(x)
        x = self.down4(x)
        x = self.center(x)
        x = self.up4(x, self.down4.cache)
        x = self.up3(x, self.down3.cache)
        x = self.up2(x, self.down2.cache)
        x = self.up1(x, self.down1.cache)
        return self.output(x)
The inference code:
from tqdm import tqdm
import os
import numpy as np
import torch
import gan_network
import torch.nn.parallel
from torch.utils.data import DataLoader
import torch.utils.data as data
import random
import glob
import scipy.io
import time

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class DataFolder(data.Dataset):
    def __init__(self, file):
        super(DataFolder, self).__init__()
        self.image_names = []
        fid = file
        for line in fid:
            # line = line[:-1]
            if line == '':
                continue
            # print(line)
            self.image_names.append(line)
        random.shuffle(self.image_names)
        self.image_names = self.image_names[0:]

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, index):
        path = self.image_names[index]
        img = np.load(path)
        img = np.rollaxis(img, 2, 0)
        img = torch.from_numpy(img[:, :, :])
        return img, path

if __name__ == '__main__':
    batch_size = 1
    image_size = 2048
    channels = 6
    model_path = 'D:/WorkProjects/Network_Training_Aqusens/FullFovReconst/network/network_epoch9.pth'
    test_data = glob.glob('D:/save/temp/*.npy')
    dest_dir = 'D:/save/temp/results/'

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net = gan_network.UNet(6, 32)
    if torch.cuda.device_count() > 1:
        net = torch.nn.DataParallel(net)
    net.to(device)
    net.load_state_dict(torch.load(model_path))
    quantized_model = torch.quantization.convert(net, {torch.nn.Conv2d, torch.nn.BatchNorm2d}, inplace=False)

    dataset = DataFolder(file=test_data)
    print(f'{len(dataset)}')
    data_loader = DataLoader(dataset=dataset, num_workers=4,
                             batch_size=batch_size, shuffle=False,
                             drop_last=False, pin_memory=True)

    input = torch.Tensor(batch_size, channels, image_size, image_size).to(device)
    t0 = time.time()
    with torch.no_grad():
        for i, batch in enumerate(tqdm(data_loader)):
            input.copy_(batch[0])
            output = net(input).cpu().clone().numpy()
            np.array(output)
            output = np.rollaxis(output, 1, 4)
            for num in range(batch_size):
                arr = output[num, :, :, :]
                file_name = os.path.basename(batch[1][num])
                save_name = os.path.join(dest_dir, file_name)
                save_name = save_name.replace(".npy", "")
                scipy.io.savemat(save_name+'.mat', {'output': arr})
    t1 = time.time()
    print(f'Elapsed time = {t1-t0}')
For both net and quantized_model, I get an elapsed time of around 30 seconds for 12 images passed through them.

The PyTorch documentation suggests three ways to perform quantization. You are doing post-training dynamic quantization (the simplest quantization method available), which only supports torch.nn.Linear and torch.nn.LSTM layers, as listed here. To quantize CNN layers, you would want to check out the other two techniques (these are the ones that support CNN layers): post-training static quantization and quantization-aware training. This tutorial shows both of these techniques applied to CNNs.
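For reference, the static path looks roughly like this in eager mode. This is a minimal sketch on a placeholder conv block (not the question's UNet), assuming CPU inference with the 'fbgemm' backend; eager-mode quantized kernels run on the CPU, so the quantized model would have to be benchmarked there:
import torch
import torch.nn as nn

class SmallConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = torch.quantization.QuantStub()      # fp32 -> int8 at the input
        self.conv = nn.Conv2d(3, 16, 3, padding=1)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU()
        self.dequant = torch.quantization.DeQuantStub()  # int8 -> fp32 at the output

    def forward(self, x):
        return self.dequant(self.relu(self.bn(self.conv(self.quant(x)))))

model = SmallConvNet().eval()
# fuse Conv+BN+ReLU so they are quantized as a single module
torch.quantization.fuse_modules(model, [['conv', 'bn', 'relu']], inplace=True)
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(model, inplace=True)           # insert observers
with torch.no_grad():
    for _ in range(10):                                   # calibrate on representative inputs
        model(torch.randn(1, 3, 224, 224))
torch.quantization.convert(model, inplace=True)           # swap in int8 kernels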

I have tried out the static quantization approach on YOLOv5: it cuts the model size by 73% and decreases inference time by roughly 13-15%. It seems to me that conv layers gain less in latency than linear layers, because the same static approach on a toy fully-connected MNIST net decreased inference time by a factor of 4.
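To see the Linear-layer gains in isolation, here is a quick sketch using post-training dynamic quantization (the layer sizes, batch size and iteration count are arbitrary, chosen only for illustration; timings will vary by machine):
import time
import torch
import torch.nn as nn

fp32_model = nn.Sequential(nn.Linear(784, 1024), nn.ReLU(),
                           nn.Linear(1024, 1024), nn.ReLU(),
                           nn.Linear(1024, 10)).eval()
# dynamic quantization replaces the Linear weights with int8 versions
int8_model = torch.quantization.quantize_dynamic(fp32_model, {nn.Linear}, dtype=torch.qint8)

x = torch.randn(64, 784)

def bench(m, iters=200):
    with torch.no_grad():
        t0 = time.perf_counter()
        for _ in range(iters):
            m(x)
        return time.perf_counter() - t0

print(f'fp32: {bench(fp32_model):.3f}s  int8: {bench(int8_model):.3f}s')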

Related

Fixing "OperatorNotAllowedInGraphError" in tensorflow 2.9.1 with custom model

I'm trying to build a custom Transformer model within TensorFlow but I'm stuck. I always receive this error:
OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
Of course I did some research and found many solutions, but I'm failing to apply them to my own model since I'm not even sure where exactly this error occurs. I tried to remove all for loops, but the error still remained. I hope someone can help me and point me in the right direction.
I'm using:
python: 3.9
tensorflow: 2.9.1
keras: 2.9.0
import tensorflow as tf
import keras.backend as K  # for custom loss function
import tensorflow_probability as tfp
from tensorflow.python.keras.layers import Dense, Dropout, GlobalAveragePooling1D, MultiHeadAttention, Input, Conv1D
from keras.layers import LayerNormalization
import numpy as np

class MultiAttention(tf.keras.Model):
    def __init__(self, head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim):
        super(MultiAttention, self).__init__()
        # define all layers in init
        self.LN1 = LayerNormalization(epsilon=1e-6)
        self.MHA = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout_attention)
        self.Dropout1 = Dropout(dropout1)
        self.LN2 = LayerNormalization(epsilon=1e-6)
        self.CONV1 = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")
        self.Dropout2 = Dropout(dropout2)
        self.CONV2 = None

    def build(self, input_shape):
        self.CONV2 = Conv1D(filters=input_shape.shape[-1], kernel_size=1)

    def call(self, inputs, training=False):
        # Normalization and Attention
        x = self.LN1(epsilon=1e-6)(inputs)
        x = self.MHA(x, x)
        x = self.Dropout1(x)
        x_att = x + inputs
        # Feed Forward Part
        x = self.LN2(x_att)
        x = self.Conv1(x)
        x = self.Dropout2(x)
        x = self.Conv1(x)
        return x + x_att

class Transformer(tf.keras.Model):  # tf.keras.Model inherits model.fit/predict/evaluate. tf.keras.Layer can't do this
    def __init__(self, head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                 num_transformer_blocks, mlp_units, mlp_dropout):
        super(Transformer, self).__init__()
        # define all layers in init
        self.head_size = head_size
        self.num_heads = num_heads
        self.dropout_attention = dropout_attention
        self.dropout1 = dropout1
        self.dropout2 = dropout2
        self.ff_dim = ff_dim
        self.num_transformer_blocks = num_transformer_blocks
        self.mlp_units = mlp_units
        self.GAP = GlobalAveragePooling1D(data_format="channels_first")
        self.Dense1 = Dense(mlp_units, activation="relu")
        self.Dropout = Dropout(mlp_dropout)
        self.Dense2 = Dense(1, activation="softmax")
        self.MA = None

    def build(self, input_shape):
        self.MA = MultiAttention(self.head_size, self.num_heads, self.dropout_attention,
                                 self.dropout1, self.dropout2, self.ff_dim)
        # for dim in range(self.mlp_units):
        #     self.Dense1_list.append(Dense(dim, actiavtion="relu"))

    def call(self, input_shape, training=False):
        x = Input(input_shape)
        for _ in range(self.num_transformer_blocks):
            x = self.MA(x)
        x = self.GAP(x)
        for unit in self.Dense1_list:
            x = unit(x)
            x = self.Dropout(x)
        x = self.Dense1(x)
        x = self.Dropout(x)
        return self.Dense2(x)

class CustomLoss(tf.keras.losses.Loss):
    def __init__(self):
        super().__init__()

    def call(self, y_true, y_pred):
        alpha = K.std(y_pred) / K.std(y_true)
        beta = K.sum(y_pred) / K.sum(y_true)  # no need to calc mean
        r = tfp.stats.correlation(y_true, y_pred, sample_axis=None, event_axis=None)
        return K.sqrt(K.square(1 - r) + K.square(1 - alpha) + K.square(1 - beta))

def create_model(head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                 num_transformer_blocks, mlp_units, mlp_dropout, lr):
    model = Transformer(head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                        num_transformer_blocks, mlp_units, mlp_dropout)
    model.compile(
        loss=CustomLoss(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    )
    return model

model = create_model(256, 4, 0.10, 0.12, 0.15, 2, 4, 3, 0.10, 0.001)
model.fit(np.zeros((2, 257, 11)),
          np.zeros((2, 1)),
          epochs=1,
          batch_size=512,
          )

GAN LOSS of Generator and Discriminator Lowest at First Epoch - Is that normal?

I am trying to train a simple GAN, and I noticed that the loss for the generator and the discriminator is lowest in the first epoch. How can that be? Did I miss something?
Below you can find the plot of the loss over the iterations:
Here is the code I was using:
I adapted the code according to your suggestion, @emrejik. It doesn't seem to have changed much, though. I couldn't use torch.ones() at the suggested lines, as I was receiving this error message: "argument size (position 1) must be tuple of ints, not Tensor". Any idea why?
I ran through 10 epochs and this is what came out:
from glob import glob
import sys
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.utils as vutils
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchinfo import summary
from mpl_toolkits.axes_grid1 import ImageGrid
from skimage import io
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from tqdm import trange

manual_seed = 999
path = 'Punks'
image_paths = glob(path + '/*.png')
img_size = 28
batch_size = 32
device = 'cuda' if torch.cuda.is_available() else 'cpu'

transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

class ImageDataset(Dataset):
    def __init__(self, paths, transform):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        image_path = self.paths[index]
        image = io.imread(image_path)
        if self.transform:
            image_tensor = self.transform(image)
        return image_tensor

if __name__ == '__main__':
    dataset = ImageDataset(image_paths, transform)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, num_workers=2, shuffle=True)

    class Discriminator(nn.Module):
        def __init__(self):
            super().__init__()
            self.model = nn.Sequential(
                nn.Linear(784*3, 2048),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(2048, 1024),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(1024, 512),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(512, 256),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(256, 1),
                nn.Sigmoid(),
            )

        def forward(self, x):
            x = x.view(x.size(0), 784*3)
            output = self.model(x)
            return output

    discriminator = Discriminator().to(device=device)

    class Generator(nn.Module):
        def __init__(self):
            super().__init__()
            self.model = nn.Sequential(
                nn.Linear(100, 256),
                nn.ReLU(),
                nn.Linear(256, 512),
                nn.ReLU(),
                nn.Linear(512, 1024),
                nn.ReLU(),
                nn.Linear(1024, 2048),
                nn.ReLU(),
                nn.Linear(2048, 784*3),
                nn.Tanh(),
            )

        def forward(self, x):
            output = self.model(x)
            output = output.view(x.size(0), 3, 28, 28)
            return output

    generator = Generator().to(device=device)

    lr = 0.0001
    num_epochs = 10
    loss_function = nn.BCELoss()
    # noise = torch.randn(batch_size, 100, device=device)
    optimizer_discriminator = torch.optim.Adam(
        discriminator.parameters(), lr=lr)
    optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)

    model = Discriminator().to(device=device)
    summary(model, input_size=(batch_size, 3, 28, 28))
    model = Generator().to(device=device)
    summary(model, input_size=(batch_size, 100))

    image_list = []
    Dis_losses = []
    Gen_losses = []
    iters = 0
    epochs = 0

    for epoch in trange((num_epochs), bar_format='{desc:<5.5}{percentage:3.0f}%|{bar:120}{r_bar}\n'):
        for n, real_samples in enumerate(train_loader):
            batch_size = len(real_samples)
            real_samples = real_samples.to(device=device)
            real_samples_labels = torch.ones((batch_size, 1)).to(device=device)
            latent_space_samples = torch.randn(
                (batch_size, 100)).to(device=device)
            fake_samples = generator(latent_space_samples)
            fake_samples_labels = torch.zeros(
                (batch_size, 1)).to(device=device)

            discriminator.zero_grad()
            output_discriminator_real = discriminator(real_samples)
            loss_discriminator_real = loss_function(
                output_discriminator_real, real_samples_labels)
            output_discriminator_fake = discriminator(fake_samples)
            loss_discriminator_fake = loss_function(
                output_discriminator_fake, fake_samples_labels)
            loss_discriminator = (
                loss_discriminator_real + loss_discriminator_fake)/2
            loss_discriminator.backward()
            optimizer_discriminator.step()

            latent_space_samples = torch.randn(
                (batch_size, 100)).to(device=device)
            generator.zero_grad()
            fake_samples = generator(latent_space_samples)
            output_discriminator_fake = discriminator(fake_samples)
            loss_generator = loss_function(
                output_discriminator_fake, real_samples_labels)
            loss_generator.backward()
            optimizer_generator.step()

            image_list.append(vutils.make_grid(
                fake_samples_labels, padding=2, normalize=True))
            Dis_losses.append(loss_discriminator.item())
            Gen_losses.append(loss_generator.item())
            iters += 1
            epochs += 1
            if n == batch_size - 1:
                print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
                print(f"Epoch: {epoch} Loss G.: {loss_generator}")

    latent_space_samples = torch.randn((batch_size, 100)).to(device=device)

    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(Dis_losses, label="D")
    plt.plot(Gen_losses, label="G")
    plt.xlabel("iterations")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()
I didn't see a proper use of the loss function for the discriminator. You should give the real samples and the generated samples to the discriminator separately. I think you should change your code to something like this:
fake = generator(noise)
disc_real = disc(real)
loss_disc_real = loss_func(disc_real, torch.ones_like(disc_real))
disc_fake = disc(fake)
loss_disc_fake = loss_func(disc_fake,torch.zeros_like(disc_fake))
loss_disc = (loss_disc_real+loss_disc_fake)/2
....
loss_generator = loss_func(disc_fake,torch.ones_like(disc_fake))
...
Plot loss_disc and loss_generator; this should work.
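On the torch.ones() error mentioned in the question: torch.ones() expects sizes (integers or a tuple of ints), while torch.ones_like() takes a tensor and copies its shape, dtype and device, which is why the snippet above uses it. A short illustration, with disc and real as in the snippet above:
disc_real = disc(real)
labels_real = torch.ones_like(disc_real)                          # same shape/device as disc_real
# the explicit equivalent with torch.ones():
labels_real = torch.ones(disc_real.shape, device=disc_real.device)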

I am sure that I have already moved all the data and the model to the GPU, but I still get this error

This is the error stack:
Traceback (most recent call last):
  File "my_train.py", line 55, in <module>
    test()
  File "my_train.py", line 51, in test
    train.train()
  File "my_train.py", line 37, in train
    outputs = self.model(inputs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN32.py", line 12, in forward
    y = self.up_sampling(feature, None, 512)
  File "/home/rs/andy/andy/MyFCN/model/FCN/FCN.py", line 47, in up_sampling
    y = self.bn(batch_norm)(y)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/modules/batchnorm.py", line 168, in forward
    return F.batch_norm(
  File "/home/rs/andy/python-venv/torch/lib/python3.8/site-packages/torch/nn/functional.py", line 2421, in batch_norm
    return torch.batch_norm(
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument weight in method wrapper__cudnn_batch_norm)
This is all my code:
I think the error happens in this file, since this is the section where I put the data on the GPU.
train:
import torch.cuda
from torch import nn, optim
from torch.utils.data import DataLoader
import dataset
from model.FCN import FCN32

class Train:
    def __init__(self, dataset_path, model, batch_size, shuffle):
        self.dataset = dataset.ObtTrainDataset(dataset_path)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"using {self.device}")
        self.model = model

    def train(self):
        self.model = self.model.to(self.device)
        epoch = 10
        criterion = nn.CrossEntropyLoss().to(self.device)
        optimizer = optim.Adam(self.model.parameters(), lr=0.0001)
        dl = DataLoader(dataset=self.dataset, batch_size=self.batch_size, shuffle=False)
        for i in range(epoch):
            print("------------{} begin--------------".format(i))
            self.model.train()
            running_loss = 0.0
            j = 0
            for data in dl:
                j += 1
                inputs, target = data
                inputs = inputs.to(self.device)
                target = target.to(self.device)
                target = torch.squeeze(target, 1).long().to(self.device)
                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()
                running_loss += loss.cpu().item()
            print(running_loss)
            torch.save(self.model.state_dict(), f"models/obt_10_{i}.pth")

def test():
    dataset = "data/obt/image"
    model = FCN32(256, 5)
    train = Train(dataset, model, 8, True)
    train.train()

if __name__ == '__main__':
    test()
model code:
This is the implementation of FCN32s. I think there is nothing wrong here, but the error stack indicates that line 47 of FCN.py is where it fails.
FCN:
from torch import nn
from model.FCN.vgg import VGG16

class FCN(nn.Module):
    def __init__(self, input_size, num_classes, backbone="vgg16"):
        super().__init__()
        all_backones = ["vgg16"]
        if backbone not in all_backones:
            raise ValueError(f"backbone must be ont of the item in {all_backones}")
        if backbone == "vgg16":
            self.features = VGG16(input_size)
        self.num_classes = num_classes
        self.deconv1 = nn.ConvTranspose2d(512, 512, 3, 2, padding=1, output_padding=1)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, padding=1, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, padding=1, output_padding=1)
        self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, padding=1, output_padding=1)
        self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, padding=1, output_padding=1)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1, padding="same")
        self.bn = nn.BatchNorm2d
        self.relu = nn.ReLU()

    def forward(self, x):
        raise NotImplementedError("please implement it")

    def up_sampling(self, x1, x2=None, batch_norm=None):
        deconv = None
        assert batch_norm is not None
        if batch_norm == 512:
            deconv = self.deconv1
        elif batch_norm == 256:
            deconv = self.deconv2
        elif batch_norm == 128:
            deconv = self.deconv3
        elif batch_norm == 64:
            deconv = self.deconv4
        elif batch_norm == 32:
            deconv = self.deconv5
        y = deconv(x1)
        y = self.relu(y)
        if x2 is None:
            y = self.bn(batch_norm)(y)
        else:
            y = self.bn(batch_norm)(y + x2)
        return y

if __name__ == '__main__':
    pass
FCN32s
import torch
from torch import nn
from model.FCN import FCN

class FCN32(FCN):
    def forward(self, x):
        feature = self.features(x)["pool32"]
        y = self.up_sampling(feature, None, 512)
        y = self.up_sampling(y, None, 256)
        y = self.up_sampling(y, None, 128)
        y = self.up_sampling(y, None, 64)
        y = self.up_sampling(y, None, 32)
        y = self.classifier(y)
        return y
dataset:
import os
from glob import glob
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T

class ObtTrainDataset(Dataset):
    def __init__(self, image_path=r"data/obt/image", mode="train"):
        assert mode in ("train", "val", "test")
        self.image_path = image_path
        self.image_list = glob(os.path.join(self.image_path, "*.npy"))
        self.mode = mode
        if mode in ("train", "val"):
            self.mask_path = self.image_path
        self.transform_x = T.Compose(
            [T.ToTensor(), T.Resize((256, 256))])
        self.transform_mask = T.Compose([T.ToTensor(), T.Resize((256, 256))])

    def __getitem__(self, index):
        if self.mode in ("train", "val"):
            image_name = os.path.basename(self.image_list[index])
            X = np.load(os.path.join(self.image_path, image_name))
            masks = np.load(os.path.join(self.image_path + "Masks", image_name))
            X = X / 1.0
            masks = masks / 1.0
            X = self.transform_x(X)
            masks = self.transform_mask(masks)
            X = X.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)
            return X, masks
        else:
            X = Image.open(self.image_list[index])
            X = self.transform_x(X)
            path = self.image_list[index]
            return X, path

    def __len__(self):
        return len(self.image_list)
I have already been debugging it for 3 hours. Help!
Your use of BatchNorm2d is very odd. In your base class, FCN, you do not define a batch norm layer, but rather only a "place holder": self.bn is not a layer, but rather a function that returns a layer, given the parameter num_features. Then, at each forward pass, you create, from scratch a new BatchNorm2d layer, and forward your features through this layer.
Needless to say, the new layer that is created during the forward pass is never moved to the GPU: .to(self.device) is called before the forward pass, i.e., before the batch norm layer is created.
But this is not the worst thing here: the fact that you create a new batch norm layer for each forward pass means that the layer cannot learn the statistics of your data and thus is unable to function properly.
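A minimal sketch of one way to fix this (channel sizes taken from the question's FCN; only the relevant pieces are shown): create the batch-norm layers once in __init__, so that .to(self.device) moves them to the GPU and their running statistics persist across iterations.
from torch import nn

class FCN(nn.Module):
    def __init__(self, input_size, num_classes, backbone="vgg16"):
        super().__init__()
        # ... backbone, deconv and classifier definitions as in the question ...
        self.relu = nn.ReLU()
        # one registered BatchNorm2d per channel count, instead of a bare nn.BatchNorm2d class
        self.bns = nn.ModuleDict({str(c): nn.BatchNorm2d(c) for c in (512, 256, 128, 64, 32)})

    def up_sampling(self, x1, x2=None, batch_norm=None):
        deconv = {512: self.deconv1, 256: self.deconv2, 128: self.deconv3,
                  64: self.deconv4, 32: self.deconv5}[batch_norm]
        y = self.relu(deconv(x1))
        y = self.bns[str(batch_norm)](y if x2 is None else y + x2)
        return y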

Unable to call model.build() . "ValueError: You cannot build your model by calling `build` if your layers do not support float type inputs."

I've been trying to implement the "An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" paper by Alexey Dosovitskiy et al. using TensorFlow. While implementing the model, I'm running into the following error:
ValueError: You cannot build your model by calling `build` if your layers do not support float type inputs. Instead, in order to instantiate and build your model, `call` your model on real tensor data (of the correct dtype).
My code is split into two files called model.py and train.py
The train.py file is as below
import tensorflow_addons as tfa
from tensorflow import keras
from model import VisionTransformer

num_classes = 10
inputshape = (32, 32, 3)
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
learning_rate = 0.001
weight_decay = 0.0001
batch_size = 256
num_epochs = 1
image_size = 72
patch_size = 6
num_patches = (image_size // patch_size) ** 2
projection_dim = 64
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim,
]
transformer_layers = 8
mlp_head_units = [2048, 1024]

model = VisionTransformer(
    inputshape,
    patch_size,
    num_patches,
    projection_dim,
    transformer_layers,
    num_heads,
    transformer_units,
    mlp_head_units,
    num_classes
)

optimizer = tfa.optimizers.AdamW(
    learning_rate=learning_rate, weight_decay=weight_decay
)

model.compile(optimizer=optimizer,
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[
                  keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                  keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
              ],
              )

model.build(inputshape)
model.summary()
The model.py file is as below
import keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images, **kwargs):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch, **kwargs):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x

class VisionTransformer(tf.keras.Model):
    def __init__(self, inputshape, patch_size, num_patches, projection_dim,
                 transformer_layers, num_heads, transformer_units,
                 mlp_head_units, num_classes):
        super(VisionTransformer, self).__init__()
        self.inputshape = inputshape
        self.patch_size = patch_size
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.transformer_layers = transformer_layers
        self.num_heads = num_heads
        self.transformer_units = transformer_units
        self.mlp_head_units = mlp_head_units
        self.num_classes = num_classes

    def call(self, input, training):
        inputs = layers.Input(shape=self.inputshape)
        patches = Patches(self.patch_size)(inputs)
        encoded_patches = PatchEncoder(self.num_patches, self.projection_dim)(patches)
        for _ in range(self.transformer_layers):
            x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
            attention_output = layers.MultiHeadAttention(
                num_heads=self.num_heads, key_dim=self.projection_dim,
                dropout=0.1
            )(x1, x1)
            x2 = layers.Add()([attention_output, encoded_patches])
            x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
            x3 = mlp(x3, hidden_units=self.transformer_units, dropout_rate=0.1)
            encoded_patches = layers.Add()([x3, x2])
        representation = layers.LayerNormalization(epsilon=1e-6)(
            encoded_patches)
        representation = layers.Flatten()(representation)
        representation = layers.Dropout(0.5)(representation)
        features = mlp(representation, hidden_units=self.mlp_head_units,
                       dropout_rate=0.5)
        logits = layers.Dense(self.num_classes)(features)
        model = keras.Model(inputs=inputs, outputs=logits)
        return model
Today I met this problem too. I solved it by deleting the Input layer.
I think the reason is that the model already builds its input when build() is called, so there is no need to create a new Input layer inside call(); otherwise you get this error.
def call(self, input, training):
    # comment out the Input layer and try again
    # inputs = layers.Input(shape=self.inputshape)
    patches = Patches(self.patch_size)(input)   # operate directly on the tensor passed to call()
    encoded_patches = PatchEncoder(self.num_patches, self.projection_dim)(patches)
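On the train.py side, the error message's own suggestion can also be followed instead of model.build(inputshape): call the model once on real tensor data. A small sketch (the dummy batch shape is assumed from the CIFAR-10 input shape used above):
import tensorflow as tf

dummy = tf.zeros((1, 32, 32, 3))   # one dummy image with the expected input shape
_ = model(dummy)                   # building happens on this first call
model.summary()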

PyTorch | getting "RuntimeError: Found dtype Long but expected Float"

I'm trying to train a CNN on a custom dataset. Code:
Dataset.py
class MyDataset(Dataset):
    def __init__(self, csv_file, root_dir):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        img = cv2.imread(img_path)
        # resize
        res = cv2.resize(img, dsize=(50, 50), interpolation=cv2.INTER_CUBIC)
        # convert image to tensor
        res = torch.from_numpy(res)
        return (res, y_label)
Model.py
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        f2 = 4
        self.layer2 = nn.Sequential(
            nn.Conv2d(50, f2, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(f2),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc1 = nn.Linear(100, 200)
        self.fc2 = nn.Linear(200, 20)
        self.fc3 = nn.Linear(20, 1)

    def forward(self, x):
        x = self.layer2(x.float())
        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
and here is my training code:
dataset = MyDataset(
    csv_file='dataset.csv',
    root_dir='tmp')
train_set, test_set = torch.utils.data.random_split(dataset, lengths=[500, 70])
train_loader = DataLoader(dataset=train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=16, shuffle=True)

model = ConvNet()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(20):
    losses = []
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Cost: {0} = {1}'.format(epoch, sum(losses)/len(losses)))
But I get RuntimeError: Found dtype Long but expected Float. This probably comes from the fact that I do x = self.layer2(x.float()) to avoid overflow.
I would like to know how to fix that error. It's difficult to pinpoint where exactly the problem comes from.
How can I solve this?
The problem might be caused by the data tensor. When the data loader fetches images via the __getitem__() method, the image is read with OpenCV and converted to a tensor. I think at that point the type of your data tensor is long, but it should be float. If you cast your NumPy array (named res) to float, it should work fine. You can see my solution below.
res = cv2.resize(img, dsize=(50, 50), interpolation=cv2.INTER_CUBIC)
res = res.astype(np.float32) # you should add this line
res = torch.from_numpy(res)
res = res.permute(2, 0, 1)
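For context, this is roughly how the cast would slot into the question's __getitem__; the two lines from the answer are folded in, and the rest is unchanged from the question:
def __getitem__(self, index):
    img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
    y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
    img = cv2.imread(img_path)
    res = cv2.resize(img, dsize=(50, 50), interpolation=cv2.INTER_CUBIC)
    res = res.astype(np.float32)                   # cast to float32 before building the tensor
    res = torch.from_numpy(res).permute(2, 0, 1)   # HWC -> CHW
    return res, y_label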
