I want to train a VAE network, but somehow after 150 epochs the output is very weird, even though the loss has converged to ~0.07. In my DataLoader the images are converted from int16 to grayscale, and the input is [batch_size, 5, 512, 512] (5 grayscale images). Here is my code. I have no idea why the output looks like that.
class MyDataset(Dataset):
    def __init__(self, transform):
        data_path = "/content/drive/MyDrive/sub_sample/imgs_train_data_0_5_5bands.npy"
        # train_ = CustomImageDataset(data_path, transform=None)
        imgs_test = np.load(data_path)
        # x = torch.zeros(3, 244, 395, dtype=torch.uint8)
        print(imgs_test.dtype)
        img = imgs_test.astype('float32')
        imgs_test = (img - np.min(img, axis=(0, 1))) / np.ptp(img, axis=(0, 1))
        print(imgs_test.dtype)
        print(imgs_test.max(), imgs_test.min())
        self.training = imgs_test
        # print(max(imgs_test[0]), min(imgs_test[0]))
        self.img = torch.from_numpy(self.training)
        # self.img = F.interpolate(self.img, size=(256, 256))
        print(imgs_test.shape)
        # self.transform = transform
        # print('****', self.subset.shape[0])

    def __getitem__(self, index):
        # img_resize = np.random.random((13, 256, 256))
        img_resize = self.img[index, :, :, :]  # read each npy sample
        # print(img_resize[0, :, :].max())
        return img_resize

    def __len__(self):
        # print('the shape of the total dataset', len(self.training))
        return len(self.training)
The reconstructed result:
BTW, the loss function is F.mse_loss(input, reconstructed_image).
I would really appreciate it if anyone could give me some advice.
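For reference, plain F.mse_loss on its own trains an ordinary autoencoder; a VAE objective normally adds a KL-divergence term on the latent distribution. A minimal sketch, assuming the encoder returns mu and logvar (names are hypothetical; adapt to your model):

import torch
import torch.nn.functional as F

def vae_loss(recon, x, mu, logvar, beta=1.0):
    # Reconstruction term; 'sum' keeps it on the same scale as the KL term.
    recon_loss = F.mse_loss(recon, x, reduction='sum')
    # KL divergence between N(mu, sigma^2) and the standard normal prior.
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + beta * kl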
I'm unable to resolve the error in the following code. I have a VGG19 encoder and a GetFusedImage class using PyTorch.
CODE:
import argparse
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.vgg import vgg19
import matplotlib.pyplot as plt
import pywt
import pywt.data
# Defining a custom pre-trained model (VGG19)
class VGG19(torch.nn.Module):
    def __init__(self, device='cpu'):
        super(VGG19, self).__init__()
        modelFeatures = list(vgg19(pretrained=True).features)  # extract the feature layers of the model
        if device == "cuda":
            self.features = nn.ModuleList(modelFeatures).cuda().eval()
        else:
            self.features = nn.ModuleList(modelFeatures).eval()

    def forward(self, image):
        featureMaps = []  # list used to store the output of each layer of the VGG19 model
        for index, layer in enumerate(self.features):
            image = layer(image)
            if index == 3:
                featureMaps.append(image)
        return featureMaps
class GetFusedImage:
    def __init__(self, CTscanImage, MRIscanImage):
        """
        Class Fusion constructor
        Instance Variables:
        self.inputImages: input images
        self.model: CNN model, default=vgg19
        self.device: either 'cuda' or 'cpu'
        """
        self.inputImages = CTscanImage, MRIscanImage
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = VGG19(self.device)

    def _RGB_to_YCbCr(self, img_RGB):
        """
        A private method which converts an RGB image to YCrCb format
        """
        img_RGB = img_RGB.astype(np.float32) / 255.
        return cv2.cvtColor(img_RGB, cv2.COLOR_RGB2YCrCb)

    def _YCbCr_to_RGB(self, img_YCbCr):
        """
        A private method which converts a YCrCb image to RGB format
        """
        img_YCbCr = img_YCbCr.astype(np.float32)
        return cv2.cvtColor(img_YCbCr, cv2.COLOR_YCrCb2RGB)

    def _is_gray(self, img):
        """
        A private method which returns True if the image is grayscale, otherwise False
        """
        if len(img.shape) < 3:
            return True
        if img.shape[2] == 1:
            return True
        b, g, r = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        if (b == g).all() and (b == r).all():
            return True
        return False

    def _softmax(self, tensor):
        """
        A private method which computes the softmax output of a given tensor
        """
        tensor = torch.exp(tensor)
        tensor = tensor / tensor.sum(dim=1, keepdim=True)
        return tensor
    def _transfer_to_tensor(self):
        """
        A private method to transfer all input images to PyTorch tensors
        """
        self.images_to_tensors = []
        for image in self.normalizedImages:
            np_input = image.astype(np.float32)
            if np_input.ndim == 2:
                np_input = np.repeat(np_input[None, None], 3, axis=1)
            else:
                np_input = np.transpose(np_input, (2, 0, 1))[None]
            if self.device == "cuda":
                self.images_to_tensors.append(torch.from_numpy(np_input).cuda())
            else:
                self.images_to_tensors.append(torch.from_numpy(np_input))
    def fuseAlgorithm(self):
        """
        Perform the fusion algorithm
        """
        with torch.no_grad():
            imageSumMaps = [-1 for tensor_img in self.images_to_tensors]
            for idx, tensor_img in enumerate(self.images_to_tensors):
                imageSumMaps[idx] = []
                featureMaps = self.model(tensor_img)
                for featureMap in featureMaps:
                    sumMap = torch.sum(featureMap, dim=1, keepdim=True)
                    imageSumMaps[idx].append(sumMap)
            maxFusion = None
            for sumMaps in zip(*imageSumMaps):
                features = torch.cat(sumMaps, dim=1)
                weights = self._softmax(F.interpolate(features,
                                        size=self.images_to_tensors[0].shape[2:]))
                weights = F.interpolate(weights,
                                        size=self.images_to_tensors[0].shape[2:])
                # keep the accumulator on the same device as the inputs
                currentFusion = torch.zeros(self.images_to_tensors[0].shape,
                                            device=self.images_to_tensors[0].device)
                for index, tensor_img in enumerate(self.images_to_tensors):
                    currentFusion += tensor_img * weights[:, index]
                if maxFusion is None:
                    maxFusion = currentFusion
                else:
                    maxFusion = torch.max(maxFusion, currentFusion)
            output = np.squeeze(maxFusion.cpu().numpy())
            if output.ndim == 3:
                output = np.transpose(output, (1, 2, 0))
            return output
    def fuseImage(self):
        """
        A top-level method which fuses self.inputImages
        """
        # Convert all images to YCbCr format
        self.normalizedImages = [-1 for img in self.inputImages]
        self.YCbCrImages = [-1 for img in self.inputImages]
        # Check whether each image is grayscale or not
        for index, img in enumerate(self.inputImages):
            if not self._is_gray(img):
                self.YCbCrImages[index] = self._RGB_to_YCbCr(img)
                self.normalizedImages[index] = self.YCbCrImages[index][:, :, 0]  # store the first (luma) channel of the YCbCr image
            else:
                self.normalizedImages[index] = img / 255.
        # Transfer all images to PyTorch tensors
        self._transfer_to_tensor()
        # Perform the fusion strategy
        fusedImage = self.fuseAlgorithm()[:, :, 0]
        # Reconstruct the fused image given RGB input images
        for index, img in enumerate(self.inputImages):
            if not self._is_gray(img):
                self.YCbCrImages[index][:, :, 0] = fusedImage
                fusedImage = self._YCbCr_to_RGB(self.YCbCrImages[index])
        fusedImage = np.clip(fusedImage, 0, 1)
        return (fusedImage * 255).astype(np.uint8)
        # return fused_img
    def waveletTransformation(self, image, imageName):
        titles = ['Approximation', ' Horizontal detail',
                  'Vertical detail', 'Diagonal detail']
        coeffs2 = pywt.dwt2(image, 'haar')
        LL, (LH, HL, HH) = coeffs2
        fig = plt.figure(figsize=(12, 3))
        for i, a in enumerate([LL, LH, HL, HH]):
            ax = fig.add_subplot(1, 4, i + 1)
            ax.imshow(a, interpolation="nearest", cmap=plt.cm.gray)
            if "CTscanImage" in imageName:
                path = 'jpg/ct_' + str(i) + '.jpg'
                cv2.imwrite(path, a)
            else:
                path = 'jpg/mri_' + str(i) + '.jpg'
                cv2.imwrite(path, a)
            ax.set_title(titles[i], fontsize=10)
            ax.set_xticks([])
            ax.set_yticks([])
        fig.tight_layout()
        plt.show()
if __name__ == "__main__":
arg = argparse.ArgumentParser()
arg.add_argument("--pathInCTImage", help="Path to CT Scan Image")
arg.add_argument("--pathInMRIImage", help="Path to MRI Scan Image")
args = arg.parse_args()
CTscanImage = cv2.imread(args.pathInCTImage)
MRIscanImage = cv2.imread(args.pathInMRIImage) #MRI Image will be registered
handler = GetFusedImage(CTscanImage, MRIscanImage)
handler.waveletTransformation(CTscanImage, "CTscanImage")
handler.waveletTransformation(MRIscanImage, "MRIscanImage")
ERROR:
/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
warnings.warn(
/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG19_Weights.IMAGENET1K_V1`. You can also use `weights=VGG19_Weights.DEFAULT` to get the most up-to-date weights.
warnings.warn(msg)
Traceback (most recent call last):
File "/Users/jaskiratsingh/Desktop/registration_fusion/fusion.py", line 223, in <module>
handler.waveletTransformation(CTscanImage, "CTscanImage")
File "/Users/jaskiratsingh/Desktop/registration_fusion/fusion.py", line 190, in waveletTransformation
ax.imshow(a, interpolation="nearest", cmap=plt.cm.gray)
File "/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/matplotlib/_api/deprecation.py", line 456, in wrapper
return func(*args, **kwargs)
File "/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/matplotlib/__init__.py", line 1412, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
File "/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/matplotlib/axes/_axes.py", line 5488, in imshow
im.set_data(X)
File "/Users/jaskiratsingh/opt/anaconda3/lib/python3.9/site-packages/matplotlib/image.py", line 715, in set_data
raise TypeError("Invalid shape {} for image data"
TypeError: Invalid shape (512, 256, 2) for image data
The images which I passed are the following: https://drive.google.com/drive/folders/1ILFu0LVHOEHpcjpBPrLvCH3VPTprhiGf?usp=share_link
I tried changing the shape as well but that also didn't work for me.
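For what it's worth, the (512, 256, 2) shape is consistent with cv2.imread returning a 3-channel BGR array: pywt.dwt2 uses axes=(-2, -1) by default, so on an (H, W, 3) array it transforms the width and channel axes instead of height and width, and imshow then rejects the 3D subbands. A minimal sketch of one possible workaround (my assumption, not verified against the linked images), converting to grayscale before the transform:

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # collapse BGR to a single channel
coeffs2 = pywt.dwt2(gray, 'haar')               # LL/LH/HL/HH are now 2D arrays
LL, (LH, HL, HH) = coeffs2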
I'm following this object detection tutorial in PyTorch for Mask R-CNN: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html and at the end there are some verification images.
How can I get this prediction image? I'm working on object detection.
Is there some way to output images from the trained model so that I can see whether my network is learning something?
This is my code:
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
#from engine import train_one_epoch, evaluate
from vision.references.detection.engine import train_one_epoch, evaluate
import utils
import transforms as T
import matplotlib.pyplot as plt
class Moj_Dataset_ArT(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "NEW_train"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "train_masks"))))

    def __getitem__(self, idx):
        # load images and masks
        img_path = os.path.join(self.root, "NEW_train", self.imgs[idx])
        mask_path = os.path.join(self.root, "train_masks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]
        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=False)  # was True
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    # if train:
    #     transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def draw_loss(ml):
    plt.figure(figsize=(10, 5))
    plt.title("Training Loss")
    # plt.plot(val_losses, label="val")
    plt.plot(ml, label="train")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.xlim([0, 80])
    plt.ylim([0, 1.2])
    plt.show()
def main():
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = Moj_Dataset_ArT('Train/ArT', get_transform(train=True))
    dataset_test = Moj_Dataset_ArT('Train/ArT', get_transform(train=False))
    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-500])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-500:])
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)
    # get the model using our helper function
    model = get_model_instance_segmentation(num_classes)
    # move model to the right device
    model.to(device)
    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=30,
                                                   gamma=0.1)  # gamma was 0.5
    # let's train it for num_epochs epochs
    num_epochs = 200
    PATH = 'home//Train/ArT/models/'
    ml = []
    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        loss_value = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        ml.append(loss_value)
        # update the learning rate
        lr_scheduler.step()
        torch.save(model, PATH)
        # evaluate on the test dataset
        # evaluate(model, data_loader_test, device=device)
    print(ml)
    draw_loss(ml)

if __name__ == "__main__":
    main()
Something like this
When I run this after evaluate():

img, _ = dataset_test[20]
with torch.no_grad():
    prediction = model([img.to(device)])
imag = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
imaag = Image.fromarray(prediction[0]['masks'][0, 0].mul(255).byte().cpu().numpy())
imag.show()
imaag.show()
I get this (not a good image):
I also tried Detectron2, but then I would need to make a cfg file and train a different way, and I'm following this PyTorch tutorial.
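Not the tutorial's own visualization method, but torchvision ships drawing utilities that overlay predictions on the input image. A minimal sketch, assuming model, dataset_test, and device from the code above are in scope, with an arbitrary 0.5 score threshold of my choosing:

import torchvision.transforms.functional as TF
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks

model.eval()  # switch to inference mode before predicting
img, _ = dataset_test[20]
with torch.no_grad():
    pred = model([img.to(device)])[0]

keep = pred['scores'] > 0.5             # drop low-confidence detections
img_uint8 = (img * 255).byte()          # drawing utils expect uint8 CHW
masks = pred['masks'][keep, 0] > 0.5    # threshold soft masks to booleans
vis = draw_segmentation_masks(img_uint8, masks.cpu(), alpha=0.5)
vis = draw_bounding_boxes(vis, pred['boxes'][keep].cpu(), width=2)
TF.to_pil_image(vis).show()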
I am working on a PyTorch model.
I have trained it and am now testing and evaluating it.
img_score with the prediction is working correctly.
I also want to display the segmentation mask image (seg_out in the code), but plt.imshow does not work, even if I permute it.
Moreover, if I print the values of the seg_out image, they are all negative, and I don't understand how to handle them as images.
Here is what I am using.
import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
# SegDecNet comes from the project's own model definition

INPUT_WIDTH = 232
INPUT_HEIGHT = 640
INPUT_CHANNELS = 3
device = "cpu"
model = SegDecNet(device, INPUT_WIDTH, INPUT_HEIGHT, INPUT_CHANNELS)
model.set_gradient_multipliers(0)
model_path = "/final_state_dict.pth"
model.load_state_dict(torch.load(model_path, map_location=device))
# %%
img_path = '/20118.png'
img = cv2.imread(img_path) if INPUT_CHANNELS == 3 else cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (INPUT_WIDTH, INPUT_HEIGHT))
print(img.shape)
img = np.transpose(img, (2, 0, 1)) if INPUT_CHANNELS == 3 else img[np.newaxis]
img_t = torch.from_numpy(img)[np.newaxis].float() / 445400  # must be [BATCH_SIZE x CHANNELS x HEIGHT x WIDTH]
dec_out, seg_out = model(img_t)
img_score = torch.sigmoid(dec_out)
print(img_score)
print(seg_out[0][0].shape)
print(seg_out[0][0].permute(0, 1))
# plt.imshow(seg_out[0].permute(2, 0, 1))
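A guess, based on dec_out also being passed through a sigmoid above: seg_out most likely holds raw logits, which would explain the negative values. Squashing them through a sigmoid and dropping the batch and channel dimensions gives a 2D array that plt.imshow can display; a minimal sketch under that assumption:

mask = torch.sigmoid(seg_out)       # map logits to [0, 1]
mask = mask[0, 0].detach().cpu()    # drop batch and channel dims -> [H, W]
plt.imshow(mask, cmap='gray')       # a 2D array needs no permute
plt.colorbar()
plt.show()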
Thanks.
I'm trying to train a CNN on a custom dataset. Code:
Dataset.py
class MyDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
        img = cv2.imread(img_path)
        # resize
        res = cv2.resize(img, dsize=(50, 50), interpolation=cv2.INTER_CUBIC)
        # convert image to tensor
        res = torch.from_numpy(res)
        return (res, y_label)
Model.py
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        f2 = 4
        self.layer2 = nn.Sequential(
            nn.Conv2d(50, f2, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(f2),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc1 = nn.Linear(100, 200)
        self.fc2 = nn.Linear(200, 20)
        self.fc3 = nn.Linear(20, 1)

    def forward(self, x):
        x = self.layer2(x.float())
        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
And here is my training code:
dataset = MyDataset(
    csv_file='dataset.csv',
    root_dir='tmp')
train_set, test_set = torch.utils.data.random_split(dataset, lengths=[500, 70])
train_loader = DataLoader(dataset=train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=16, shuffle=True)

model = ConvNet()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(20):
    losses = []
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Cost: {0} = {1}'.format(epoch, sum(losses)/len(losses)))
But I get RuntimeError: Found dtype Long but expected Float. This probably comes from the fact that I do x = self.layer2(x.float()) to avoid overflow.
I would like to know how to fix that error; it's difficult to pinpoint where the exact problem comes from.
How can I solve this?
The problem might be caused by the data tensor. When the data loader fetches images via the __getitem__() method, the image is read with OpenCV and transformed to a tensor. I think at that point the type of your data tensor is long, but it should be float. If you cast your numpy array (named res) to float, it should work fine. You can see my solution below.

res = cv2.resize(img, dsize=(50, 50), interpolation=cv2.INTER_CUBIC)
res = res.astype(np.float32)  # you should add this line
res = torch.from_numpy(res)
res = res.permute(2, 0, 1)
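If the error persists after that cast, it may come from the targets rather than the inputs: nn.MSELoss expects float targets, while the dataset returns them as long integers, and that mismatch produces the same "Found dtype Long but expected Float" message. A one-line sketch of the extra cast (my addition, not part of the original answer):

loss = criterion(scores, targets.float().unsqueeze(1))  # float targets, shaped to match the model's [N, 1] output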
I am building a neural network for Bengali numerical digit classification using PyTorch. I am facing difficulties building the dataset class to load my dataset using a data loader. I have a folder with all the images (numerical digits from 0-9) and a CSV file with two columns: the first contains the name of the image, and the second contains the label (0-9). This is my data loader class, which probably isn't causing the error.
class BDRWDataset(Dataset):
    """BDRW dataset."""

    def __init__(self, csv_file, imgs_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with labels.
            imgs_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.labels = pd.read_csv(csv_file).iloc[:, 1].to_numpy().reshape(-1, 1)
        self.imgs_dir = imgs_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = '/content/BDRW_train/digit_' + str(idx) + '.jpg'
        image = io.imread(img_path, plugin='matplotlib')
        image = Image.fromarray(np.uint8(image))
        label = self.labels[idx]
        label = float(label)
        if self.transform:
            image = self.transform(image)
        return (image, label)
I create an instance of this class.
transformed_dataset = BDRWDataset(csv_file='/content/labels.csv', imgs_dir='/content/BDRW_train',
                                  transform=transforms.Compose([
                                      Rescale((28, 28)),
                                      transforms.Normalize((0.5,), (0.5,)),
                                      ToTensor()
                                  ]))
I have defined Rescale and ToTensor as follows:
class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, label = sample
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size
        new_h, new_w = int(new_h), int(new_w)
        img = transform.resize(image, (new_h, new_w))
        return img, label
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        return (torch.from_numpy(image), torch.from_numpy(label))
I split the dataset into train and test sets and created a train loader and a validation loader using torch.utils.data.DataLoader.
The neural network is:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
I create an instance of this class and start training
cnn = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

losses = []
for epoch in range(num_epochs):
    for i, (image, label) in enumerate(valloader):
        image = Variable(image.float())
        label = Variable(label)
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(image)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()
        losses.append(loss.data[0])
        if (i+1) % 100 == 0:
            print('Epoch : %d/%d, Iter : %d/%d, Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))
This is where I get the error
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/PIL/Image.py in open(fp, mode)
2812 try:
-> 2813 fp.seek(0)
2814 except (AttributeError, io.UnsupportedOperation):
AttributeError: 'str' object has no attribute 'seek'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
9 frames
/usr/local/lib/python3.6/dist-packages/PIL/Image.py in open(fp, mode)
2813 fp.seek(0)
2814 except (AttributeError, io.UnsupportedOperation):
-> 2815 fp = io.BytesIO(fp.read())
2816 exclusive_fp = True
2817
AttributeError: 'str' object has no attribute 'read'
It refers to the PIL Image that I have used in the data loader, so it seems to me that's where I am doing something wrong.
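One hedged workaround (my suggestion, not verified against the notebook): load the file directly with PIL instead of going through skimage's matplotlib plugin, since Image.open accepts a plain path string and PIL is already imported in the dataset class:

image = Image.open(img_path).convert('L')  # 'L' = single-channel grayscale; replaces the io.imread call in __getitem__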
https://colab.research.google.com/drive/17XdP7gUoMNLxPCJ6PHEi3B09UQItzKyf?usp=sharing
This is the notebook I am working on. Please help me debug the errors in the code.
https://drive.google.com/open?id=1DznuHV9Fi5jVEbGdP-tg3ckmp5CNDOj1
This is the dataset I am working on.