This question mainly concerns the return value of __getitem__ in a PyTorch Dataset, which I've seen as both a tuple and a dict in the source code.
I have been following this tutorial on creating a dataset class for my code, which in turn follows this tutorial on transfer learning. It has the following definition of a dataset:
class FaceLandmarksDataset(Dataset):
"""Face Landmarks dataset."""
def __init__(self, csv_file, root_dir, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.landmarks_frame = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.landmarks_frame)
def __getitem__(self, idx):
img_name = os.path.join(self.root_dir,
self.landmarks_frame.iloc[idx, 0])
image = io.imread(img_name)
landmarks = self.landmarks_frame.iloc[idx, 1:].as_matrix()
landmarks = landmarks.astype('float').reshape(-1, 2)
sample = {'image': image, 'landmarks': landmarks}
if self.transform:
sample = self.transform(sample)
return sample
As you can see, __getitem__ returns a dictionary with two entries.
In the transfer learning tutorial, the following calls are made to transform a dataset:
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
use_gpu = torch.cuda.is_available()
inputs, classes = next(iter(dataloaders['train']))
That last line of code causes an error in my code, because it applies the transform to a sample from my custom dataset:
'dict' object has no attribute 'size'
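For context, here is a minimal sketch (my own reproduction, not from either tutorial) of why the default transforms choke on a dict; they expect a single image:
from PIL import Image
import numpy as np
from torchvision import transforms

crop = transforms.RandomResizedCrop(224)
img = Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))

crop(img)                                  # fine: built-in transforms operate on one image
crop({'image': img, 'landmarks': None})    # raises an AttributeError like the one above
                                           # (exact message depends on the torchvision version)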
But if the tutorial dataset is implemented correctly, shouldn't it function correctly with a transform? My own hybrid implementation is below:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.data import *
from skimage import io, transform
plt.ion()
class NumsDataset(Dataset):
"""Face Landmarks dataset."""
def __init__(self, root_dir, transform=None):
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.docs = []
for file in os.listdir(root_dir):
#print(file)
if file.endswith(".txt"):
path = os.path.join(root_dir, file)
with open(path, 'r') as f:
self.docs.append( ( file , list(f.read()) ) ) #tup containing file, image values pairs
self.root_dir = root_dir
self.transform = transform
def __len__(self): #returns number of images
i = 0
for j in self.docs:
i += len(j[1])
return i
def len2(self): #returns number of batches
return len(self.docs)
def __getitem__(self, idx):
idx1 = idx // self.len2()
idx2 = idx % self.len2()
imglabel = self.docs[idx1][0] #label with filename for batch error calculation later
imgdir = os.path.join(self.root_dir, self.docs[idx1][0].strip(".txt"))
img = None
l = idx2
for file in os.listdir(imgdir):
file = os.path.join(imgdir, file)
if(l == 0):
img = io.imread(file)
l -= 1
sample = (img , imglabel)
sample ={'image': img, 'label': imglabel}
if self.transform:
sample = self.transform(sample)
return sample
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = "images"
image_datasets = {x: NumsDataset(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=5)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = ["one", "two", "four"]
use_gpu = torch.cuda.is_available()
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
directory structure:
images
    /train
        /file1
            *.jpg
        /file2...
            *.jpg
        file1.txt
        file2.txt...
    /val
        /file1
            *.jpg
        /file2...
            *.jpg
        file1.txt
        file2.txt...
Is the sample I'm returning formatted incorrectly?
The problem below occurs when you pass a dict instead of an image to the transforms. The custom transforms written in the tutorial can handle a dict, but the default torchvision transforms cannot; instead, pass only the image to the transform. That solves half of the problem.
'dict' object has no attribute 'size'
The rest of the problem lies with the image-handling code in the example. I had to dig through torchvision's transforms.py; it works on PIL images, unlike the skimage arrays used in the example, so I replaced the code with PIL and it now works fine.
site-packages/torchvision/transforms/transforms.py
Original Code:
def __getitem__(self, idx):
if torch.is_tensor(idx):
img_name = os.path.join(self.root_dir,self.anb_frame.iloc[idx, 0])
image = io.imread(img_name)
labels = self.anb_frame.iloc[idx, 1:]
labels = np.array([labels])
sample = {'image': image, 'labels': labels}
if self.transform:
image = self.transform(image)
return sample
Modified:
def __getitem__(self, idx):
if torch.is_tensor(idx):
img_name = os.path.join(self.root_dir,self.anb_frame.iloc[idx, 0])
image = Image.open(img_name)
if self.transform:
image = self.transform(image)
labels = self.anb_frame.iloc[idx, 1:]
labels = np.array([labels])
sample = {'image': image, 'labels': labels}
return sample
The data-loading tutorial uses the custom dataset together with self-defined transforms, and the transforms must be designed to fit the dataset. So either the dataset must output a sample compatible with the library transform functions, or transforms must be written for the particular sample format. Choosing the latter, among other things, resulted in completely functional code.
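For reference, a rough sketch of what such sample-specific transforms can look like, modeled on the data-loading tutorial's style (the class names here are my own, and the sample is assumed to be the {'image': ..., 'label': ...} dict from above):
import torch
from torchvision import transforms
from skimage.transform import resize

class RescaleSample:
    """Resize the image inside a {'image': ..., 'label': ...} sample."""
    def __init__(self, output_size):
        self.output_size = output_size

    def __call__(self, sample):
        image = resize(sample['image'], (self.output_size, self.output_size))
        return {'image': image, 'label': sample['label']}

class ToTensorSample:
    """Convert the HxWxC ndarray in the sample to a CxHxW float tensor."""
    def __call__(self, sample):
        image = torch.from_numpy(sample['image'].transpose((2, 0, 1))).float()
        return {'image': image, 'label': sample['label']}

# composed exactly like the built-in transforms, but dict-aware
sample_transform = transforms.Compose([RescaleSample(224), ToTensorSample()])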
Related
I am trying to use AlexNet to classify spectrogram images generated for 3s audio segments. I have successfully trained a model on my dataset and am now trying to identify which images the model misclassified.
I am able to obtain the filename of an image by calling iterator.dataset.data_df.adressfname, but I am unsure how to use this in the for loop in the get_predictions function. If I try to retrieve the filenames using iterator.dataset.data_df.adressfname[i], I get the following error: expected Tensor as element 0 in argument 0, but got str
Ultimately, I want to create a dataframe that contains the filename, actual label and predicted label. Does anyone have any suggestions?
class CustomDataset(Dataset):
def __init__(self, img_path, csv_file, transforms):
self.imgs_path = img_path
self.csv_train_file = csv_file
file_list = glob.glob(self.imgs_path + "*")
self.data = []
self.data_df = pd.read_csv(self.csv_train_file)
self.transforms = transforms
for ind in self.data_df.index:
img_path = self.data_df['spectrogramSegFilename'][ind]
class_name = self.data_df['dx'][ind]
self.data.append([img_path, class_name])
self.class_map = {"ProbableAD" : 0, "Control": 1}
self.img_dim = (256, 256)
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
img_path, class_name = self.data[idx]
img = cv2.imread(img_path)
img = cv2.resize(img, self.img_dim)
class_id = self.class_map[class_name]
img_tensor = torch.from_numpy(img)
img_tensor = img_tensor.permute(2, 0, 1)
data = self.transforms(img_tensor)
class_id = torch.tensor([class_id])
return data, class_id
if __name__ == "__main__":
transformations = transforms.Compose([transforms.ToPILImage(), transforms.Resize(256), transforms.CenterCrop(256), transforms.ToTensor(), transforms.Normalize((0.49966475, 0.1840554, 0.34930056), (0.35317238, 0.17343724, 0.1894943))])
train_dataset = CustomDataset("/spectrogram_images/spectrogram_train/", "train_features_segmented.csv", transformations)
test_dataset = CustomDataset("/spectrogram_images/spectrogram_test/", "test_features_segmented.csv", transformations)
train_dataset = CustomDataset("spectrogram_images/spectrogram_train/", "/train_features_segmented.csv")
test_dataset = CustomDataset("spectrogram_images/spectrogram_test/", "/test_features_segmented.csv")
train_data_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
def get_predictions(model, iterator, device):
model.eval()
images = []
labels = []
probs = []
participant_ids = []
with torch.no_grad():
for i, (x, y) in enumerate(iterator):
x = x.to(device)
y_pred = model(x)
y_prob = F.softmax(y_pred, dim=-1)
participant_ids.append(iterator.dataset.data_df.adressfname[i])
images.append(x.cpu())
labels.append(y.cpu())
probs.append(y_prob.cpu())
images = torch.cat(images, dim=0)
labels = torch.cat(labels, dim=0)
probs = torch.cat(probs, dim=0)
participant_ids = torch.cat(participant_ids, dim=0)
return images, labels, probs, participant_ids
Just add a third field to the return signature of __getitem__():
def __getitem__(self,idx):
...
return data,class_id,img_path
Then, when you call the dataloader:
for i, (x,y,img_paths) in enumerate(iterator):
... call model ...
... compare outputs to labels ...
... identify incorrect batch indices
mislabeled_files = [img_paths[idx] for idx in incorrect_batch_indices]
And there you have it. Note that in your current code block, i is the batch index, which has nothing to do with the indices of the dataset object wrapped in the dataloader (both because the dataloader has a batch size, so it collates multiple dataset elements into a single batch, and because the dataloader shuffles the elements of the dataset), so you should not use i to index into the dataset. If you want to reference the underlying data items in the dataset object, you can instead simply return the data idx in __getitem__():
def __getitem__(self,idx):
...
return data,class_id,idx
You can then use this idx to reference the original dataset data and get the file names that way.
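As a sketch of that second approach (assuming __getitem__ returns data, class_id, idx as above, that self.data[idx][0] in your CustomDataset is the image path, and that model, iterator and device are the ones from your get_predictions function):
for i, (x, y, idxs) in enumerate(iterator):
    x = x.to(device)
    y_pred = model(x)
    preds = y_pred.argmax(dim=-1).cpu()
    incorrect_batch_indices = (preds != y.squeeze(1)).nonzero(as_tuple=True)[0]
    # idxs are the original dataset indices, so they survive shuffling and batching
    mislabeled_files = [iterator.dataset.data[idxs[j].item()][0]
                        for j in incorrect_batch_indices]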
I'm trying to train a pretrained visual transformer (ViT) on a new dataset.
The dataset is made up of jpg images sorted into folders (train, val, test) and has 4 classes.
I want to use map() on the dataset for preprocessing.
I added __getitem__ and __len__ so that it will be a map-style dataset.
But I still get the error:
AttributeError: 'DataLoader' object has no attribute 'map'
Here's the code:
from torch.utils.data import DataLoader
class MyDataset(Dataset):
def __init__(self, path, transform):
self.files = glob.glob(path)
print(type(self.files))
self.transform = transform
self.labels = [filepath.split('/')[-2] for filepath in self.files]
def __getitem__(self, item):
file = self.files[item]
label = self.labels[item]
file = Image.open(file)
file = self.transform(file)
return file, label
def __len__(self):
return len(self.files)
transform=transforms.Compose([transforms.ToTensor()])
train_data = MyDataset(train_path, transform)
val_data = MyDataset(val_path, transform)
test_data = MyDataset(test_path, transform)
train = DataLoader(train_data , batch_size=1, shuffle=True, num_workers=3)
val = DataLoader(val_data , batch_size=1, shuffle=True)
test = DataLoader(test_data , batch_size=1, shuffle=True)
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
data_collator = default_data_collator
def preprocess_images(examples):
images = examples['img']
images = [np.array(image, dtype=np.uint8) for image in images]
images = [np.moveaxis(image, source=-1, destination=0) for image in images]
inputs = feature_extractor(images=images)
examples['pixel_values'] = inputs['pixel_values']
return examples
features = Features({
'label': ClassLabel(
names=['class1', 'class2', 'class3', 'class4']),
'img': Array3D(dtype="int64", shape=(3, 32, 32)),
'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
})
preprocessed_train_ds = train.map(preprocess_images, batched=True, features=features)
preprocessed_val_ds = val.map(preprocess_images, batched=True, features=features)
preprocessed_test_ds = test.map(preprocess_images, batched=True, features=features)
What else can I do?
When you instantiate the DataLoader for the train, test and val datasets, you can pass collate_fn=preprocess_images. You will have to update the function to match your requirements.
e.g.,
DataLoader(train_data, collate_fn=preprocess_images, batch_size=1, shuffle=True, num_workers=3)
See here
To quote:
collate_fn (Callable, optional): merges a list of samples to form a
mini-batch of Tensor(s). Used when using batched loading from a
map-style dataset.
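As a rough sketch of what such a collate_fn could look like here (assuming MyDataset keeps returning (image, label) pairs and feature_extractor is the ViTFeatureExtractor created above; mapping the string labels to class ids is left to you):
def collate_vit(batch):
    # batch is a list of (image, label) tuples produced by MyDataset.__getitem__
    images = [img for img, _ in batch]
    labels = [label for _, label in batch]       # still folder-name strings at this point
    inputs = feature_extractor(images=images, return_tensors="pt")
    return {"pixel_values": inputs["pixel_values"], "labels": labels}

train = DataLoader(train_data, batch_size=1, shuffle=True, num_workers=3,
                   collate_fn=collate_vit)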
I am new to PyTorch.
The task: create train, validation and test classes.
Data:
A CSV file with 2 columns, where id is the name of the picture stored in the train and test1 directories.
Directories with the train and test images.
My code so far:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from PIL import Image
##transforms
transforms = transforms.Compose([
transforms.Resize(64),
transforms.ToTensor(),
transforms.Normalize(mean = [0.485, 0.456, 0.406],
std = [0.229, 0.224, 0.225])
])
##dataloader
dataset_path = "C:/Users/nikit/OneDrive/Desktop/PyTorch/train/train"
dataset = torchvision.datasets.ImageFolder(root = train_data_path, transform = transforms)
val_split = 0.2
dataset_size = len(dataset)
val_size = int(test_split * dataset_size)
train_size = dataset_size - val_size
train_data, val_data = torch.utils.data.random_split(dataset, [train_size, test_size])
##define test
test_data_path = "C:/Users/nikit/OneDrive/Desktop/PyTorch/test1/test1"
test_data = torchvision.datasets.ImageFolder(root = train_data_path, transform = transforms)
##data load
batch_size = 64
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size)
val_data_loader = torch.utils.data.DataLoader(val_data, batch_size = batch_size)
test_data_loader = torch.utils.data.DataLoader(test_data, batch_size = batch_size)
Please help me connect the values with the CSV data.
train_df = pd.DataFrame(columns=["img_name","label"])
train_df["img_name"] = os.listdir(path_train)
for idx, i in enumerate(os.listdir(path_train)):
if "cat" in i:
train_df["label"][idx] = 0
if "dog" in i:
train_df["label"][idx] = 1
train_df.to_csv (r'train_csv.csv', index = False, header=True)
Creating a Custom Dataset for your files
A custom Dataset class must implement three functions: __init__, __len__, and __getitem__. Take a look at this implementation; the FashionMNIST images are stored in a directory img_dir, and their labels are stored separately in a CSV file annotations_file:
import os
import pandas as pd
from torchvision.io import read_image
class CustomImageDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None,
target_transform=None):
self.img_labels = pd.read_csv(annotations_file)
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label
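To connect this to your setup, a sketch of the wiring (the CSV name and train directory are the ones from above; note that read_image already returns a uint8 tensor, so I assume ConvertImageDtype instead of ToTensor in the transform):
import torch
from torch.utils.data import DataLoader
from torchvision import transforms as T

img_transform = T.Compose([
    T.Resize((64, 64)),                      # fixed square size so batches collate
    T.ConvertImageDtype(torch.float),        # read_image gives uint8 tensors
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomImageDataset("train_csv.csv",
                                   "C:/Users/nikit/OneDrive/Desktop/PyTorch/train/train",
                                   transform=img_transform)
train_data_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
images, labels = next(iter(train_data_loader))   # images: [64, 3, 64, 64], labels: [64]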
I have a dataset of images that looks like this:
array([[[[0.35980392, 0.26078431, 0.14313725],
[0.38137255, 0.26470588, 0.15196078],
[0.51960784, 0.3745098 , 0.26176471],
...,
[0.34313725, 0.22352941, 0.15 ],
[0.30784314, 0.2254902 , 0.15686275],
[0.28823529, 0.22843137, 0.16862745]],
[[0.38627451, 0.28235294, 0.16764706],
[0.45098039, 0.32843137, 0.21666667],
[0.62254902, 0.47254902, 0.36470588],
...,
[0.34607843, 0.22745098, 0.15490196],
[0.30686275, 0.2245098 , 0.15588235],
[0.27843137, 0.21960784, 0.16176471]],
[[0.41568627, 0.30098039, 0.18431373],
[0.51862745, 0.38529412, 0.27352941],
[0.67745098, 0.52058824, 0.40980392],
...,
[0.34901961, 0.22941176, 0.15588235],
[0.29901961, 0.21666667, 0.14901961],
[0.26078431, 0.20098039, 0.14313725]],
...,
This is how I download it:
data, attrs = fetch_dataset()
This is how the fetch_dataset() function works:
import numpy as np
import os
from skimage.transform import resize
import skimage.io
import pandas as pd
def fetch_dataset(attrs_name = "lfw_attributes.txt",
images_name = "lfw-deepfunneled",
dx=80,dy=80,
dimx=45,dimy=45
):
#download if not exists
if not os.path.exists(images_name):
print("images not found, donwloading...")
os.system("wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz -O tmp.tgz")
print("extracting...")
os.system("tar xvzf tmp.tgz && rm tmp.tgz")
print("done")
assert os.path.exists(images_name)
if not os.path.exists(attrs_name):
print("attributes not found, downloading...")
os.system("wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/%s" % attrs_name)
print("done")
#read attrs
df_attrs = pd.read_csv("lfw_attributes.txt",sep='\t',skiprows=1,)
df_attrs = pd.DataFrame(df_attrs.iloc[:,:-1].values, columns = df_attrs.columns[1:])
#read photos
photo_ids = []
for dirpath, dirnames, filenames in os.walk(images_name):
for fname in filenames:
if fname.endswith(".jpg"):
fpath = os.path.join(dirpath,fname)
photo_id = fname[:-4].replace('_',' ').split()
person_id = ' '.join(photo_id[:-1])
photo_number = int(photo_id[-1])
photo_ids.append({'person':person_id,'imagenum':photo_number,'photo_path':fpath})
photo_ids = pd.DataFrame(photo_ids)
# print(photo_ids)
#mass-merge
#(photos now have same order as attributes)
df = pd.merge(df_attrs,photo_ids,on=('person','imagenum'))
assert len(df)==len(df_attrs),"lost some data when merging dataframes"
# print(df.shape)
#image preprocessing
all_photos =df['photo_path'].apply(skimage.io.imread)\
.apply(lambda img:img[dy:-dy,dx:-dx])\
.apply(lambda img: resize(img,[dimx,dimy]))
all_photos = np.stack(all_photos.values)#.astype('uint8')
all_attrs = df.drop(["photo_path","person","imagenum"],axis=1)
return all_photos,all_attrs
Next I'm trying to convert my dataset to Tensor:
import torch
import torchvision.transforms
from torchvision import transforms as transforms
class MyDataset(torch.utils.data.Dataset):
def __init__(self, data):
self.dataset = data
def __getitem__(self, idx):
sample = self.dataset[idx]
data, label = sample[0], sample[1]
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
return transform(data), torch.tensor(label)
def __len__(self):
return len(self.dataset)
Then:
from sklearn.model_selection import train_test_split
train, val = train_test_split(data, test_size=0.25, random_state=42)
train_dataset = MyDataset(train)
val_dataset = MyDataset(val)
Autoencoder class:
from copy import deepcopy
class Autoencoder(nn.Module):
def __init__(self):
#<define the encoder and decoder architectures>
super(Autoencoder,self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(3, 6, kernel_size=5),
nn.ReLU(True),
nn.Conv2d(6,16,kernel_size=5),
nn.ReLU(True))
self.decoder = nn.Sequential(
nn.Conv2d(16, 6, kernel_size=5),
nn.ReLU(True),
nn.Conv2d(6,3,kernel_size=5),
nn.ReLU(True))
def forward(self, x):
latent_code = self.encoder(x)
reconstruction = self.decoder(latent_code)
return reconstruction, latent_code
Then:
criterion = nn.BCELoss()
autoencoder = Autoencoder()
optimizer = torch.optim.Adam(autoencoder.parameters())
The training itself:
#<your autoencoder training code here>
num_epochs = 5
for epoch in range(num_epochs):
for x in train_dataset:
img, _ = x
img = Variable(img).cpu()
output = autoencoder(img)
loss = criterion(output, img)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('epoch [{}/{}], loss:{:.4f}'.format(epoch+1, num_epochs, loss.data()))
And I get this error:
RuntimeError: output with shape [1, 45, 3] doesn't match the broadcast shape [3, 45, 3]
I think I'm making mistakes somewhere in preparing my data. Could someone please explain how to do it properly in my case? Thanks for any help.
I have two dataset folders of tif images: one is a folder called BMMCdata, and the other is the masks of the BMMCdata images, called BMMCmasks (the image names correspond). I am trying to make a customised dataset and also split the data randomly into train and test. At the moment I am getting this error:
self.filenames.append(fn)
AttributeError: 'CustomDataset' object has no attribute 'filenames'
Any comment will be appreciated a lot.
import torch
from torch.utils.data.dataset import Dataset # For custom data-sets
from torchvision import transforms
from PIL import Image
import os.path as osp
import glob
folder_data = "/Users/parto/PycharmProjects/U-net/BMMCdata/data"
class CustomDataset(Dataset):
def __init__(self, root):
self.filename = folder_data
self.root = root
self.to_tensor = transforms.ToTensor()
filenames = glob.glob(osp.join(folder_data, '*.tif'))
for fn in filenames:
self.filenames.append(fn)
self.len = len(self.filenames)
print(fn)
def __getitem__(self, index):
image = Image.open(self.filenames[index])
return self.transform(image)
def __len__(self):
return self.len
custom_img = CustomDataset(folder_data)
# total images in set
print(custom_img.len)
train_len = int(0.6*custom_img.len)
test_len = custom_img.len - train_len
train_set, test_set = CustomDataset.random_split(custom_img, lengths=[train_len, test_len])
# check lens of subset
len(train_set), len(test_set)
train_set = CustomDataset(folder_data)
train_set = torch.utils.data.TensorDataset(train_set, train=True, batch_size=4)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True, num_workers=1)
print(train_set)
print(train_loader)
test_set = torch.utils.data.DataLoader(Dataset, batch_size=4, sampler= train_sampler)
test_loader = torch.utils.data.DataLoader(Dataset, batch_size=4)
Answer given by ptrblck in the PyTorch community, thank you:
# get all the image and mask path and number of images
folder_data = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCdata\\data\\*.tif")
folder_mask = glob.glob("D:\\Neda\\Pytorch\\U-net\\BMMCmasks\\masks\\*.tif")
# split these path using a certain percentage
len_data = len(folder_data)
print(len_data)
train_size = 0.6
train_image_paths = folder_data[:int(len_data*train_size)]
test_image_paths = folder_data[int(len_data*train_size):]
train_mask_paths = folder_mask[:int(len_data*train_size)]
test_mask_paths = folder_mask[int(len_data*train_size):]
class CustomDataset(Dataset):
def __init__(self, image_paths, target_paths, train=True): # initial logic happens like transform
self.image_paths = image_paths
self.target_paths = target_paths
self.transforms = transforms.ToTensor()
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
mask = Image.open(self.target_paths[index])
t_image = self.transforms(image)
return t_image, mask
def __len__(self): # return count of sample we have
return len(self.image_paths)
train_dataset = CustomDataset(train_image_paths, train_mask_paths, train=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=1)
test_dataset = CustomDataset(test_image_paths, test_mask_paths, train=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=1)
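One small caveat I would add (my note, not part of the original answer): the mask comes back as a PIL Image, which the default collate_fn cannot stack into a batch, so converting it to a tensor as well keeps the loaders above working with batch_size=4. A minimal sketch of the adjusted __getitem__:
def __getitem__(self, index):
    image = Image.open(self.image_paths[index])
    mask = Image.open(self.target_paths[index])
    t_image = self.transforms(image)
    t_mask = self.transforms(mask)       # tensorise the mask too so it can be collated
    return t_image, t_mask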