MarkRCNN output images (prediction) - python

I'm following this object detection tutorial in Pytorch for Mask RCNN: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html and at the end there are some verification image.
How can I get this prediction image? I'm working with object deteciton.
Is there some way to output images from trained model that I can see is my network is learn something?
This is my code:
import os
import numpy as np
import torch
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
#from engine import train_one_epoch, evaluate
from vision.references.detection.engine import train_one_epoch, evaluate
import utils
import transforms as T
import matplotlib.pyplot as plt
class Moj_Dataset_ArT(object):
def __init__(self, root, transforms):
self.root = root
self.transforms = transforms
# load all image files, sorting them to
# ensure that they are aligned
self.imgs = list(sorted(os.listdir(os.path.join(root, "NEW_train"))))
self.masks = list(sorted(os.listdir(os.path.join(root, "train_masks"))))
def __getitem__(self, idx):
# load images and masks
img_path = os.path.join(self.root, "NEW_train", self.imgs[idx])
mask_path = os.path.join(self.root, "train_masks", self.masks[idx])
img = Image.open(img_path).convert("RGB")
# note that we haven't converted the mask to RGB,
# because each color corresponds to a different instance
# with 0 being background
mask = Image.open(mask_path)
mask = np.array(mask)
# instances are encoded as different colors
obj_ids = np.unique(mask)
# first id is the background, so remove it
obj_ids = obj_ids[1:]
# split the color-encoded mask into a set
# of binary masks
masks = mask == obj_ids[:, None, None]
# get bounding box coordinates for each mask
num_objs = len(obj_ids)
boxes = []
for i in range(num_objs):
pos = np.where(masks[i])
xmin = np.min(pos[1])
xmax = np.max(pos[1])
ymin = np.min(pos[0])
ymax = np.max(pos[0])
boxes.append([xmin, ymin, xmax, ymax])
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# there is only one class
labels = torch.ones((num_objs,), dtype=torch.int64)
masks = torch.as_tensor(masks, dtype=torch.uint8)
image_id = torch.tensor([idx])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# suppose all instances are not crowd
iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = masks
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
if self.transforms is not None:
img, target = self.transforms(img, target)
return img, target
def __len__(self):
return len(self.imgs)
def get_model_instance_segmentation(num_classes):
# load an instance segmentation model pre-trained pre-trained on COCO
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=False) #bilo na True
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# now get the number of input features for the mask classifier
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
# and replace the mask predictor with a new one
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
hidden_layer,
num_classes)
return model
def get_transform(train):
transforms = []
transforms.append(T.ToTensor())
#if train:
#transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
def draw_loss(ml):
plt.figure(figsize=(10,5))
plt.title("Training Loss")
#plt.plot(val_losses,label="val")
plt.plot(ml,label="train")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.xlim([0,80])
plt.ylim([0, 1.2])
plt.show()
def main():
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# our dataset has two classes only - background and person
num_classes = 2
# use our dataset and defined transformations
dataset = Moj_Dataset_ArT('Train/ArT', get_transform(train=True))
dataset_test = Moj_Dataset_ArT('Train/ArT', get_transform(train=False))
# split the dataset in train and test set
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-500:])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=4, shuffle=True, num_workers=4,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1, shuffle=False, num_workers=4,
collate_fn=utils.collate_fn)
# get the model using our helper function
model = get_model_instance_segmentation(num_classes)
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=30,
gamma=0.1) # gamma bila na 0.5
# let's train it for 10 epochs
num_epochs = 200
PATH = 'home//Train/ArT/models/'
ml =[]
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
loss_value = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
ml.append(loss_value)
# update the learning rate
lr_scheduler.step()
torch.save(model, PATH)
# evaluate on the test dataset
#evaluate(model, data_loader_test, device=device)
print(ml)
draw_loss(ml)
if __name__ == "__main__":
main()
Something like this
When I run this after evalueate():
img, _ = dataset_test[20]
with torch.no_grad():
prediction = model([img.to(device)])
imag = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
imaag = Image.fromarray(prediction[0]['masks'][0, 0].mul(255).byte().cpu().numpy())
imag.show()
imaag.show()
I get this (not good image):
I also tried with Detectron2
but then I need to make cfg file and train on the other way, but I'm using this Pytorch tutorial

Related

How to get the filenames of misclassified images in the test data set - pytorch?

I am trying to use AlexNet to classify spectrogram images generated for 3s audio segments. I have successfully trained my dataset and am trying to identify which images the model misclassified.
I am able to obtain the filename of a image by calling iterator.dataset.data_df.adressfname, however I am unsure how to use this statement in the for loop in the get_predictions function. If I try to retrieve the filenames using iterator.dataset.data_df.adressfname[i], I get the following error: expected Tensor as element 0 in argument 0, but got str
Ultimately, I want to create a dataframe that contains the filename, actual label and predicted label. Does anyone have any suggestions?
class CustomDataset(Dataset):
def __init__(self, img_path, csv_file, transforms):
self.imgs_path = img_path
self.csv_train_file = csv_file
file_list = glob.glob(self.imgs_path + "*")
self.data = []
self.data_df = pd.read_csv(self.csv_train_file)
self.transforms = transforms
for ind in self.data_df.index:
img_path = self.data_df['spectrogramSegFilename'][ind]
class_name = self.data_df['dx'][ind]
self.data.append([img_path, class_name])
self.class_map = {"ProbableAD" : 0, "Control": 1}
self.img_dim = (256, 256)
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
img_path, class_name = self.data[idx]
img = cv2.imread(img_path)
img = cv2.resize(img, self.img_dim)
class_id = self.class_map[class_name]
img_tensor = torch.from_numpy(img)
img_tensor = img_tensor.permute(2, 0, 1)
data = self.transforms(img_tensor)
class_id = torch.tensor([class_id])
return data, class_id
if __name__ == "__main__":
transformations = transforms.Compose([transforms.ToPILImage(), transforms.Resize(256), transforms.CenterCrop(256), transforms.ToTensor(), transforms.Normalize((0.49966475, 0.1840554, 0.34930056), (0.35317238, 0.17343724, 0.1894943))])
train_dataset = CustomDataset("/spectrogram_images/spectrogram_train/", "train_features_segmented.csv", transformations)
test_dataset = CustomDataset("/spectrogram_images/spectrogram_test/", "test_features_segmented.csv", transformations)
train_dataset = CustomDataset("spectrogram_images/spectrogram_train/", "/train_features_segmented.csv")
test_dataset = CustomDataset("spectrogram_images/spectrogram_test/", "/test_features_segmented.csv")
train_data_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)
def get_predictions(model, iterator, device):
model.eval()
images = []
labels = []
probs = []
participant_ids = []
with torch.no_grad():
for i, (x, y) in enumerate(iterator):
x = x.to(device)
y_pred = model(x)
y_prob = F.softmax(y_pred, dim=-1)
participant_ids.append(iterator.dataset.data_df.adressfname[i])
images.append(x.cpu())
labels.append(y.cpu())
probs.append(y_prob.cpu())
images = torch.cat(images, dim=0)
labels = torch.cat(labels, dim=0)
probs = torch.cat(probs, dim=0)
participant_ids = torch.cat(participant_ids, dim=0)
return images, labels, probs, participant_ids
Just add a third field to the return signature of __getitem__():
def __getitem__(self,idx):
...
return data,class_id,img_path
Then, when you call the dataloader:
for i, (x,y,img_paths) in enumerate(iterator):
... call model ...
... compare outputs to labels ...
... identify incorrect batch indices
mislabeled_files = [img_paths[idx] for idx in incorrect_batch_indices]
And there you have it. Note that in your current code block, i indexes the batch index, which really has nothing to do with the dataset object wrapped in the dataloader (both because the dataloader has a batch size so it collates multiple elements from the dataset into a single index, and because the dataloader shuffles the elements in the dataset) so you should not use this index (i) to reference the dataset object. If you wanted to reference the underlying data items in the dataset object you could instead simply return the data idx in __getitem__():
def __getitem__(self,idx):
...
return data,class_id,idx
You can then use this idx to reference the original dataset data and get the file names that way.

tensorboard,, the scalar value is not recorded

The scalar value is not being recorded as in these pictures.
enter image description here 2.
enter image description here
I don't know if I'm doing the wrong with code for training or code for tensorboard.
It's my first time using a tensorboard, so I don't know if I wrote the correct code for viewing the tensorboard.
If the scalar value is not recorded as shown in the attached image even though the code for the tensorboard is written correctly, I would like to look at the code part for training.
Therefore, I would appreciate it if you could check code only from recording the scalar value to verifying it with the tensorboard
part of the main function of <run.py>
def main():
args = parser.parse_args()
assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."
# check if gpu training is available
if not args.disable_cuda and torch.cuda.is_available():
args.device = torch.device('cuda')
cudnn.deterministic = True
cudnn.benchmark = True
else:
args.device = torch.device('cpu')
args.gpu_index = -1
dataset = ContrastiveLearningDataset(args.data)
train_dataset = dataset.__getitem__(args.dataset_name, args.n_views, 1)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=True,
num_workers=args.workers, pin_memory=True, drop_last=True)
#f(*)
model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)
optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
last_epoch=-1)
with torch.cuda.device(args.gpu_index):
simclr = SimCLR(model=model, optimizer=optimizer, scheduler=scheduler, args=args)
simclr.train(train_loader)
simclr.writer.flush()
<simclr.py> to write train function in main()
class SimCLR(object):
def __init__(self, *args, **kwargs):
self.args = kwargs['args']
self.model = kwargs['model'].to(self.args.device)
self.optimizer = kwargs['optimizer']
self.scheduler = kwargs['scheduler']
self.writer = SummaryWriter()
logging.basicConfig(filename=os.path.join(self.writer.log_dir, 'training.log'), level=logging.DEBUG)
self.criterion = torch.nn.CrossEntropyLoss().to(self.args.device)
def info_nce_loss(self, features):
labels = torch.cat([torch.arange(self.args.batch_size) for i in range(self.args.n_views)], dim=0)
labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float()
labels = labels.to(self.args.device)
features = F.normalize(features, dim=1)
similarity_matrix = torch.matmul(features, features.T)
mask = torch.eye(labels.shape[0], dtype=torch.bool).to(self.args.device)
labels = labels[~mask].view(labels.shape[0], -1)
similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1)
positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1)
negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1)
logits = torch.cat([positives, negatives], dim=1)
labels = torch.zeros(logits.shape[0], dtype=torch.long).to(self.args.device)
logits = logits / self.args.temperature
return logits, labels
def train(self, train_loader):
scaler = GradScaler(enabled=self.args.fp16_precision)
# save config file
save_config_file(self.writer.log_dir, self.args)
n_iter = 0 #global optimization step
logging.info(f"Start SimCLR training for {self.args.epochs} epochs.")
logging.info(f"Training with gpu: {self.args.disable_cuda}.")
for epoch_counter in range(self.args.epochs):
for images, _ in tqdm(train_loader):
images = torch.cat(images, dim=0)
images = images.to(self.args.device)
with autocast(enabled=self.args.fp16_precision):
features = self.model(images)
logits, labels = self.info_nce_loss(features)
loss = self.criterion(logits, labels)
self.optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.step(self.optimizer)
scaler.update()
if n_iter % self.args.log_every_n_steps == 0:
top1, top5 = accuracy(logits, labels, topk=(1, 5))
self.writer.add_scalar('loss', loss, global_step=n_iter)
self.writer.add_scalar('acc/top1', top1[0], global_step=n_iter)
self.writer.add_scalar('acc/top5', top5[0], global_step=n_iter)
self.writer.add_scalar('learning_rate', self.scheduler.get_lr()[0], global_step=n_iter)
n_iter += 1
# warmup for the first 10 epochs
if epoch_counter >= 1:
try:
self.scheduler.step()
except ZeroDivisionError:
print("ZeroDivision")
logging.debug(f"Epoch: {epoch_counter}\tLoss: {loss}\tTop1 accuracy: {top1[0]}")
logging.info("Training has finished.")
# save model checkpoints
checkpoint_name = 'checkpoint_{:04d}.pth.tar'.format(self.args.epochs)
save_checkpoint({
'epoch': self.args.epochs,
'arch': self.args.arch,
'state_dict': self.model.state_dict(),
'optimizer': self.optimizer.state_dict(),
}, is_best=False, filename=os.path.join(self.writer.log_dir, checkpoint_name))
logging.info(f"Model checkpoint and metadata has been saved at {self.writer.log_dir}.")

ModuleNotFoundError: No module named 'torch.hub'

I am using the following packages in my project:
boto3==1.20.19
flashtorch==0.1.3
matplotlib==3.3.4
numpy==1.13.3
Pillow==8.4.0
scikit_learn==1.0.1
scipy==1.5.4
seaborn==0.11.2
torch==1.10.0
torchvision==0.11.1
When I try to execute the following dependencies:
import seaborn as sns
import copy
import boto3
from scipy.stats import spearmanr
import random
import csv
from sklearn.model_selection import train_test_split
import copy
import time
import numpy as np
import os
import pickle
import torch
from torchvision import transforms
import torchvision.models as models
from torch.utils import data
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.metrics import f1_score
from scipy.stats import pearsonr
from flashtorch.saliency import Backprop
from scipy.ndimage.filters import gaussian_filter
def split_train_test(test_size=0.1):
# Make partition directory.
print("make partition directory if not exists")
partition_p = os.path.join("..", "data", "partition")
if not os.path.exists(partition_p):
os.mkdir(partition_p)
# List all input filenames and shuffle.
base_p = os.path.join("..", "data", "imgs")
img_ps = [] # e.g. ../data/imgs/MINT/foo.jpg.
for dir_name in os.listdir(base_p):
dir_p = os.path.join(base_p, dir_name)
for img_fname in os.listdir(dir_p):
img_p = os.path.join(dir_name, img_fname)
img_ps.append(img_p)
random.shuffle(img_ps)
# Keep only baseball cards in MINT or POOR condition, for now.
print("Keep only baseball cards, for now")
baseball_sport = "185223"
img_ps = [p for p in img_ps if baseball_sport in p]
# Keep only desired conditions.
print("Exclude good and fair cards to create a 9, 7, 5, 3, 1 scale")
img_ps = [p for p in img_ps if "GOOD" not in p and "FAIR" not in p]
# Remove "bad" images.
# Try to load each image, and only retain it if we can load it
# successfully as a (255, 255, 3) image.
print("remove 'bad' images")
old_img_ps = copy.copy(img_ps)
img_ps = []
counter = 0
for p in old_img_ps:
try:
# counter.
counter += 1
if counter % 100 == 0:
print(counter)
# validate image.
full_p = os.path.join("..", "data", "imgs", p)
img = Image.open(full_p)
img = np.array(img)
assert img.shape[2] == 3
# add image to paths.
img_ps.append(p)
except Exception as e:
print("Skip bad image {}".format(p))
print(e)
# Train/test split.
print("split train, test")
train_ps, test_ps = train_test_split(img_ps, test_size=test_size)
# Create partition dictionary.
print("create partition object")
partition = {
"train": train_ps,
"test": test_ps
}
# Pickle partition object.
of_p = os.path.join(partition_p, "partition.p")
with open(of_p, "wb") as of:
pickle.dump(partition, of)
print("wrote {}".format(of_p))
# Map all unique labels to integer IDs.
# lbl_to_idx is a dictinary mapping e.g. "EX" -> 0, etc.
# str_labels = ["MINT", "NM", "EX", "VG", "GOOD", "FAIR", "POOR"]
str_labels = ["MINT", "NM", "EX", "VG", "POOR"]
lbl_to_idx = {lbl: i for i, lbl in enumerate(str_labels)}
# Create labels dictionary.
# fname_to_lbl is a dictionary mapping e.g. "some_fname.npy" -> 2
# Example filename.100097_sport185226_conditionVG_preprocessed.npy
fname_to_lbl = {}
for p in img_ps:
# Extract label.
str_lbl = p.split("_")[-1].lstrip("condition").rstrip(".jpg")
int_lbl = lbl_to_idx[str_lbl]
# Add to dictionary.
fname_to_lbl[p] = int_lbl
# Pickle fname_to_lbl mapping.
of_p = os.path.join(partition_p, "labels.p")
with open(of_p, "wb") as of:
pickle.dump(fname_to_lbl, of)
print("wrote {}".format(of_p))
class Dataset(data.Dataset):
def __init__(self, list_IDs, labels, apply_rotations=False):
"""
parameters
----------
list_IDs (str []) file paths.
labels (str []) target labels.
apply_rotations (bool) : if True then randomly rotate images.
NOTE! This should be FALSE for testing data.
"""
self.labels = labels # dictionary. key=fname, value=integer lbl.
self.list_IDs = list_IDs # list of filenames.
self.apply_rotations = apply_rotations
self.rotation_fn = transforms.Compose([
transforms.RandomAffine(degrees=90, shear=90)
])
# self.rotation_fn = transforms.RandomRotation(90) # torch rotation function.
# Initialize AWS storage (s3) resource.
self.s3_resource = boto3.resource("s3")
# Make ML_images directory and subdirectories.
conditions = ["MINT", "NM", "EX", "VG", "FAIR", "GOOD", "POOR"]
base_p = os.path.join("..", "data", "ml_images")
if not os.path.exists(base_p):
os.mkdir(base_p)
for condition in conditions:
p = os.path.join(base_p, condition)
os.mkdir(p)
def __len__(self):
return len(self.list_IDs)
def download_from_s3(self, remote_fname):
# download fname from s3 as ../data/ml_images/{REMOTE FNAME}
local_fname = os.path.join("..", "data", "ml_images", remote_fname)
print("download {}".format(remote_fname))
# download image from S3 as "img.jpg"
resp = self.s3_resource.Object(
"mintcondition",
remote_fname
).download_file(local_fname)
return None
def load_and_preprocess_img(self, img_p, apply_rotations=False):
""" load and preprocess img, optionally applying rotations
parameters:
-----------
img_p (str) path to image.
apply_rotations (bool) if True apply rotations to the image.
should be false to all testing.
returns:
--------
X (torch tensor) Image as torch tensor of shape (3, 255, 255)
"""
# Load image and reshape to (3, 255, 255)
img = Image.open(img_p)
img = img.resize((255, 255), Image.ANTIALIAS)
# optionally rotate.
if apply_rotations:
img = self.rotation_fn(img)
# Cast to torch tensor.
X = np.array(img) # (255, 255, 3) numpy
assert X.shape == (255, 255, 3)
X = X / 255 # "normalize"
X = X.swapaxes(1, 2) # (255, 3, 255)
X = X.swapaxes(0, 1) # (3, 255, 255) numpy
X = torch.from_numpy(X).float() # (3, 255, 255) torch
return X
def __getitem__(self, index):
try:
# Select sample.
remote_p = self.list_IDs[index]
# Get label.
y = self.labels[remote_p]
# If the image does not exist locally, get it from S3.
local_p = os.path.join("..", "data", "ml_images", remote_p)
if not os.path.exists(local_p):
self.download_from_s3(remote_p)
# Load image and reshape to torch tensor of shape (3, 255, 255)
X = self.load_and_preprocess_img(local_p, apply_rotations=self.apply_rotations) # torch tensor.
except Exception as e:
print(e)
print("exception loading data..using random image, label instead")
X = np.random.random((3, 255, 255))
X = torch.from_numpy(X).float()
y = 0
return X, y
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
def reenable_gradients(model):
for param in model.parameters():
param.requires_grad = True
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet18
"""
model_ft = models.resnet18(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "alexnet":
""" Alexnet
"""
model_ft = models.alexnet(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = torch.nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "vgg":
model_ft = models.vgg11_bn(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "squeezenet":
model_ft = models.squeezenet1_0(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
model_ft.num_classes = num_classes
input_size = 224
elif model_name == "densenet":
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
Be careful, expects (299,299) sized images and has auxiliary output
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
# Handle the auxilary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, device="cpu"):
since = time.time()
val_acc_history = []
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
# Initiailize a file to track accuracy over epochs.
acc_of_p = os.path.join("..", "data", "model_accuracy.csv")
acc_of = open(acc_of_p, "w", newline="")
header = ["epoch", "phase", "accuracy", "F", "r"]
w = csv.writer(acc_of)
w.writerow(header)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'test']:
if phase == 'train':
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0.0
ypreds = []
ytrues = []
# Iterate over data.
batch_num = 0
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# Get model outputs and calculate loss
# Special case for inception because in training it has an auxiliary output. In train
# mode we calculate the loss by summing the final output and the auxiliary output
# but in testing we only consider the final output.
if is_inception and phase == 'train':
# From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
outputs, aux_outputs = model(inputs)
loss1 = criterion(outputs, labels)
loss2 = criterion(aux_outputs, labels)
loss = loss1 + 0.4 * loss2
else:
outputs = model(inputs)
loss = criterion(outputs, labels)
_, preds = torch.max(outputs, 1)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
F = f1_score(preds.numpy(), labels.data.numpy(), average="micro")
ypreds.extend(list(preds.numpy()))
ytrues.extend(list(labels.data.numpy()))
# counter.
batch_num += 1
if batch_num % 1 == 0:
correct = float(torch.sum(preds == labels.data))
incorrect = float(torch.sum(preds != labels.data))
perc_correct = 100 * correct / (correct + incorrect)
msg = """
epoch {} batch {} : percent correct={:.4f} F={:.4f}
""".format(epoch, batch_num, perc_correct, F)
print(msg)
print(preds)
print(labels.data)
# rank correlation of predicted, actual.
rho, p = spearmanr(preds.numpy(), labels.data.numpy())
print("correlation of pred, actual: rho = {:.4f}".format(rho))
epoch_loss = running_loss / len(dataloaders[phase].dataset)
epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
epoch_F = f1_score(ypreds, ytrues, average="micro")
epoch_R = pearsonr(ypreds, ytrues)[0]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'val' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
if phase == 'val':
val_acc_history.append(epoch_acc)
# Write latest train and test accuracies to output file.
out = [epoch, phase, epoch_acc.numpy(), epoch_F, epoch_R]
w.writerow(out)
acc_of.flush()
# Pickle the model after end of epoch.
of_p = os.path.join("..", "models", "latest_model.p")
with open(of_p, "wb") as of:
pickle.dump(model, of)
print("wrote {}".format(of_p))
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# close output file.
of.flush()
of.close()
print("wrote {}".format(acc_of_p))
# load best model weights
model.load_state_dict(best_model_wts)
return model, val_acc_history
def train_CNN_model(num_classes=5, load_latest_model=False):
# Use CUDA if available.
use_cuda = torch.cuda.is_available()
print("check if CUDA can be used -> {}".format(use_cuda))
device = torch.device("cuda:0" if use_cuda else "cpu")
# Define transformations of the images.
# Note that the normalization transform with these specific
# Parameters is necessary for working with pretrained AlexNet.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
img_transforms = transforms.Compose([
normalize
])
# Parameters
params = {
"batch_size": 64,
"shuffle": True,
"num_workers": 4
}
max_epochs = 10
# Datasets.
print("load train/test partition and labels")
partition_p = os.path.join("..", "data", "partition", "partition.p")
labels_p = os.path.join("..", "data", "partition", "labels.p")
partition = pickle.load(open(partition_p, "rb"))
labels = pickle.load(open(labels_p, "rb"))
print("create generators over train, test data")
train_set = Dataset(partition["train"], labels, apply_rotations=True) # do rotate train data.
train_g = data.DataLoader(train_set, **params)
test_set = Dataset(partition["test"], labels, apply_rotations=False) # don't rotate test data.
test_g = data.DataLoader(test_set, **params)
if load_latest_model:
print("load pickled latest_model.p")
model_p = os.path.join("..", "models", "latest_model.p")
model = pickle.load(open(model_p, "rb"))
else:
model, _ = initialize_model(
model_name="resnet", # resnet 18 with skip connections.
num_classes=num_classes,
feature_extract=True, # if True only finetune top layer.
use_pretrained=True
)
model.to(device)
# Initialize optimizer, loss criterion.
optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.CrossEntropyLoss()
# Train model.
model, history = train_model(
model=model,
dataloaders={"train": train_g, "test": test_g},
criterion=criterion,
optimizer=optimizer,
num_epochs=max_epochs,
is_inception=False,
device=device
)
# Pickle best performing model.
of_p = os.path.join("..", "models", "best_model.p")
with open(of_p, "wb") as of:
pickle.dump(model, of)
print("wrote {}".format(of_p))
# Pickle history of best performing model.
of_p = os.path.join("..", "models", "history.p")
with open(of_p, "wb") as of:
pickle.dump(history, of)
print("wrote {}".format(of_p))
def load_and_preprocess_img(img_p):
# Load image and reshape to (3, 255, 255)
img = Image.open(img_p)
img = img.resize((255, 255), Image.ANTIALIAS)
# Cast to torch tensor.
X = np.array(img) # (255, 255, 3) numpy
X = X.reshape((1, 255, 255, 3))
X = X / 255 # "normalize"
X = X.swapaxes(2, 3) # (64, 255, 3, 255)
X = X.swapaxes(1, 2) # (64, 3, 255, 255) numpy
X = torch.from_numpy(X).float() # (64, 3, 255, 255) torch
return X
def saliency_map(model, img_p):
# Load and preprocess image.
X = load_and_preprocess_img(img_p) # (3, 255, 255 torch tensor)
X.requires_grad_() # This is critical to actually get gradients.
with torch.set_grad_enabled(True):
backprop = Backprop(model) # flashtorch.saliency Backprop object.
gradients = backprop.calculate_gradients(X, take_max=True, guided=False) # (1, 255, 255)
# Cast image, saliency maps to numpy arrays.
X = X.detach() # must 'detach' from gradients before slicing.
img_np = X.numpy()[0] # (3, 255, 255)
img_np = img_np.swapaxes(0, 1) # (255, 3, 255)
img_np = img_np.swapaxes(1, 2) # (255, 255, 3)
saliency_map_np = gradients.numpy()[0] # (255, 255)
print(max(np.max(saliency_map_np, axis=0)))
print(saliency_map_np)
print(img_np.shape)
print(saliency_map_np.shape)
# Smooth heatmap.
saliency_map_np = gaussian_filter(saliency_map_np, sigma=10)
# Plot image and overlay saliency map.
heatmap = sns.heatmap(saliency_map_np, alpha=0.5)
heatmap.imshow(img_np, cmap="YlGnBu")
plt.show()
return saliency_map_np
if __name__ == "__main__":
# split_train_test()
# train_CNN_model(load_latest_model=False, num_classes=5)
model_p = os.path.join("..", "models", "cloud_model.p")
model = pickle.load(open(model_p, "rb"))
reenable_gradients(model) # e.g., disable fine-tuning mode
# img_p = os.path.join("..", "demos", "demo_cards",
# "very_good.jpg")
img_p = os.path.join("..", "data", "imgs", "POOR",
"9078_sport185223_conditionPOOR.jpg")
sm = saliency_map(model, img_p)
I get the following error:
Traceback (most recent call last):
File "/home/admin/Desktop/Code/project/venv/lib/python3.6/site-packages/torchvision/_internally_replaced_utils.py", line 14, in <module>
from torch.hub import load_state_dict_from_url
ModuleNotFoundError: No module named 'torch.hub'
Any suggestions why? What is wrong with my installed packages?
I appreciate your replies!

How do I predict using a PyTorch model?

I created a pyTorch Model to classify images.
I saved it once via state_dict and the entire model like that:
torch.save(model.state_dict(), "model1_statedict")
torch.save(model, "model1_complete")
How can i use these models?
I'd like to check them with some images to see if they're good.
I am loading the model with:
model = torch.load(path_model)
model.eval()
This works alright, but i have no idea how to use it to predict on a new picture.
def predict(self, test_images):
self.eval()
# model is self(VGG class's object)
count = test_images.shape[0]
result_np = []
for idx in range(0, count):
# print(idx)
img = test_images[idx, :, :, :]
img = np.expand_dims(img, axis=0)
img = torch.Tensor(img).permute(0, 3, 1, 2).to(device)
# print(img.shape)
pred = self(img)
pred_np = pred.cpu().detach().numpy()
for elem in pred_np:
result_np.append(elem)
return result_np
network is VGG-19 and ref my source code.
like this architecture:
class VGG(object):
def __init__(self):
...
def train(self, train_images, valid_images):
train_dataset = torch.utils.data.Dataset(train_images)
valid_dataset = torch.utils.data.Dataset(valid_images)
trainloader = torch.utils.data.DataLoader(train_dataset)
validloader = torch.utils.data.DataLoader(valid_dataset)
self.optimizer = Adam(...)
self.criterion = CrossEntropyLoss(...)
for epoch in range(0, epochs):
...
self.evaluate(validloader, model=self, criterion=self.criterion)
...
def evaluate(self, dataloader, model, criterion):
model.eval()
for i, sample in enumerate(dataloader):
...
def predict(self, test_images):
...
if __name__ == "__main__":
network = VGG()
trainset, validset = get_dataset() # abstract function for showing
testset = get_test_dataset()
network.train(trainset, validset)
result = network.predict(testset)
A pytorch model is a function. You provide it with appropriately defined input, and it returns an output. If you just want to visually inspect the output given a specific input image, simply call it:
model.eval()
output = model(example_image)

Mapping images and their labels

I have images in a folder(train) and csv file(train.csv) containing image names and labels.
how to map images in one folder and labels in another csv file
how can i create a data frame with image data and labels.
multiclass classification
import tensorflow as tf
from tensorflow import keras
import pandas as pd
class MyTrainingData(keras.utils.Sequence):
def __init__(self, file, labels, batchSize):
self.file = file
self.label = labels
self.batchSize = batchSize
self.n_bathces = int(len(self.file) / self.batchSize)
def on_epoch_end(self): # it is called after every epoch
self.file, self.label = shuffle(self.file, self.label)
for i in range(50):
print(self.file[i], self.label[i], 'file-label')
def __len__(self):
return self.n_bathces
# called after every batch to get new batch or new 32 images and labels
def __getitem__(self, idx):
# this method calls by fit method with idx ranging from 0 to len(training_exmaples) / batch_size =
batchX = self.file[idx*self.batchSize: (idx+1)*self.batchSize]
batchY = self.label[idx*self.batchSize: (idx+1)*self.batchSize]
imgFiles = [image.load_img(name, target_size=(224, 224, 3)) for name in batchX] #loading 32 images
imgFiles = [image.img_to_array(img) for img in imgFiles] #preprocessing
imgFiles = [img / 255 for img in imgFiles] batchY = to_categorical(batchY, 4) # 4 represent number of classes (4 in that case)
return np.array(imgFiles), np.array(batchY)
def getfilePath(filenames):
path = './train/' # or any other path according to directory structure
filePaths = []
for name in filenames:
filePaths.append(path + name) # './train/' + 'img1.jpg' = './train/img1.jpg
return filePaths
df = pd.read_csv()
img_names = df['img_names']
labels = df['labels']
img_names = ['img1.jpg', 'img2.jpg', -----]
img_names = getfilePath(img_names)
img_names = ['./train/img1.jpg', './train/img2.jpg', -----]
label = [3, 1, 2, 0, ----]
batch_size = 32
data = MyTrainingData(fileNames, labels, batchSize
model = defineModel()
sgd = SGD(learning_rate=0.0001, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(data, epochs=10, verbose=1)
The above code does much more than mapping. In case of large dataset(to big to fit in RAM). this techniwue will help you to load, preprocess and generate input data dynamically. Hope you find it usefull. Give feedback so that i can further improve it.

Categories