I have an object detection dataset with RGB images and annotations in JSON. I use a custom Dataset class (consumed by a DataLoader) to read the images and the labels. One issue I'm facing is that I would like to skip images during training when their labels don't contain certain objects.
For example, if an image doesn't contain any target labels belonging to the class 'Cars', I would like to skip it. When parsing my JSON annotations, I check for labels that don't contain the class 'Cars' and return None, and then use a collate function to filter out the None entries. Unfortunately, it is not working.
import torch
from torch.utils.data.dataset import Dataset
import json
import os
from PIL import Image
# T is the transforms module from torchvision's detection references
# (vision/references/detection/transforms.py), whose transforms take (img, target)
import transforms as T
import numpy as np
general_classes = {
    # Cars
    "Toyota Corolla": 0,
    "VW Golf": 0,
    "VW Beetle": 0,
    # Motorcycles
    "Harley Davidson": 1,
    "Yamaha YZF-R6": 1,
}

car_classes = {
    "Toyota Corolla": 0,
    "VW Golf": 0,
    "VW Beetle": 0,
}

def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.ToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def my_collate(batch):
    # drop the None entries produced by __getitem__ for filtered-out images
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)
class FilteredDataset(Dataset):
    # The dataset skips an image and its labels based on the dictionary 'car_classes'
    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        img_folder_list = os.listdir(self.data_dir)
        self.transforms = transforms
        imgs_list = []
        json_list = []
        self.filter_count = 0
        self.filtered_label_list = []
        for img_path in img_folder_list:
            img_full_path = os.path.join(self.data_dir, img_path)
            json_file = os.path.join(img_full_path, 'annotations-of-my-images.json')
            img_file = os.path.join(img_full_path, 'Image-Name.png')
            json_list.append(json_file)
            imgs_list.append(img_file)
        self.imgs = imgs_list
        self.annotations = json_list
        total_count = 0
        for one_annotation in self.annotations:
            filtered_obj_id = []
            with open(one_annotation) as f:
                img_annotations = json.load(f)
            parts_list = img_annotations['regions']
            for part in parts_list:
                current_obj_id = part['tags'][0]  # bbox label
                check_obj_id = general_classes[current_obj_id]
                if check_obj_id == 0:
                    subclass_id = car_classes[current_obj_id]
                    filtered_obj_id.append(subclass_id)
                    total_count = total_count + 1
            if len(filtered_obj_id) > 0:
                self.filter_count = self.filter_count + 1
                self.filtered_label_list.append(one_annotation)
        print("The total number of the objects in all images: ", total_count)

    # get one image and the bboxes, img_id, labels of parts, etc. in the image as target
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        image_id = torch.tensor([idx])
        with open(self.annotations[idx]) as f:
            img_annotations = json.load(f)
        parts_list = img_annotations['regions']
        obj_ids = []
        boxes = []
        for part in parts_list:
            obj_id = part['tags'][0]
            check_obj_id = general_classes[obj_id]
            if check_obj_id == 0:
                obj_id = car_classes[obj_id]
                obj_ids.append(obj_id)
        if len(obj_ids) > 0:
            img = Image.open(img_path).convert("RGB")
            labels = torch.as_tensor(obj_ids, dtype=torch.int64)
            target = {}
            target['labels'] = labels
            if self.transforms is not None:
                img, target = self.transforms(img, target)
            return img, target
        else:
            return None

    def __len__(self):
        return len(self.filtered_label_list)
train_data_path = "path-to-my-annotation"

# Generators
train_dataset = FilteredDataset(train_data_path, get_transform(train=True))
print("Total files in the train_dataset: ", len(train_dataset))
training_generator = torch.utils.data.DataLoader(train_dataset, collate_fn=my_collate)

print("\n\n Iterator in action! ")
print("---------------------------------------------------------")
count = 0
for img, target in training_generator:
    count = count + 1
    print("target name : ", target)
    print("count : ", count)
    print("**************************************************")
However, I get an error when iterating over the DataLoader. Could anyone please suggest a way to skip the images that do not contain a particular categorical label?
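A likely root cause in the code above: __len__ returns len(self.filtered_label_list) (the filtered count), while __getitem__ still indexes the unfiltered self.imgs/self.annotations, so the reported length and the indices disagree, and my_collate can still receive a batch that is entirely None. A more robust pattern is to precompute the valid indices once and remap every lookup; here is a minimal sketch under the same JSON layout (the CarOnlyDataset name is made up):

class CarOnlyDataset(FilteredDataset):
    # Sketch: expose only the samples that contain at least one 'Cars' object.
    def __init__(self, data_dir, transforms):
        super().__init__(data_dir, transforms)
        self.valid_idx = []
        for i, one_annotation in enumerate(self.annotations):
            with open(one_annotation) as f:
                regions = json.load(f)['regions']
            # keep the image if any region maps to the 'Cars' super-class (id 0)
            if any(general_classes[part['tags'][0]] == 0 for part in regions):
                self.valid_idx.append(i)

    def __getitem__(self, idx):
        # remap into the unfiltered lists so that indexing and __len__ agree
        return super().__getitem__(self.valid_idx[idx])

    def __len__(self):
        return len(self.valid_idx)

With this remapping, __getitem__ never returns None, so the DataLoader can use the default collate function and the None-filtering in my_collate becomes unnecessary.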
I followed this tutorial for building an object detector, and now I am trying to expand it to recognise more classes.
The object detection model currently works for only one class (raccoon), but it should be possible to train the network on a bigger dataset to recognise more classes.
It uses a pretrained faster_rcnn_resnet50 network.
Could someone help me adapt the code so that it can distinguish more than one class?
Thanks in advance!
class RaccoonDataset(torch.utils.data.Dataset):
    def __init__(self, root, data_file, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.path_to_data_file = data_file

    def __getitem__(self, idx):
        # load images and bounding boxes
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        # parse_one_annot (defined in the tutorial) returns the box list for one image
        box_list = parse_one_annot(self.path_to_data_file, self.imgs[idx])
        boxes = torch.as_tensor(box_list, dtype=torch.float32)
        num_objs = len(box_list)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.imgs)
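For what it's worth, two changes are needed for multiple classes, sketched below under the assumption that your annotation file can also provide a class id per box (the multi-class parser here is hypothetical): emit real per-box labels instead of torch.ones, and rebuild the box predictor with num_classes equal to the number of object classes plus one (the extra one is the background class).

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# in __getitem__, replace
#     labels = torch.ones((num_objs,), dtype=torch.int64)
# with per-box class ids in 1..N (0 is reserved for background), e.g.
#     box_list, class_list = parse_one_annot_multiclass(...)  # hypothetical helper
#     labels = torch.as_tensor(class_list, dtype=torch.int64)

def get_model(num_object_classes):
    num_classes = num_object_classes + 1  # object classes + background
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # swap the pretrained single-class head for one sized to num_classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model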
I'm trying to test my custom CNN model, so I want to train it on a tiny subset and see if it can overfit. My question is: how can I keep only a few classes (say 5) for training?
Here's my dataloader:
train_data = CUB200.CUB200(transform=transform_train, train=True)
test_data = CUB200.CUB200(transform=transform_test, train=False)
trainloader = torch.utils.data.DataLoader(dataset=train_data, batch_size=32, shuffle=True, num_workers=0, drop_last=True)
testloader = torch.utils.data.DataLoader(dataset=test_data, batch_size=32, shuffle=True, num_workers=0)
Here's my custom dataset:
import os
import pandas as pd
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset

base_folder = '../../data/CUB_200_2011/'

class CUB200(Dataset):
    def __init__(self, base_folder=base_folder, transform=None, train=True):
        self.base_folder = base_folder
        self.transform = transform
        self.train = train
        images = pd.read_csv(os.path.join(base_folder, 'images.txt'),
                             sep=' ', names=['img_id', 'filepath'])
        image_class_labels = pd.read_csv(os.path.join(base_folder, 'image_class_labels.txt'),
                                         sep=' ', names=['img_id', 'target'])
        train_test_split = pd.read_csv(os.path.join(base_folder, 'train_test_split.txt'),
                                       sep=' ', names=['img_id', 'is_training_img'])
        data = images.merge(image_class_labels, on='img_id')
        data = data.merge(train_test_split, on='img_id')
        if self.train:
            self.data = data[data.is_training_img == 1]
        else:
            self.data = data[data.is_training_img == 0]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data.iloc[index]
        path = os.path.join(base_folder, 'images', sample.filepath)
        img = default_loader(path)
        # Targets start from 1 in the data, so shift to 0
        label = sample.target - 1
        if self.transform is not None:
            img = self.transform(img)
        return img, label
I know we can use torch.utils.data.Subset in PyTorch for this, but that way I would have to count the indices of the first 5 classes by hand. Is there a more convenient way to do this?
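Since CUB200 already keeps its metadata in a pandas DataFrame, one convenient alternative to counting indices for Subset is to filter on the target column; a minimal sketch (the CUB200Subset name is invented here):

class CUB200Subset(CUB200):
    def __init__(self, num_classes=5, **kwargs):
        super().__init__(**kwargs)
        # class ids in image_class_labels.txt are 1-based,
        # so the first num_classes classes have target in 1..num_classes
        self.data = self.data[self.data.target <= num_classes]

train_data = CUB200Subset(num_classes=5, transform=transform_train, train=True)

Because the kept class ids are 1..5 and __getitem__ shifts them by one, the returned labels stay in 0..4, which matches a 5-way classifier head.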
I have a dataset of records stored in a list of dictionaries. The dictionaries can be pretty complex. I would like to load this list through the TensorFlow Dataset API. How can I do this? I tried something like this; however, it is not working:
import tensorflow as tf
import json

LABELS_IDS = ["cat", "dog", "animal"]

def parse_record(record):
    image = tf.io.read_file(record["_file"])
    image = tf.image.decode_jpeg(image)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [224, 224])
    image = tf.image.random_flip_left_right(image, seed=None)
    labels = []
    for element in record["_categories"]:
        if element in LABELS_IDS:
            labels.append(LABELS_IDS.index(element))
    one_hot_labels = tf.reduce_sum(tf.one_hot(labels, len(LABELS_IDS)), axis=0)
    return image, one_hot_labels

records = [{"_file": "images/test.jpg", "_categories": ["cat", "animal"]}]
train_x = tf.data.Dataset.from_tensor_slices(records).map(parse_record)
Edit: I found the answer to this; you can simply map the records through separate helper methods:

LABELS_IDS = ["cat", "dog", "animal"]
records = [{"_file": "images/test.jpg", "_categories": ["cat", "animal"]}]

def _load_files(records):
    return [record["_file"] for record in records]

def _load_labels(records):
    vectors = []
    for record in records:
        labels = []
        for element in record["_categories"]:
            if element in LABELS_IDS:
                labels.append(LABELS_IDS.index(element))
        one_hot = tf.reduce_sum(tf.one_hot(labels, len(LABELS_IDS)), axis=0)
        vectors.append(one_hot.numpy())
    return vectors

def _load_data(file_path, label):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    return image, label

data = (
    _load_files(records),
    _load_labels(records)
)
train_x = tf.data.Dataset.from_tensor_slices(data).map(_load_data)
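One caveat if you reuse this: decode_image yields tensors of varying height and width, so batching would fail without a resize to a common shape. A small sketch of a batched pipeline under that assumption (the image size and batch size are arbitrary):

def _load_data(file_path, label):
    image = tf.io.read_file(file_path)
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    image = tf.image.resize(image, [224, 224])  # uniform size so batching works
    return image, label

train_x = (tf.data.Dataset.from_tensor_slices(data)
           .map(_load_data)
           .batch(8)
           .prefetch(tf.data.AUTOTUNE))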
I have a problem with a TF dataset generator. I do not know why, but when I get a picture from the dataset by running it through a session, it returns tensors whose colors are inverted. I tried changing BGR to RGB, but this is not the problem.
It is partially solved by inverting the image array (img = 1 - img), but I would prefer this problem not to occur in the first place. Does somebody know what the cause could be?
import os
import glob
import random
import tensorflow as tf
from tensorflow import Tensor

class PairGenerator(object):
    person1 = 'img'
    person2 = 'person2'
    label = 'same_person'

    def __init__(self, lfw_path='/home/tom/Devel/ai-dev/tensorflow-triplet-loss/data/augmentor'):
        self.all_people = self.generate_all_people_dict(lfw_path)
        print(self.all_people.keys())

    def generate_all_people_dict(self, lfw_path):
        # generates a dictionary between a person and all the photos of that person
        all_people = {}
        for person_folder in os.listdir(lfw_path):
            person_photos = glob.glob(lfw_path + os.path.sep + person_folder + os.path.sep + '*.jpg')
            all_people[person_folder] = person_photos
        return all_people

    def get_next_pair(self):
        all_people_names = list(self.all_people.keys())
        while True:
            # draw a person at random
            person1 = random.choice(all_people_names)
            # flip a coin to decide whether we fetch a photo of the same person vs a different person
            same_person = random.random() > 0.5
            if same_person:
                person2 = person1
            else:
                # repeatedly pick random names until we find a different name
                person2 = person1
                while person2 == person1:
                    person2 = random.choice(all_people_names)
            person1_photo = random.choice(self.all_people[person1])
            yield ({self.person1: person1_photo,
                    self.label: same_person})

class Inputs(object):
    def __init__(self, img: Tensor, label: Tensor):
        self.img = img
        self.label = label

    def feed_input(self, input_img, input_label=None):
        # feed the input images that are necessary to make a prediction
        feed_dict = {self.img: input_img}
        # optionally also include the label:
        # if we're just making a prediction without calculating loss, that won't be necessary
        if input_label is not None:
            feed_dict[self.label] = input_label
        return feed_dict

class Dataset(object):
    img_resized = 'img_resized'
    label = 'same_person'

    def __init__(self, generator=PairGenerator()):
        self.next_element = self.build_iterator(generator)

    def build_iterator(self, pair_gen: PairGenerator):
        batch_size = 10
        prefetch_batch_buffer = 5
        dataset = tf.data.Dataset.from_generator(pair_gen.get_next_pair,
                                                 output_types={PairGenerator.person1: tf.string,
                                                               PairGenerator.label: tf.bool})
        dataset = dataset.map(self._read_image_and_resize)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(prefetch_batch_buffer)
        iter = dataset.make_one_shot_iterator()
        element = iter.get_next()
        return Inputs(element[self.img_resized],
                      element[PairGenerator.label])

    def _read_image_and_resize(self, pair_element):
        target_size = [224, 224]
        # read images from disk
        img_file = tf.read_file(pair_element[PairGenerator.person1])
        img = tf.image.decode_image(img_file, channels=3)
        # let tensorflow know that the loaded images have unknown dimensions and 3 color channels (rgb)
        img.set_shape([None, None, 3])
        # resize to model input size
        img_resized = tf.image.resize_images(img, target_size)
        #img_resized = tf.image.flip_up_down(img_resized)
        #img_resized = tf.image.rot90(img_resized)
        pair_element[self.img_resized] = img_resized
        pair_element[self.label] = tf.cast(pair_element[PairGenerator.label], tf.float32)
        return pair_element

generator = PairGenerator()
iter = generator.get_next_pair()
for i in range(10):
    print(next(iter))

ds = Dataset(generator)
import matplotlib.pyplot as plt

# `out` is one image fetched from the pipeline (e.g. via sess.run on
# ds.next_element.img in a tf.Session; that step is not shown in the post)
imgplot = plt.imshow(out)
imgplot = plt.imshow(1 - out)
OK, so the solution was
imgplot = plt.imshow(out / 255)
The cause: tf.image.resize_images returns float values that are still in the 0-255 range, while matplotlib interprets float images as lying in [0, 1] and clips everything above 1, which produces the washed-out, inverted-looking colors. Rescaling to [0, 1] fixes the display.
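An equivalent fix, sketched in one line, is to cast the fetched array back to uint8 so matplotlib reads it on the integer 0-255 scale again:

import numpy as np

imgplot = plt.imshow(out.astype(np.uint8))  # integer images are interpreted as 0-255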
I have set up Google's DeepLab V3 demo on my local system, and it runs successfully after making some minor changes. Here it is:
# -*- coding: utf-8 -*-
# DeepLab Demo
# This demo demonstrates the steps to run the deeplab semantic segmentation model on sample input images.

import os
from io import BytesIO
import tarfile
import tempfile

from six.moves import urllib
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf

class DeepLabModel(object):
    """Class to load deeplab model and run inference."""

    INPUT_TENSOR_NAME = 'ImageTensor:0'
    OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
    INPUT_SIZE = 513
    FROZEN_GRAPH_NAME = 'frozen_inference_graph'

    def __init__(self, tarball_path):
        """Creates and loads pretrained deeplab model."""
        self.graph = tf.Graph()
        graph_def = None
        # Extract frozen graph from tar archive.
        tar_file = tarfile.open(tarball_path)
        for tar_info in tar_file.getmembers():
            if self.FROZEN_GRAPH_NAME in os.path.basename(tar_info.name):
                file_handle = tar_file.extractfile(tar_info)
                graph_def = tf.GraphDef.FromString(file_handle.read())
                break
        tar_file.close()
        if graph_def is None:
            raise RuntimeError('Cannot find inference graph in tar archive.')
        with self.graph.as_default():
            tf.import_graph_def(graph_def, name='')
        self.sess = tf.Session(graph=self.graph)

    def run(self, image):
        """Runs inference on a single image.

        Args:
          image: A PIL.Image object, raw input image.

        Returns:
          resized_image: RGB image resized from original input image.
          seg_map: Segmentation map of `resized_image`.
        """
        width, height = image.size
        resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
        target_size = (int(resize_ratio * width), int(resize_ratio * height))
        resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)
        batch_seg_map = self.sess.run(
            self.OUTPUT_TENSOR_NAME,
            feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
        seg_map = batch_seg_map[0]
        return resized_image, seg_map
def create_pascal_label_colormap():
    """Creates a label colormap used in PASCAL VOC segmentation benchmark.

    Returns:
      A colormap for visualizing segmentation results.
    """
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)
    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((ind >> channel) & 1) << shift
        ind >>= 3
    return colormap

def label_to_color_image(label):
    """Adds color defined by the dataset colormap to the label.

    Args:
      label: A 2D array with integer type, storing the segmentation label.

    Returns:
      result: A 2D array with floating type. The element of the array
        is the color indexed by the corresponding element in the input label
        to the PASCAL color map.

    Raises:
      ValueError: If label is not of rank 2 or its value is larger than color
        map maximum entry.
    """
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')
    colormap = create_pascal_label_colormap()
    if np.max(label) >= len(colormap):
        raise ValueError('label value too large.')
    return colormap[label]
def vis_segmentation(image, seg_map):
    """Visualizes input image, segmentation map and overlay view."""
    plt.figure(figsize=(15, 5))
    grid_spec = gridspec.GridSpec(1, 4, width_ratios=[6, 6, 6, 1])

    plt.subplot(grid_spec[0])
    plt.imshow(image)
    plt.axis('off')
    plt.title('input image')

    plt.subplot(grid_spec[1])
    seg_image = label_to_color_image(seg_map).astype(np.uint8)
    plt.imshow(seg_image)
    plt.axis('off')
    plt.title('segmentation map')

    plt.subplot(grid_spec[2])
    plt.imshow(image)
    plt.imshow(seg_image, alpha=0.7)
    plt.axis('off')
    plt.title('segmentation overlay')

    unique_labels = np.unique(seg_map)
    ax = plt.subplot(grid_spec[3])
    plt.imshow(
        FULL_COLOR_MAP[unique_labels].astype(np.uint8), interpolation='nearest')
    ax.yaxis.tick_right()
    plt.yticks(range(len(unique_labels)), LABEL_NAMES[unique_labels])
    plt.xticks([], [])
    ax.tick_params(width=0.0)
    plt.grid('off')
    plt.show()

LABEL_NAMES = np.asarray([
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
])
FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)
# @title Select and download models {display-mode: "form"}
MODEL_NAME = 'mobilenetv2_coco_voctrainaug'  # @param ['mobilenetv2_coco_voctrainaug', 'mobilenetv2_coco_voctrainval', 'xception_coco_voctrainaug', 'xception_coco_voctrainval']

_DOWNLOAD_URL_PREFIX = 'http://download.tensorflow.org/models/'
_MODEL_URLS = {
    'mobilenetv2_coco_voctrainaug':
        'deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz',
    'mobilenetv2_coco_voctrainval':
        'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz',
    'xception_coco_voctrainaug':
        'deeplabv3_pascal_train_aug_2018_01_04.tar.gz',
    'xception_coco_voctrainval':
        'deeplabv3_pascal_trainval_2018_01_04.tar.gz',
}
_TARBALL_NAME = 'deeplab_model.tar.gz'

model_dir = tempfile.mkdtemp()
tf.gfile.MakeDirs(model_dir)

download_path = os.path.join(model_dir, _TARBALL_NAME)
print('downloading model, this might take a while...')
urllib.request.urlretrieve(_DOWNLOAD_URL_PREFIX + _MODEL_URLS[MODEL_NAME],
                           download_path)
print('download completed! loading DeepLab model...')

MODEL = DeepLabModel(download_path)
print('model loaded successfully!')
# """## Run on sample images
#
# Select one of sample images (leave `IMAGE_URL` empty) or feed any internet image
# url for inference.
#
# Note that we are using single scale inference in the demo for fast computation,
# so the results may slightly differ from the visualizations in
# [README](https://github.com/tensorflow/models/blob/master/research/deeplab/README.md),
# which uses multi-scale and left-right flipped inputs.
# """
# #title Run on sample images {display-mode: "form"}
SAMPLE_IMAGE = 'image1.jpg' # #param ['image1', 'image2', 'image3']
IMAGE_URL = 'https://raw.githubusercontent.com/tensorflow/models/master/research/deeplab/g3doc/img/image1.jpg' ##param {type:"string"}
_SAMPLE_URL = ('https://github.com/tensorflow/models/blob/master/research/'
'deeplab/g3doc/img/%s.jpg?raw=true')
def run_visualization(url):
"""Inferences DeepLab model and visualizes result."""
try:
# f = urllib.request.urlopen(url)
# jpeg_str = f.read()
# original_im = Image.open(BytesIO(jpeg_str))
original_im = Image.open("human.jpg")
except IOError:
print('Cannot retrieve image. Please check url: ' + url)
return
print('running deeplab on image %s...' % url)
resized_im, seg_map = MODEL.run(original_im)
vis_segmentation(resized_im, seg_map)
image_url = SAMPLE_IMAGE
run_visualization(SAMPLE_IMAGE)
I have used various images with this model and it's working. (Example output image omitted.)
Now I need to extract the mask as a separate image, how can I achieve that?
Thanks in advance!
The seg_map holds the segmented image.
resized_im, seg_map = MODEL.run(original_im)
It is a 2-D NumPy array of per-pixel class indices, with the same height and width as resized_im. You can wrap it with np.array(seg_map) or use it any way you like.
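To extract the mask as a separate image, here is a minimal sketch built on the demo's variables (picking the 'person' class and the output file names purely as examples):

import numpy as np
from PIL import Image

# class index of 'person' in the PASCAL label list above
person_idx = LABEL_NAMES.tolist().index('person')

# binary mask: 255 where the model predicted 'person', 0 elsewhere
mask = np.where(seg_map == person_idx, 255, 0).astype(np.uint8)
Image.fromarray(mask).save('mask.png')

# optionally keep only the person pixels of the resized input
masked = np.array(resized_im) * (mask[:, :, None] // 255)
Image.fromarray(masked).save('person_only.png')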