Image Generator for 3D volumes in Keras with data augmentation

Since the ImageDataGenerator provided by Keras is not suitable for 3D volumes, I started to write my own generator for Keras (for semantic segmentation, not classification!).
1) If there is anybody out there who has adapted the ImageDataGenerator code to work with 3D volumes, please share it! This guy has done it for videos.
2) Following this tutorial, I wrote a custom generator.
import glob
import os

import keras
import numpy as np
import skimage.io
from imgaug import augmenters as iaa


class DataGenerator(keras.utils.Sequence):
    """Generates data for Keras.

    This structure guarantees that the network will only train once
    on each sample per epoch.
    """

    def __init__(self, list_IDs, im_path, label_path, batch_size=4, dim=(128, 128, 128),
                 n_classes=4, shuffle=True, augment=False):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.im_path = im_path
        self.label_path = label_path
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        # Note: the paths must be read via self.im_path / self.label_path, and the
        # batch arrays must be built regardless of self.augment (the original
        # version only built them in the non-augmented branch and crashed otherwise)
        X = np.empty([self.batch_size, *self.dim])
        Y = np.empty([self.batch_size, *self.dim, self.n_classes])
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            img_X = skimage.io.imread(os.path.join(self.im_path, ID))
            img_Y = skimage.io.imread(os.path.join(self.label_path, ID))
            if self.augment:
                pass  # on-the-fly augmentation (e.g. with imgaug) goes here
            X[i] = img_X
            Y[i] = keras.utils.to_categorical(img_Y, num_classes=self.n_classes)
        X = X.reshape(self.batch_size, *self.dim, 1)
        return X, Y
params = {'dim': (128, 128, 128),
          'batch_size': 4,
          'im_path': "some/path/for/the/images/",
          'label_path': "some/path/for/the/label_images",
          'n_classes': 4,
          'shuffle': True,
          'augment': True}

partition = {}
im_path = "some/path/for/the/images/"
label_path = "some/path/for/the/label_images/"

images = glob.glob(os.path.join(im_path, "*.tif"))
images_IDs = [name.split("/")[-1] for name in images]

partition['train'] = images_IDs
training_generator = DataGenerator(partition['train'], **params)
My images have the size (128, 128, 128), and when I load them I get a 5D tensor of shape (batch_size, depth, height, width, number_of_channels), e.g. (4, 128, 128, 128, 1). For the label images (which have the same dimensions and are single-channel coded: value 1 = label 1, value 2 = label 2, value 3 = label 3 and value 0 = label 4 or background) I get a binary representation of the labels with the to_categorical() function from Keras and end up with a 5D tensor, e.g. (4, 128, 128, 128, 4). The images and label images have the same names and are located in different folders.
As I only have very few images, I would like to extend the total number of images through image augmentation. How would I do that with this generator? I have successfully tested the imgaug package, but instead of adding images to my set I only transform the existing images (e.g. flip them horizontally).
Edit: I was under a misconception about data augmentation. See this article about image augmentation: images are passed in with random transformations applied on-the-fly, so the dataset is effectively extended without ever storing the augmented copies. Now I just have to gather enough data and set the parameters with imgaug. I will update this soon.
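As a minimal sketch of how such on-the-fly augmentation could be plugged into the __data_generation method above (the flip augmenters are illustrative choices; to_deterministic() makes sure the image volume and its label volume receive the exact same transform, applied slice-wise since imgaug augmenters operate on 2D images):
import numpy as np
from imgaug import augmenters as iaa

seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontal flip with 50% probability
    iaa.Flipud(0.5),  # vertical flip with 50% probability
])

def augment_volume(img_X, img_Y):
    """Hypothetical helper for the `pass` placeholder in __data_generation:
    applies one frozen random transform to every slice of the image volume
    and its label volume."""
    det = seq.to_deterministic()  # freeze the sampled parameters
    img_X = np.stack([det.augment_image(s) for s in img_X])
    img_Y = np.stack([det.augment_image(s) for s in img_Y])
    return img_X, img_Y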

I found an implementation of a Keras custom DataGenerator for 3D volumes. Here is a GitHub link. The implementation can easily be expanded to include new augmentation techniques. Here is a minimal working example I am using in my project (3D volume semantic segmentation), based on the implementation I shared in the link:
from generator import customImageDataGenerator


def generator(images, groundtruth, batch):
    """Load a batch of augmented images"""
    gen = customImageDataGenerator(mirroring=True,
                                   rotation_90=True,
                                   transpose_axes=True)
    for b in gen.flow(x=images, y=groundtruth, batch_size=batch):
        yield (b[0], b[1].astype(float))


# images      = (123, 48, 48, 48, 1)
# groundtruth = (123, 48, 48, 48, 1)

history = model.fit(
    x=generator(images, groundtruth, batchSize),
    validation_data=(imagesTest, groundtruthTest),
    steps_per_epoch=len(images) // batchSize,  # integer number of batches
    epochs=epochs,
    callbacks=[callbacks],
)

Related

How to train a network on images of different sizes in PyTorch

I am trying to feed a neural network a dataset of images and I am getting this error.
I don't know what the cause might be, as all the images have different sizes.
I have also tried to change the batch sizes and kernels, but I had no success with this.
File "c:\Users\david\Desktop\cs_agent\main.py", line 49, in <module>
for i, data in enumerate(train_loader, 0):
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 530, in __next__
data = self._next_data()
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 570, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
return self.collate_fn(data)
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in default_collate
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in <listcomp>
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 138, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 300, 535] at entry 0 and [3, 1080, 1920] at entry 23
This is my main file:
import numpy as np
import matplotlib.pyplot as plt
import torch
import dataset
import os
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision
import check_device
import neural_network
import torch.optim as optim

EPS = 1.e-7
LR = 0.5
WEIGHT_DECAY = 0.5
batch_size = 50

# DATA LOADING ###################################################################
test_dataset = dataset.csHeadBody(csv_file="images\\test_labels.csv", root_dir="images\\test")
train_dataset = dataset.csHeadBody(csv_file="images\\train_labels.csv", root_dir="images\\train")
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
# DATA LOADING ###############################################################END

# NEURAL NET #####################################################################
net = neural_network.Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# NEURAL NET END #################################################################

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        print(data)
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
And this is my dataset file:
import os

import pandas as pd
from torch.utils.data import Dataset
from torchvision.io import read_image


class csHeadBody(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
And this is my neural network architecture:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 535, 535)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
You need to adjust the parameters of your convolutional and linear layers. For the convolutions, the first argument is the number of input channels (3 for standard RGB images in conv1), then the number of output channels, and then the convolution kernel size. To clarify, I've used named arguments in the code below. The code works for images of a square input size of 224x224 pixels (the standard ImageNet size; adjust if needed). If you want image-size-agnostic code, you could use something like global average pooling (the mean of each channel in the last conv layer). The net below supports both:
class Net(nn.Module):
    def __init__(self, use_global_average_pooling: bool = False):
        super().__init__()
        self.use_global_average_pooling = use_global_average_pooling
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        if use_global_average_pooling:
            self.fc_gap = nn.Linear(64, 10)
        else:
            self.fc_1 = nn.Linear(54 * 54 * 64, 84)  # 54 img side times 64 out channels from conv2
            self.fc_2 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # img side: (224 - 2) // 2 = 111
        x = self.pool(F.relu(self.conv2(x)))  # img side: (111 - 2) // 2 = 54
        if self.use_global_average_pooling:
            # global average pooling: mean over the spatial dimensions per channel
            x = x.mean(dim=(-1, -2))
            x = F.relu(self.fc_gap(x))
        else:  # use all features
            x = torch.flatten(x, 1)
            x = F.relu(self.fc_1(x))
            x = self.fc_2(x)
        return x
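A quick way to sanity-check the layer arithmetic (my own sketch, assuming a single 224x224 RGB input):
net = Net(use_global_average_pooling=True)
out = net(torch.randn(1, 3, 224, 224))  # dummy batch with one image
print(out.shape)  # expected: torch.Size([1, 10])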
Additionally, the torchvision.io.read_image function used in your Dataset returns a uint8 tensor with integer values from 0 to 255. You'll want floating-point values for your network, so you have to divide the result by 255 to get values in the [0, 1] range. Furthermore, neural networks work best with normalized inputs (subtracting the mean and then dividing by the standard deviation of your training dataset). I've added normalization to the image transforms below. For convenience, it uses the ImageNet mean and standard deviation, which should work fine if your images are similar to ImageNet images (otherwise you can calculate them on your own images).
Note that the resizing might distort your images (it doesn't keep the original aspect ratio). Often this is no problem, but if it is, you might want to pad your images with a constant color (e.g. black) to resize them to the required dimensions; there are also transforms for this in the torchvision library, and a sketch follows the transforms below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transforms = torchvision.transforms.Compose([
    torchvision.transforms.Lambda(lambda x: x / 255.),
    torchvision.transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    torchvision.transforms.Resize((224, 224)),
])
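If you want the padding approach instead, here is a sketch (pad_to_square is a hypothetical helper, not part of torchvision; fill=0 pads with black):
import torchvision.transforms.functional as TF

def pad_to_square(img):
    # img is a CxHxW tensor; pad the shorter side so H == W,
    # so the Resize step no longer distorts the aspect ratio
    _, h, w = img.shape
    side = max(h, w)
    pad_left = (side - w) // 2
    pad_top = (side - h) // 2
    padding = [pad_left, pad_top, side - w - pad_left, side - h - pad_top]
    return TF.pad(img, padding, fill=0)

# then insert torchvision.transforms.Lambda(pad_to_square) before the Resize step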
You might also need to adjust the code in your Dataset to load images as an RGB image (if they also have an alpha channel). This can be done like this:
image = read_image(img_path, mode=torchvision.io.image.ImageReadMode.RGB)
You can then initialise your Dataset using:
test_dataset = dataset.csHeadBody(csv_file="images\\test_labels.csv", root_dir="images\\test", transform=transforms)
train_dataset = dataset.csHeadBody(csv_file="images\\train_labels.csv", root_dir="images\\train", transform=transforms)
I haven't tested the code, let me know if it doesn't work!

Data augmentation with ImageDataGenerator for videos (4D tensors) in Keras

I have an ImageDataGenerator in Keras that I would like to apply during training to every frame in short video clips which are represented as 4D numpy arrays with shape (num_frames, width, height, 3).
In the case of a standard dataset consisting of images each with shape (width, height, 3), I would normally do something like:
aug = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=15,
    zoom_range=0.15)

model.fit_generator(
    aug.flow(X_train, y_train),
    epochs=100)
How can I apply these same data augmentations to a dataset of 4D numpy arrays representing sequences of images?
I figured it out. I created a custom class which inherits from tensorflow.keras.utils.Sequence that performs the augmentations using scipy for each image.
import random

import numpy as np
import tensorflow as tf
from scipy import ndimage
from sklearn.preprocessing import LabelBinarizer


class CustomDataset(tf.keras.utils.Sequence):
    def __init__(self, batch_size, *args, **kwargs):
        self.batch_size = batch_size
        self.X_train = args[0]
        self.Y_train = args[1]

    def __len__(self):
        # returns the number of batches
        return int(self.X_train.shape[0] / self.batch_size)

    def __getitem__(self, index):
        # returns one batch
        X = []
        y = []
        for i in range(self.batch_size):
            r = random.randint(0, self.X_train.shape[0] - 1)
            next_x = self.X_train[r]
            next_y = self.Y_train[r]
            augmented_next_x = []

            ###
            ### Augmentation parameters for this clip.
            ###
            rotation_amt = random.randint(-45, 45)

            for j in range(self.X_train.shape[1]):
                transformed_img = ndimage.rotate(next_x[j], rotation_amt, reshape=False)
                transformed_img[transformed_img == 0] = 255
                augmented_next_x.append(transformed_img)

            X.append(augmented_next_x)
            y.append(next_y)

        X = np.array(X).astype('uint8')
        y = np.array(y)
        encoder = LabelBinarizer()
        y = encoder.fit_transform(y)
        return X, y

    def on_epoch_end(self):
        # optional method to run some logic at the end of each epoch, e.g. reshuffling
        pass
I then pass this to the fit_generator method:
training_data_augmentation = CustomDataset(BS, X_train_L, y_train_L)

model.fit_generator(
    training_data_augmentation,
    epochs=300)

U-net training Error: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1

I'm trying to train a U-Net model on the LandCoverNet dataset, a satellite imagery dataset that contains input images and corresponding land cover type masks.
I have created a custom dataset to get my images and masks:
# Create a custom dataset that accepts 4-channel images
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import rasterio as rio
import torch
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset as BaseDataset, DataLoader, sampler
from torchvision import transforms, datasets, models


# We have two dirs: inputs (a folder for each image) and targets
class LandCoverNetDataset(BaseDataset):

    CLASSES = ['otherland', 'cropland', 'pastureland', 'bare soil', 'openwater', 'forestland']

    def __init__(self, inputs_dir, targets_dir,
                 classes=None,
                 augmentation=None,
                 preprocessing=False,
                 pytorch=True):
        super().__init__()
        self.samples = []
        self.pytorch = pytorch
        self.augmentation = augmentation
        self.preprocessing = preprocessing

        # Convert str names to class values on masks
        self.class_value = [self.CLASSES.index(cls.lower()) for cls in classes]

        # Create a dictionary of images and targets
        for sub_dir in os.listdir(inputs_dir):
            files = {
                'img_bands': os.path.join(inputs_dir, sub_dir),
                'target': os.path.join(targets_dir, sub_dir[:13] + "_LC_10m.png")
            }
            self.samples.append(files)

    def __len__(self):
        return len(self.samples)

    def normalize(self, band):
        '''Normalize a numpy array to have values between 0 and 1'''
        band_min, band_max = band.min(), band.max()
        np.seterr(divide='ignore', invalid='ignore')
        normalized_band = (band - band_min) / (band_max - band_min)
        # Remove any NaN values and substitute them with zero
        where_are_NaNs = np.isnan(normalized_band)
        normalized_band[where_are_NaNs] = 0
        return normalized_band

    def open_as_array(self, idx, include_ndvi=False):
        '''Merge the 4 bands into one image and normalize the bands'''
        # List the individual bands in each image folder and stack them together
        list_bands = []
        for img_file in os.listdir(self.samples[idx]['img_bands']):
            # Get the NDVI band
            if 'NDVI' in img_file:
                ndvi_band = os.path.join(self.samples[idx]['img_bands'], img_file)
            else:
                # Get the RGB bands
                band = rio.open(os.path.join(self.samples[idx]['img_bands'], img_file)).read(1)
                if self.preprocessing:
                    # preprocess the bands before stacking them (only RGB)
                    band = self.normalize(band)
                list_bands.append(band)

        # Stack the bands
        raw_rgb = np.stack(list_bands, axis=2).astype('float32')

        if include_ndvi:
            # Include the NDVI band in the input images
            ndvi = np.expand_dims(rio.open(ndvi_band).read(1).astype('float32'), 2)
            raw_rgb = np.concatenate([raw_rgb, ndvi], axis=2)

        if self.augmentation:
            transformed = self.augmentation(image=raw_rgb)
            raw_rgb = transformed["image"]

        if self.preprocessing:
            # transpose to tensor shape
            raw_rgb = raw_rgb.transpose((2, 0, 1)).astype('float32')

        return raw_rgb

    def open_mask(self, idx):
        # Extract certain classes from the mask
        mask = cv2.imread(self.samples[idx]['target'], 0)
        masks = [(mask == v) for v in self.class_value]
        mask = np.stack(masks, axis=-1).astype('long')

        if self.augmentation:
            transformed = self.augmentation(image=mask)
            mask = transformed["image"]

        if self.preprocessing:
            # preprocess the mask
            mask = self.normalize(mask)
            # transpose to tensor shape
            mask = mask.transpose((2, 0, 1)).astype('long')
            mask = mask[0, :, :]

        return mask

    def __getitem__(self, idx):
        x = torch.tensor(self.open_as_array(idx, include_ndvi=True), dtype=torch.float)
        y = torch.tensor(self.open_mask(idx), dtype=torch.long)
        return x, y

    def open_as_pil(self, idx):
        arr = 256 * self.open_as_array(idx)
        return Image.fromarray(arr.astype(np.uint8), 'RGB')

    def __repr__(self):
        return 'Dataset class with {} files'.format(self.__len__())
The input here has 4 bands.
This is the shape of the first batch for both input and target:
torch.Size([16, 4, 224, 224])
torch.Size([16, 224, 224])
I'm using a model from the segmentation-models-pytorch library, and here is how I customized it for my case:
ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'softmax2d'
DEVICE = 'cuda'

model = smp.FPN(ENCODER, classes=len(CLASSES), activation=ACTIVATION)

# Replace model.conv1 to accept 4 channels
# first: copy the layer's weights
weight = model.encoder.layer0.conv1.weight.clone()
model.encoder.layer0.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
with torch.no_grad():
    model.encoder.layer0.conv1.weight[:, :3] = weight
    model.encoder.layer0.conv1.weight[:, 3] = model.encoder.layer0.conv1.weight[:, 0]

loss = smp.utils.losses.NLLLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.SGD([
    dict(params=model.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9),
])

# create epoch runners
# it is a simple loop of iterating over the dataloader's samples
train_epoch = smp.utils.train.TrainEpoch(
    model,
    loss=loss,
    metrics=metrics,
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)
And here is my training loop
# train model for 40 epochs
max_score = 0

for i in range(0, 40):
    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)

    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')

    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')
At first, the target shape was [16, 6, 224, 224], but I got an error and found this thread saying that it should be [batch_size, height, width].
That's why I added the line mask = mask[0, :, :] in the Dataset class,
to get rid of the number-of-classes dimension. And this is where things get confusing for me, because the output of my model is torch.Size([10, 6, 224, 224]).
This is the entire error message:
Epoch: 0
train: 0%| | 0/157 [00:00<?, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-215-2ae39e205dee> in <module>()
7
8 print('\nEpoch: {}'.format(i))
----> 9 train_logs = train_epoch.run(train_loader)
10 valid_logs = valid_epoch.run(valid_loader)
11
3 frames
/usr/local/lib/python3.6/dist-packages/segmentation_models_pytorch/utils/functional.py in iou(pr, gt, eps, threshold, ignore_channels)
32 pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels)
33
---> 34 intersection = torch.sum(gt * pr)
35 union = torch.sum(gt) + torch.sum(pr) - intersection + eps
36 return (intersection + eps) / union
RuntimeError: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1
Thanks!
OK, I changed the loss function to smp.utils.losses.DiceLoss(), and I was able to start training my model. I also removed mask = mask[0, :, :].
I also had an issue with my normalization. Here is how I did it for the input (4 bands):
for i in range(raw_rgb.shape[0]):
    raw_rgb[i, :, :] = self.normalize(raw_rgb[i, :, :])
And the same for the masks (3 channels).
This was after converting them to tensors.
I would also still like to know how to prepare my masks for CrossEntropyLoss.
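For reference, PyTorch's nn.CrossEntropyLoss expects raw logits of shape [batch, n_classes, H, W] and a target of integer class indices of shape [batch, H, W]; no one-hot encoding and no softmax activation on the output. A minimal sketch with shapes matching the ones above:
import torch
import torch.nn as nn

logits = torch.randn(16, 6, 224, 224)         # model output, no softmax applied
target = torch.randint(0, 6, (16, 224, 224))  # class-index mask, dtype long

criterion = nn.CrossEntropyLoss()
loss = criterion(logits, target)
A one-hot mask like the one built in open_mask can be collapsed to class indices with one_hot.argmax(dim=-1), assuming the class dimension is last.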

How to build a Custom Data Generator for Keras/tf.Keras where X images are being augmented and corresponding Y labels are also images

I am working on image binarization using a U-Net and have a dataset of 150 images and their binarized versions too. My idea is to augment the images randomly to make them look different, so I have made a function which applies any of 4-5 types of noise, skewing, shearing and so on to an image. I could have easily used
ImageDataGenerator(preprocessing_function=my_aug_function) to augment the images, but the problem is that my y target is also an image. Also, I could have used something like:
train_dataset = (
    train_dataset.map(
        encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
But it has 2 problems:
1) With a larger dataset, it'll blow up the memory, as the data needs to already be in memory.
2) This is the crucial part: I need to augment the images on the go to make it look like I have a huge dataset.
Another solution could be saving augmented images to a directory and making them 30-40K and then loading them. That would be a silly thing to do.
Now the idea is that I can use Sequence as the parent class, but how can I keep augmenting and generating new images on the fly with the respective Y binarized images?
I have an idea, shown in the code below. Can somebody help me with the augmentation and generation of the y images? I have my X_DIR, Y_DIR where the image names for the binarized and original images are the same but stored in different directories.
class DataGenerator(tensorflow.keras.utils.Sequence):
    def __init__(self, files_path, labels_path, batch_size=32, shuffle=True, random_state=42):
        'Initialization'
        self.files = files_path
        self.labels = labels_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.random_state = random_state
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        # Shuffle the data here
        pass

    def __len__(self):
        return int(np.floor(len(self.files) / self.batch_size))

    def __getitem__(self, index):
        # What do I do here?
        pass

    def __data_generation(self, files):
        # I think this is responsible for the augmentation, but I have no idea
        # how I should implement it or how it works.
        pass
Custom Image Data Generator
Load directory data into a dataframe for the CustomDataGenerator:
import os

import pandas as pd

# name_to_idx maps class-folder names to integer labels (defined elsewhere)

def data_to_df(data_dir, subset=None, validation_split=None):
    df = pd.DataFrame()
    filenames = []
    labels = []

    for dataset in os.listdir(data_dir):
        img_list = os.listdir(os.path.join(data_dir, dataset))
        label = name_to_idx[dataset]

        for image in img_list:
            filenames.append(os.path.join(data_dir, dataset, image))
            labels.append(label)

    df["filenames"] = filenames
    df["labels"] = labels

    if subset == "train":
        split_indexes = int(len(df) * validation_split)
        train_df = df[split_indexes:]
        val_df = df[:split_indexes]
        return train_df, val_df

    return df

train_df, val_df = data_to_df(train_dir, subset="train", validation_split=0.2)
Custom Data Generator
import math

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.utils import shuffle  # an assumption; any row-shuffling helper works
# preprocess_input should come from the backbone you use,
# e.g. tf.keras.applications.<model>.preprocess_input


class CustomDataGenerator(tf.keras.utils.Sequence):
    ''' Custom DataGenerator to load img

    Arguments:
        data_frame = pandas data frame in filenames and labels format
        batch_size = divide data in batches
        shuffle = shuffle data before loading
        img_shape = image shape in (h, w, d) format
        augmentation = data augmentation to make model robust to overfitting

    Output:
        Img: numpy array of image
        label : output label for image
    '''

    def __init__(self, data_frame, batch_size=10, img_shape=None, augmentation=True, num_classes=None):
        self.data_frame = data_frame
        self.train_len = len(data_frame)
        self.batch_size = batch_size
        self.img_shape = img_shape
        self.num_classes = num_classes
        print(f"Found {self.data_frame.shape[0]} images belonging to {self.num_classes} classes")

    def __len__(self):
        ''' return total number of batches '''
        self.data_frame = shuffle(self.data_frame)
        return math.ceil(self.train_len / self.batch_size)

    def on_epoch_end(self):
        ''' shuffle data after every epoch '''
        # fix: on_epoch_end isn't working, shuffling in __len__ as an alternative
        pass

    def __data_augmentation(self, img):
        ''' function to apply some data augmentation '''
        img = tf.keras.preprocessing.image.random_shift(img, 0.2, 0.3)
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        return img

    def __get_image(self, file_id):
        """ open image with file_id path and apply data augmentation """
        img = np.asarray(Image.open(file_id))
        img = np.resize(img, self.img_shape)
        img = self.__data_augmentation(img)
        img = preprocess_input(img)
        return img

    def __get_label(self, label_id):
        """ uncomment the line below to convert the label into categorical format """
        #label_id = tf.keras.utils.to_categorical(label_id, num_classes)
        return label_id

    def __getitem__(self, idx):
        batch_x = self.data_frame["filenames"][idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.data_frame["labels"][idx * self.batch_size:(idx + 1) * self.batch_size]

        # read your data here using the batch lists, batch_x and batch_y
        x = [self.__get_image(file_id) for file_id in batch_x]
        y = [self.__get_label(label_id) for label_id in batch_y]

        return tf.convert_to_tensor(x), tf.convert_to_tensor(y)
You can use libraries like albumentations and imgaug; both are good, but I have heard there are issues with random seeds in albumentations.
Here's an example of imgaug taken from the documentation here:
seq = iaa.Sequential([
    iaa.Dropout([0.05, 0.2]),      # drop 5% or 20% of all pixels
    iaa.Sharpen((0.0, 1.0)),       # sharpen the image
    iaa.Affine(rotate=(-45, 45)),  # rotate by -45 to 45 degrees (affects segmaps)
    iaa.ElasticTransformation(alpha=50, sigma=5)  # apply water effect (affects segmaps)
], random_order=True)

# Augment images and segmaps.
images_aug = []
segmaps_aug = []
for _ in range(len(input_data)):
    images_aug_i, segmaps_aug_i = seq(image=image, segmentation_maps=segmap)
    images_aug.append(images_aug_i)
    segmaps_aug.append(segmaps_aug_i)
You are going the right way with the custom generator. In __getitem__, make a batch using batch_x = self.files[index * self.batch_size : (index + 1) * self.batch_size] and the same with batch_y, then augment them using X, y = self.__data_generation(batch_x, batch_y), which will load the images (using any library you like; I prefer opencv) and return the augmented pairs (and any other manipulation).
Your __getitem__ will then return the tuple (X, y).
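A minimal sketch of those two methods (my addition; it assumes self.files and self.labels hold matching image and mask paths, and uses a synchronized horizontal flip as a stand-in for your augmentation function):
import random

import cv2
import numpy as np

# inside the DataGenerator(Sequence) class from the question:
def __getitem__(self, index):
    batch_x = self.files[index * self.batch_size:(index + 1) * self.batch_size]
    batch_y = self.labels[index * self.batch_size:(index + 1) * self.batch_size]
    return self.__data_generation(batch_x, batch_y)

def __data_generation(self, batch_x, batch_y):
    X, y = [], []
    for img_path, mask_path in zip(batch_x, batch_y):
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if random.random() < 0.5:  # apply the SAME flip to image and mask
            img, mask = cv2.flip(img, 1), cv2.flip(mask, 1)
        X.append(img)
        y.append(mask)
    return np.array(X), np.array(y)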
You can use ImageDataGenerator even if your label is an image.
Here is a simple example of how you can do that:
Code:
# Specifying your data augmentation here for both image and label
image_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
mask_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

# Provide the same seed and keyword arguments to the flow methods
seed = 1

image_generator = image_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

# Combine the image and label generator.
train_generator = zip(image_generator, mask_generator)
Now, if you iterate over it you will get:
for image, label in train_generator:
    print(image.shape, label.shape)
    break
Output:
(32, 256, 256, 3) (32, 256, 256, 3)
You can then train your model on this train_generator.
Code:
model.fit_generator(
    train_generator,
    steps_per_epoch=2000,
    epochs=50)
With flow_from_directory your memory won't be cluttered, and the ImageDataGenerator will take care of the augmentation part; see the sketch below for passing actual augmentation parameters.
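One caveat worth a sketch (my addition; the parameters are illustrative): the empty ImageDataGenerator() instances above apply no augmentation at all. To actually augment, pass identical augmentation arguments and an identical seed to both generators so the image and mask transforms stay in sync:
data_gen_args = dict(rotation_range=15, horizontal_flip=True)

image_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
mask_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**data_gen_args)
# then call flow_from_directory on both with the same seed, as above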

How to use flow_from_directory in Keras for multi-class semantic segmentation?

Let's say I have 100 grayscale training images and 100 RGB training masks, each of size 512x512. I was able to one-hot encode the masks using to_categorical in Keras with the below:
numclasses = 3
masks_one_hot = to_categorical(maskArr, numclasses)
where maskArr is 100x512x512x1 and masks_one_hot is 100x512x512x3.
However, to use ImageDataGenerator and flow_from_directory with trainGenerator from https://github.com/zhixuhao/unet/blob/master/data.py, I tried to save the one-hot encoded training images and then read them back with trainGenerator. I noticed that after writing them with imwrite and reading them with imread, they changed from one-hot encoded 512x512x3 arrays to 512x512x3 RGB images: instead of each channel having a value of 0 or 1, the values now range from 0 to 255.
As a result, if I do:
myGenerator = trainGeneratorOneHot(20, 'data/membrane/train', 'image', 'label', data_gen_args,
                                   save_to_dir="data/membrane/train/aug", flag_multi_class=True,
                                   num_class=3, target_size=(512, 512, 3))

num_batch = 3
for i, batch in enumerate(myGenerator):
    if i >= num_batch:
        break
where trainGeneratorOneHot is below:
def trainGeneratorOneHot(batch_size, ..., class_mode=None, image_class_mode=None):
    image_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)

    image_generator = image_datagen.flow_from_directory(train_path, classes=[image_folder],
                                                        class_mode=image_class_mode,
                                                        color_mode=image_color_mode,
                                                        target_size=target_size, ...)
    mask_generator = mask_datagen.flow_from_directory(train_path, classes=[mask_folder],
                                                      class_mode=class_mode,
                                                      target_size=target_size, ...)
    train_generator = zip(image_generator, mask_generator)

    for (img, mask) in train_generator:
        img, mask = adjustDataOneHot(img, mask)
        yield (img, mask)


def adjustDataOneHot(img, mask):
    return (img, mask)
Then I get: ValueError: could not broadcast input array from shape (512,512,1) into shape (512,512,3,1).
How can I fix this?
I was dealing with the same issue a few days ago. I found it essential to make my own data generator class to take data from a dataframe, augment it, and one-hot encode it before passing it to my model. I was never able to get the Keras ImageDataGenerator to work for semantic segmentation problems with multiple classes.
Below is a data generator class in case it might help you out:
import keras
import numpy as np
from skimage import io

# augs_for_images, augs_for_masks and resize are augmentation/resizing
# pipelines defined elsewhere


def one_hot_encoder(mask, num_classes=8):
    hot_mask = np.zeros(shape=mask.shape, dtype='uint8')
    for _ in range(num_classes):
        temp = np.zeros(shape=mask.shape[0:2], dtype='uint8')
        temp[mask[:, :, _] != 0] = 1
        hot_mask[:, :, _] = temp
    return hot_mask


# Image data generator class
class DataGenerator(keras.utils.Sequence):
    def __init__(self, dataframe, batch_size, n_classes=8, augment=False):
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.augment = augment

    # Steps per epoch
    def __len__(self):
        return len(self.dataframe) // self.batch_size

    # Shuffles and resets the index at the end of a training epoch
    def on_epoch_end(self):
        self.dataframe = self.dataframe.sample(frac=1).reset_index(drop=True)

    # Generates data, feeds it to training
    def __getitem__(self, index):
        processed_images = []
        processed_masks = []

        for _ in range(self.batch_size):
            row = index * self.batch_size + _  # one distinct sample per iteration
            the_image = io.imread(self.dataframe['Images'][row])
            the_mask = io.imread(self.dataframe['Masks'][row]).astype('uint8')
            one_hot_mask = one_hot_encoder(the_mask, self.n_classes)

            if self.augment:
                # Resizing followed by some augmentations
                processed_image = augs_for_images(image=the_image) / 255.0
                processed_mask = augs_for_masks(image=one_hot_mask)
            else:
                # Still resizing, but no augmentations
                processed_image = resize(image=the_image) / 255.0
                processed_mask = resize(image=one_hot_mask)

            processed_images.append(processed_image)
            processed_masks.append(processed_mask)

        batch_x = np.array(processed_images)
        batch_y = np.array(processed_masks)
        return (batch_x, batch_y)
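A hypothetical usage sketch (a train_df with 'Images' and 'Masks' path columns and a compiled model are assumed to exist):
train_gen = DataGenerator(train_df, batch_size=8, n_classes=8, augment=True)
model.fit(train_gen, epochs=20)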
Also, here's a link to a repo with some semantic segmentation models that might be of interest to you. The notebook itself shows how the author dealt with multi-class semantic segmentation.
