How to build a custom data generator for Keras/tf.keras where the X images are augmented and the corresponding Y labels are also images (Python)

I am working on image binarization using a UNet and have a dataset of 150 images together with their binarized versions. My idea is to augment the images randomly so that each one looks different, so I have written a function that applies any of 4-5 types of noise, skewness, shearing, and so on to an image. I could easily have used
ImageDataGenerator(preprocessing_function=my_aug_function) to augment the images, but the problem is that my y target is also an image. I could also have used something like:
train_dataset = (
    train_dataset.map(
        encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
But it has 2 problems:
With a larger dataset it will blow up the memory, as the data already needs to be in memory.
Crucially, I need to augment the images on the fly to make it look like I have a huge dataset.
Another solution could be saving the augmented images to a directory, making 30-40K of them, and then loading them, but that would be a wasteful thing to do.
Now the idea is that I can use Sequence as the parent class, but how can I keep augmenting and generating new images on the fly, together with their respective binarized Y images?
My starting point is the code below. Can somebody help me with the augmentation and generation of the y images? I have X_DIR and Y_DIR, where the binarized and original images share the same file names but are stored in different directories.
class DataGenerator(tensorflow.keras.utils.Sequence):
    def __init__(self, files_path, labels_path, batch_size=32, shuffle=True, random_state=42):
        'Initialization'
        self.files = files_path
        self.labels = labels_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.random_state = random_state
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        # Shuffle the data here
        pass

    def __len__(self):
        return int(np.floor(len(self.files) / self.batch_size))

    def __getitem__(self, index):
        # What do I do here?
        pass

    def __data_generation(self, files):
        # I think this is responsible for the augmentation, but I have no idea
        # how to implement it or how it works.
        pass

Custom Image Data Generator
Load directory data into a dataframe for the CustomDataGenerator:
import os
import pandas as pd

# name_to_idx is assumed to be a dict mapping class-folder names to integer labels
def data_to_df(data_dir, subset=None, validation_split=None):
    df = pd.DataFrame()
    filenames = []
    labels = []

    for dataset in os.listdir(data_dir):
        img_list = os.listdir(os.path.join(data_dir, dataset))
        label = name_to_idx[dataset]

        for image in img_list:
            filenames.append(os.path.join(data_dir, dataset, image))
            labels.append(label)

    df["filenames"] = filenames
    df["labels"] = labels

    if subset == "train":
        split_indexes = int(len(df) * validation_split)
        train_df = df[split_indexes:]
        val_df = df[:split_indexes]
        return train_df, val_df

    return df

train_df, val_df = data_to_df(train_dir, subset="train", validation_split=0.2)
Custom Data Generator
import math

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.utils import shuffle
from tensorflow.keras.applications.resnet50 import preprocess_input  # use the preprocess_input matching your backbone

class CustomDataGenerator(tf.keras.utils.Sequence):
    ''' Custom DataGenerator to load img

    Arguments:
        data_frame = pandas data frame in filenames and labels format
        batch_size = divide data in batches
        shuffle = shuffle data before loading
        img_shape = image shape in (h, w, d) format
        augmentation = data augmentation to make model robust to overfitting

    Output:
        Img: numpy array of image
        label : output label for image
    '''

    def __init__(self, data_frame, batch_size=10, img_shape=None, augmentation=True, num_classes=None):
        self.data_frame = data_frame
        self.train_len = len(data_frame)
        self.batch_size = batch_size
        self.img_shape = img_shape
        self.augmentation = augmentation
        self.num_classes = num_classes
        print(f"Found {self.data_frame.shape[0]} images belonging to {self.num_classes} classes")

    def __len__(self):
        ''' return total number of batches '''
        self.data_frame = shuffle(self.data_frame)
        return math.ceil(self.train_len / self.batch_size)

    def on_epoch_end(self):
        ''' shuffle data after every epoch '''
        # fix: on_epoch_end is not working, shuffling in __len__ as a workaround
        pass

    def __data_augmentation(self, img):
        ''' function to apply some data augmentation '''
        img = tf.keras.preprocessing.image.random_shift(img, 0.2, 0.3)
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        return img

    def __get_image(self, file_id):
        """ open image with file_id path and apply data augmentation """
        img = np.asarray(Image.open(file_id))
        img = np.resize(img, self.img_shape)
        if self.augmentation:
            img = self.__data_augmentation(img)
        img = preprocess_input(img)
        return img

    def __get_label(self, label_id):
        """ uncomment the line below to convert the label into categorical format """
        # label_id = tf.keras.utils.to_categorical(label_id, num_classes)
        return label_id

    def __getitem__(self, idx):
        batch_x = self.data_frame["filenames"][idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.data_frame["labels"][idx * self.batch_size:(idx + 1) * self.batch_size]
        # read your data here using the batch lists, batch_x and batch_y
        x = [self.__get_image(file_id) for file_id in batch_x]
        y = [self.__get_label(label_id) for label_id in batch_y]
        return tf.convert_to_tensor(x), tf.convert_to_tensor(y)
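Once defined, the generator can be passed straight to fit. A quick usage sketch (the image shape, batch size, and class count are placeholders, and model is assumed to be a compiled Keras model):
train_gen = CustomDataGenerator(train_df, batch_size=32, img_shape=(224, 224, 3), num_classes=2)
val_gen = CustomDataGenerator(val_df, batch_size=32, img_shape=(224, 224, 3), augmentation=False, num_classes=2)

model.fit(train_gen, validation_data=val_gen, epochs=10)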

You can use libraries like albumentations and imgaug; both are good, but I have heard there are issues with the random seed in albumentations.
Here's an example taken from the imgaug documentation:
import imgaug.augmenters as iaa

seq = iaa.Sequential([
    iaa.Dropout([0.05, 0.2]),      # drop 5% or 20% of all pixels
    iaa.Sharpen((0.0, 1.0)),       # sharpen the image
    iaa.Affine(rotate=(-45, 45)),  # rotate by -45 to 45 degrees (affects segmaps)
    iaa.ElasticTransformation(alpha=50, sigma=5)  # apply water effect (affects segmaps)
], random_order=True)

# Augment images and segmaps.
images_aug = []
segmaps_aug = []
for _ in range(len(input_data)):
    images_aug_i, segmaps_aug_i = seq(image=image, segmentation_maps=segmap)
    images_aug.append(images_aug_i)
    segmaps_aug.append(segmaps_aug_i)
You are going in the right direction with the custom generator. In __getitem__, build a batch using batch_x = self.files[index:index+batch_size] and the same for batch_y, then augment them using X, y = self.__data_generation(batch_x, batch_y), which should load the images (using any library you like; I prefer OpenCV) and return the augmented pairs (plus any other manipulation).
Your __getitem__ should then return the tuple (X, y). A minimal sketch of how the two methods fit together is shown below.
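Here is one way the skeleton from the question could be filled in, assuming the X and Y images share file names across X_DIR and Y_DIR and reusing the imgaug seq pipeline defined above; the loading and normalization choices are illustrative, not a definitive implementation:
import os

import cv2
import numpy as np
import tensorflow
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

class DataGenerator(tensorflow.keras.utils.Sequence):
    def __init__(self, files, x_dir, y_dir, batch_size=32, shuffle=True):
        self.files = files  # list of file names shared by X_DIR and Y_DIR
        self.x_dir = x_dir
        self.y_dir = y_dir
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.files)

    def __len__(self):
        return int(np.floor(len(self.files) / self.batch_size))

    def __getitem__(self, index):
        batch_files = self.files[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__data_generation(batch_files)

    def __data_generation(self, batch_files):
        X, y = [], []
        for name in batch_files:
            img = cv2.imread(os.path.join(self.x_dir, name))
            mask = cv2.imread(os.path.join(self.y_dir, name), cv2.IMREAD_GRAYSCALE)
            # Wrap the binarized target so imgaug transforms it together with the image
            segmap = SegmentationMapsOnImage(mask, shape=img.shape)
            img_aug, segmap_aug = seq(image=img, segmentation_maps=segmap)  # seq from the imgaug example above
            X.append(img_aug / 255.0)
            y.append(segmap_aug.get_arr()[..., np.newaxis] / 255.0)
        return np.array(X), np.array(y)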

You can use ImageDataGenerator even if your label is an image.
Here is a simple example of how you can do that:
Code:
# Specifying your data augmentation here for both image and label
image_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
mask_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

# Provide the same seed and keyword arguments to the flow methods
seed = 1
image_generator = image_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

# Combine the image and label generators.
train_generator = zip(image_generator, mask_generator)
Now, if you iterate over it you will get:
for image, label in train_generator:
    print(image.shape, label.shape)
    break
Output:
(32, 256, 256, 3) (32, 256, 256, 3)
You can use this train_generator with the fit() command.
Code:
model.fit(
    train_generator,
    steps_per_epoch=2000,
    epochs=50)
With flow_from_directory your memory won't be cluttered, and ImageDataGenerator will take care of the augmentation part.

Related

image_dataset_from_directory using a subset of sub-directories

I have downloaded the MINC dataset for material classification, which consists of 23 categories. However, I am only interested in a subset of the categories (e.g. [wood, foliage, glass, hair]).
Is it possible to get a subset of the data using tf.keras.preprocessing.image_dataset_from_directory?
I have tried tf.keras.preprocessing.image_dataset_from_directory(folder_dir, label_mode="categorical", class_names=["wood", "foliage", "glass", "hair"]) but it gives this error: The `class_names` passed did not match the names of the subdirectories of the target directory.
Is there a way to get a subset of the directories without deleting or modifying the folders? I know datagen.flow_from_directory is able to do it, but Keras says that it is deprecated and I should use image_dataset_from_directory.
There are two ways of doing this. The first way is with a generator, but that process is costly; the other way is called "Using tf.data for finer control". You can check this out at this link:
https://www.tensorflow.org/tutorials/load_data/images
But I will show you a brief demo of how you can load only the folders of your choice.
So, let's start...
#First import some libraries which are needed
import os
import glob
import tensorflow as tf
import matplotlib.pyplot as plt
I am taking only two classes, "cats" vs "dogs"; you can take more than two classes...
batch_size = 32
img_height = 180
img_width = 180

# Define your data directory, where your dataset is placed
data_dir = "path/to/your/dataset/folder/"

# Define a list of folder names for your dataset; I am only loading
# cats and dogs, you can fill it with more if you have more
dataset_names = ['cats', 'dogs']

# Glob the list of images in these two directories (cats & dogs)
list_files = [glob.glob(os.path.join(data_dir, name, '*.jpg')) for name in dataset_names]
list_files = list_files[0] + list_files[1]
image_count = len(list_files)

# Pass this list to a tf.data.Dataset
list_files = tf.data.Dataset.from_tensor_slices(list_files)

# Define your class names to label your dataset later...
class_names = ['cats', 'dogs']

# Define the validation/train split
val_size = int(image_count * 0.2)
train_ds = list_files.skip(val_size)
val_ds = list_files.take(val_size)
# To get the labels
def get_label(file_path):
    # Convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    # The second-to-last path component is the class-folder name
    one_hot = parts[-2] == class_names
    # Integer encode the label
    return tf.argmax(one_hot)
def decode_img(img):
    # Convert the compressed string to a 3D uint8 tensor
    img = tf.io.decode_jpeg(img, channels=3)
    # Resize the image to the desired size
    return tf.image.resize(img, [img_height, img_width])

def process_path(file_path):
    label = get_label(file_path)
    # Load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label
# Use Dataset.map to create a dataset of (image, label) pairs.
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
train_ds = train_ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)

# Configure the dataset for performance
def configure_for_performance(ds):
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

train_ds = configure_for_performance(train_ds)
val_ds = configure_for_performance(val_ds)
# Visualize the data
image_batch, label_batch = next(iter(train_ds))

plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(image_batch[i].numpy().astype("uint8"))
    label = label_batch[i]
    plt.title(class_names[label])
    plt.axis("off")
Output: a 3x3 grid of sample training images titled with their class names.
Link to the Colab notebook:
https://colab.research.google.com/drive/1oUNuGVDWDLqwt_YQ80X-CBRL6kJ_YhUX?usp=sharing

How to use flow_from_directory in Keras for multi-class semantic segmentation?

Let's say I have 100 grayscale training images and 100 RGB training masks, each of size 512x512. I was able to one-hot encode the masks using to_categorical in Keras as below:
numclasses = 3
masks_one_hot = to_categorical(maskArr, numclasses)
where maskArr is 100x512x512x1 and masks_one_hot is 100x512x512x3.
However, to use ImageDataGenerator and flow_from_directory via trainGenerator from https://github.com/zhixuhao/unet/blob/master/data.py, I tried to save the one-hot encoded training images and then read them using trainGenerator. I noticed that after writing them with imwrite and reading them back with imread, they changed from one-hot encoded 512x512x3 arrays to 512x512x3 RGB images; that is, instead of each channel holding a value of 0 or 1, the values now range from 0 to 255.
As a result, if I do:
myGenerator = trainGeneratorOneHot(20, 'data/membrane/train', 'image', 'label', data_gen_args,
                                   save_to_dir="data/membrane/train/aug", flag_multi_class=True,
                                   num_class=3, target_size=(512, 512, 3))

num_batch = 3
for i, batch in enumerate(myGenerator):
    if i >= num_batch:
        break
where trainGeneratorOneHot is defined below:
def trainGeneratorOneHot(batch_size, ..., class_mode=None, image_class_mode=None):
    image_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)

    image_generator = image_datagen.flow_from_directory(train_path, classes=[image_folder],
        class_mode=image_class_mode, color_mode=image_color_mode, target_size=target_size, ...)
    mask_generator = mask_datagen.flow_from_directory(train_path, classes=[mask_folder],
        class_mode=class_mode, target_size=target_size, ...)

    train_generator = zip(image_generator, mask_generator)
    for (img, mask) in train_generator:
        img, mask = adjustDataOneHot(img, mask)
        yield (img, mask)

def adjustDataOneHot(img, mask):
    return (img, mask)
Then I get: `ValueError: could not broadcast input array from shape (512,512,1) into shape (512,512,3,1)`
How can I fix this?
I was dealing with the same issue a few days ago. I found it essential to write my own data generator class to take in data from a dataframe, augment it, and then one-hot encode it before passing it to my model. I was never able to get the Keras ImageDataGenerator to work for semantic segmentation problems with multiple classes.
Below is a data generator class in case it might help you out:
import keras
import numpy as np
from skimage import io

def one_hot_encoder(mask, num_classes=8):
    hot_mask = np.zeros(shape=mask.shape, dtype='uint8')
    for c in range(num_classes):
        temp = np.zeros(shape=mask.shape[0:2], dtype='uint8')
        temp[mask[:, :, c] != 0] = 1
        hot_mask[:, :, c] = temp
    return hot_mask
# Image data generator class
class DataGenerator(keras.utils.Sequence):

    def __init__(self, dataframe, batch_size, n_classes=8, augment=False):
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.augment = augment

    # Steps per epoch
    def __len__(self):
        return len(self.dataframe) // self.batch_size

    # Shuffles and resets the index at the end of a training epoch
    def on_epoch_end(self):
        self.dataframe = self.dataframe.sample(frac=1).reset_index(drop=True)

    # Generates data, feeds it to training
    def __getitem__(self, index):
        processed_images = []
        processed_masks = []

        for row in range(index * self.batch_size, (index + 1) * self.batch_size):
            the_image = io.imread(self.dataframe['Images'][row])
            the_mask = io.imread(self.dataframe['Masks'][row]).astype('uint8')
            one_hot_mask = one_hot_encoder(the_mask, self.n_classes)

            if self.augment:
                # Resizing followed by some augmentations
                # (augs_for_images / augs_for_masks / resize are the author's
                # augmentation pipelines; see the sketch after this code)
                processed_image = augs_for_images(image=the_image) / 255.0
                processed_mask = augs_for_masks(image=one_hot_mask)
            else:
                # Still resizing, but no augmentations
                processed_image = resize(image=the_image) / 255.0
                processed_mask = resize(image=one_hot_mask)

            processed_images.append(processed_image)
            processed_masks.append(processed_mask)

        batch_x = np.array(processed_images)
        batch_y = np.array(processed_masks)
        return (batch_x, batch_y)
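The augs_for_images, augs_for_masks, and resize helpers are not defined in the answer. Here is a plausible sketch of them using albumentations, purely as an assumption about what such pipelines might look like; note that any random spatial transform would have to be applied to the image and mask in a single call to keep them aligned, so the random parts below are photometric only:
import cv2
import albumentations as A

# Hypothetical definitions of the undefined helpers above
_resize_img = A.Resize(512, 512)
# Nearest-neighbour interpolation keeps the one-hot mask values at 0/1
_resize_mask = A.Resize(512, 512, interpolation=cv2.INTER_NEAREST)
_image_augs = A.Compose([
    A.Resize(512, 512),
    A.RandomBrightnessContrast(p=0.5),  # photometric only, masks unaffected
    A.GaussNoise(p=0.3),
])

def resize(image):
    return _resize_img(image=image)['image']

def augs_for_images(image):
    return _image_augs(image=image)['image']

def augs_for_masks(image):
    return _resize_mask(image=image)['image']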
Also, here's a link to a repo with some semantic segmentation models that might be of interest to you. The notebook itself shows how the author dealt with multi-class semantic segmentation.

Image Generator for 3D volumes in keras with data augmentation

Since the ImageDataGenerator in Keras is not suitable for 3D volumes, I started to write my own generator for Keras (semantic segmentation, not classification!).
1) If anybody out there has adapted the ImageDataGenerator code to work with 3D volumes, please share it! This guy has done it for videos.
2) Following this tutorial, I wrote a custom generator.
import glob
import os

import keras
import numpy as np
import skimage
from imgaug import augmenters as iaa

class DataGenerator(keras.utils.Sequence):
    """Generates data for Keras.
    This structure guarantees that the network will only train once
    on each sample per epoch."""

    def __init__(self, list_IDs, im_path, label_path, batch_size=4, dim=(128, 128, 128),
                 n_classes=4, shuffle=True, augment=False):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.im_path = im_path
        self.label_path = label_path
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        if self.augment:
            pass  # this augmentation branch is what the question is about
        if not self.augment:
            X = np.empty([self.batch_size, *self.dim])
            Y = np.empty([self.batch_size, *self.dim, self.n_classes])
            # Generate data
            for i, ID in enumerate(list_IDs_temp):
                img_X = skimage.io.imread(os.path.join(self.im_path, ID))
                X[i,] = img_X
                img_Y = skimage.io.imread(os.path.join(self.label_path, ID))
                Y[i,] = keras.utils.to_categorical(img_Y, num_classes=self.n_classes)
            X = X.reshape(self.batch_size, *self.dim, 1)
            return X, Y

params = {'dim': (128, 128, 128),
          'batch_size': 4,
          'im_path': "some/path/for/the/images/",
          'label_path': "some/path/for/the/label_images",
          'n_classes': 4,
          'shuffle': True,
          'augment': True}

partition = {}
im_path = "some/path/for/the/images/"
label_path = "some/path/for/the/label_images/"

images = glob.glob(os.path.join(im_path, "*.tif"))
images_IDs = [name.split("/")[-1] for name in images]

partition['train'] = images_IDs
training_generator = DataGenerator(partition['train'], **params)
My images have the size (128, 128, 128), and when I load them in I get a 5D tensor of size (batch_size, depth, height, width, number_of_channels), e.g. (4, 128, 128, 128, 1). For the label images (which have the same dimensions and are single-channel coded: value 1 = label 1, value 2 = label 2, value 3 = label 3, and value 0 = label 4 or background), I get a binary representation of the labels with the to_categorical() function from Keras and end up with a 5D tensor, e.g. (4, 128, 128, 128, 4). The images and label images have the same names and are located in different folders.
As I only have very few images, I would like to extend the total number of images through image augmentation. How would I do that with this generator? I have successfully tested the imgaug package, but instead of adding images to my set I only transform the existing images (e.g. flip them horizontally).
Edit: I was under a misconception regarding data augmentation; see this article about image augmentation. Images are passed in with random transformations (on the fly), so the network effectively sees new samples every epoch. Now I just have to gather enough data and set the parameters with imgaug. I will update this soon.
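For reference, one simple way to fill in the augment branch without any extra libraries is to apply the same random flips to a volume and its label image. A minimal, illustrative sketch of a drop-in __data_generation for the generator above (the axes and probabilities are assumptions, not a definitive implementation):
    def __data_generation(self, list_IDs_temp):
        X = np.empty([self.batch_size, *self.dim])
        Y = np.empty([self.batch_size, *self.dim, self.n_classes])
        for i, ID in enumerate(list_IDs_temp):
            img_X = skimage.io.imread(os.path.join(self.im_path, ID))
            img_Y = skimage.io.imread(os.path.join(self.label_path, ID))
            if self.augment:
                # Apply identical random flips to the volume and its labels,
                # so the segmentation stays aligned with the image
                for axis in (0, 1, 2):
                    if np.random.rand() < 0.5:
                        img_X = np.flip(img_X, axis=axis)
                        img_Y = np.flip(img_Y, axis=axis)
            X[i,] = img_X
            Y[i,] = keras.utils.to_categorical(img_Y, num_classes=self.n_classes)
        return X.reshape(self.batch_size, *self.dim, 1), Y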
I found an implementation of a custom Keras DataGenerator for 3D volumes. Here is the GitHub link. The implementation can easily be extended to include new augmentation techniques. Here is a minimal working example from my project (3D volume semantic segmentation), based on the implementation I shared in the link:
from generator import customImageDataGenerator

def generator(images, groundtruth, batch):
    """Load a batch of augmented images"""
    gen = customImageDataGenerator(mirroring=True,
                                   rotation_90=True,
                                   transpose_axes=True)
    for b in gen.flow(x=images, y=groundtruth, batch_size=batch):
        yield (b[0], (b[1]).astype(float))

# images = (123, 48, 48, 48, 1)
# groundtruth = (123, 48, 48, 48, 1)
history = model.fit(
    x=generator(images, groundtruth, batchSize),
    validation_data=(imagesTest, groundtruthTest),
    steps_per_epoch=len(images) // batchSize,
    epochs=epochs,
    callbacks=[callbacks],
)

Keras: Combining data generators to handle image + text

I am working on a multilabel classification model where I am trying to combine two models, a CNN and a text classifier, into one model using Keras and train them together, like so:
# cnn_model is a VGG16 model
# text_model looks as follows:
# (takes the vectorized text as input)
text_model = Sequential()
text_model.add(Dense(vec_size, input_shape=(vec_size,), name='aux_input'))

# Merging both models
merged = Merge([cnn_model, text_model], mode='concat')

# final_model takes the combined models and adds a softmax classifier on top
final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(n_classes, activation='softmax'))
As such, I am working with an ImageDataGenerator to process the images and their respective labels.
For the images, I am using a custom helper function that reads images into the model via paths provided by pandas dataframes, one for training (df_train) and one for validation (df_validation). The dataframes also provide the final labels for the model in the "label_vec" column:
# From https://github.com/keras-team/keras/issues/5152
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    base_dir = os.path.dirname(in_df[path_col].values[0])
    print('## Ignore next message from keras, values are replaced anyways')
    df_gen = img_data_gen.flow_from_directory(base_dir, class_mode='sparse', **dflow_args)
    df_gen.filenames = in_df[path_col].values
    df_gen.classes = numpy.stack(in_df[y_col].values)
    df_gen.samples = in_df.shape[0]
    df_gen.n = in_df.shape[0]
    df_gen._set_index_array()
    df_gen.directory = ''  # since we have the full path
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    return df_gen
from keras.applications.vgg16 import preprocess_input

train_datagen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input,
                                                             horizontal_flip=True)
validation_datagen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)  # rescale=1./255

train_generator = flow_from_dataframe(train_datagen, df_train,
                                      path_col='filename',
                                      y_col='label_vec',
                                      target_size=(224, 224), batch_size=128, shuffle=False)

validation_generator = flow_from_dataframe(validation_datagen, df_validation,
                                           path_col='filename',
                                           y_col='label_vec',
                                           target_size=(224, 224), batch_size=64, shuffle=False)
Now I am trying to provide my one-hot encoded text vectors (e.g. [0,0,0,1,0,0]) to the model; they are also stored in a pandas dataframe.
Since my train_generator provides me with the image and label data, I am looking for a solution that combines this generator with a generator that additionally feeds the respective text vector.
You might want to consider writing your own generator (making use of Keras' Sequence object to allow for multiprocessing) instead of modifying the ImageDataGenerator code. From the Keras docs:
class CIFAR10Sequence(Sequence):

    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return np.array([
            resize(imread(file_name), (200, 200))
            for file_name in batch_x]), np.array(batch_y)
You could have your labels, paths to the images, and paths to the text files in a single pandas dataframe, and modify the __getitem__ method above to have your generator yield all three simultaneously: one list of numpy arrays X containing all the inputs, and one numpy array Y containing the outputs. A sketch of that idea follows.
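A minimal sketch of such a generator, assuming a dataframe with 'filename', 'text_vec', and 'label_vec' columns (the column names, image size, and preprocessing are illustrative assumptions, not the asker's exact setup):
import numpy as np
from keras.utils import Sequence
from skimage.io import imread
from skimage.transform import resize

class ImageTextSequence(Sequence):

    def __init__(self, df, batch_size):
        self.df = df
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.df) / float(self.batch_size)))

    def __getitem__(self, idx):
        rows = self.df.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        images = np.array([resize(imread(f), (224, 224))
                           for f in rows['filename']])
        texts = np.stack(rows['text_vec'].values)    # one-hot text vectors
        labels = np.stack(rows['label_vec'].values)
        # Two inputs, matching the two merged branches, and one output
        return [images, texts], labels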

TensorFlow: How to iterate on or convert the output of tf.train.batch?

I'm trying to make a nearest-neighbor algorithm to classify images into 2 classes. I'm working with the examples and code from https://github.com/aymericdamien/TensorFlow-Examples
The following snippet of code results in Xtr and Ytr being 2 arrays that I can loop over with a statement like for i in range(len(Xte)): and index like Xte[i, :].
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
Xtr, Ytr = mnist.train.next_batch(5000) #5000 for training
Xte, Yte = mnist.test.next_batch(200) #200 for testing
This code can be found here https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/nearest_neighbor.ipynb
What I'm trying to do now is the same with my own dataset of images, following the example from https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/5_DataManagement/build_an_image_dataset.ipynb to build my own dataset.
The following statement is used after decoding and resizing the images; it generates a batch of tensors representing the images together with their labels, but I can't use them and loop over them like I did above:
X, Y = tf.train.batch([image, label], batch_size=batch_size, capacity=batch_size*8, num_threads=4)
My question is: how do I use those tensors and iterate over them, or parse them into arrays, in order to use them the same way as above?
Here is the full definition of my read_images function that I use to generate the batch:
N_CLASSES = 2
IMG_HEIGHT = 28
IMG_WIDTH = 28
CHANNELS = 1

def read_images(dataset_path, batch_size):
    imagepaths, labels = list(), list()
    label = 0
    try:  # Python 2
        classes = sorted(os.walk(dataset_path).next()[1])
    except:  # Python 3
        classes = sorted(os.walk(dataset_path).__next__()[1])

    for c in classes:
        c_dir = os.path.join(dataset_path, c)
        try:  # Python 2
            walk = os.walk(c_dir).next()
        except:  # Python 3
            walk = os.walk(c_dir).__next__()
        for sample in walk[2]:
            if sample.endswith('.jpg') or sample.endswith('.jpeg'):
                imagepaths.append(os.path.join(c_dir, sample))
                labels.append(label)
        label += 1  # next class gets the next label

    imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
    labels = tf.convert_to_tensor(labels, dtype=tf.int32)

    image, label = tf.train.slice_input_producer([imagepaths, labels], shuffle=True)

    image = tf.read_file(image)
    image = tf.image.decode_jpeg(image, channels=CHANNELS)
    image = tf.image.resize_images(image, [IMG_HEIGHT, IMG_WIDTH])
    image = image * 1.0/127.5 - 1.0

    X, Y = tf.train.batch([image, label], batch_size=batch_size,
                          capacity=batch_size * 8, num_threads=4)
    return X, Y
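For context: the X and Y returned by read_images are symbolic tensors from the old TF1 queue-based pipeline, so they have to be evaluated inside a session after the queue runners are started. A minimal sketch of pulling one batch out as plain numpy arrays (standard TF1 boilerplate; the dataset path and batch size are placeholders):
X, Y = read_images("path/to/dataset", batch_size=200)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Start the threads that feed tf.train.batch
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Each sess.run pulls one batch as numpy arrays,
    # which can then be indexed like Xte[i, :] above
    Xtr, Ytr = sess.run([X, Y])

    coord.request_stop()
    coord.join(threads)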
