Tensorflow: Shape of 3D tensor (of an image) in Filter has None - python

I am following this tutorial on tensorflow.org.
I have a folder images with two folders, cat and dog, in it. Following the above tutorial, I am trying to convert the .jpg and .png images into features (NumPy arrays) for modeling.
Problem
After processing the images into tensors, I found that some images were converted to tensors with shape (28, 28, 4). So I added a condition to filter out such tensors. This logic works when I explicitly loop over each tensor with a for loop after converting it to a NumPy array, but the same logic does not work when used with filter().
Please help me fix this filter(). I went through the filter() documentation and could not find a solution.
Source code
import tensorflow as tf
import os
print("TensorFlow version:", tf.__version__)
def process_image(file_path_tensor):
    parts = tf.strings.split(file_path_tensor, os.sep)
    label = parts[-2]
    image = tf.io.read_file(file_path_tensor)
    image = tf.image.decode_jpeg(image)
    image = tf.image.resize(image, [128, 128])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = image / 255
    return image, label

def check_shape(x, y):
    print("\nShape received in filter():", x.shape)
    d1, d2, d3 = x.shape
    return d3 == 3
images_ds = tf.data.Dataset.list_files("./images/*/*", shuffle=True)
file_path = next(iter(images_ds))
image, label = process_image(file_path)
print("Shape:", image.shape)
print("Class label:", label.numpy().decode())
# ETL pipeline.
X_y_tensors = (
    images_ds
    .map(process_image)      # Extract and Transform
    .filter(check_shape)     # Filter
    .as_numpy_iterator()     # Load
)
print("\nTechnique 1:")
print("Final X count:", len(list(X_y_tensors)))
X_y_tensors = images_ds.map(process_image)
count = 0
for x, y in X_y_tensors:
    d1, d2, d3 = x.shape
    if d3 > 3:
        continue
    count += 1
print("\nTechnique 2:")
print("Final X count:", count)
Output
TensorFlow version: 2.6.0
Shape: (128, 128, 3)
Class label: cat
Shape received in filter(): (128, 128, None)
Technique 1:
Final X count: 0
Technique 2:
Final X count: 123
As can be seen:
The count is 0 when Technique 1 is used to filter the tensors, since the shape received inside filter() is (128, 128, None).
The count is 123 (the image count after filtering) when Technique 2 is used.
I do not think this is an issue since I am not using batches.
Full code with dataset

Okay, so the problem is not related to filter(); it is caused by decode_jpeg(). tf.data traces check_shape() as a graph, so it only sees the static shape of the image tensor, and because decode_jpeg() is called without a fixed channel count, the channel dimension is statically unknown (None), so d3 == 3 is never true. Set the channels parameter of tf.io.decode_jpeg() to 3.
Three indicates the RGB color channels of an image.
Solution
import tensorflow as tf
import os
print("TensorFlow version:", tf.__version__)
def process_image(file_path_tensor):
    parts = tf.strings.split(file_path_tensor, os.sep)
    label = parts[-2]
    image = tf.io.read_file(file_path_tensor)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [128, 128])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = image / 255
    return image, label

def check_shape(x, y):
    print("\nShape received in filter():", x.shape)
    d1, d2, d3 = x.shape
    return d3 == 3
images_ds = tf.data.Dataset.list_files("./images/*/*", shuffle=True)
file_path = next(iter(images_ds))
image, label = process_image(file_path)
print("Shape:", image.shape)
print("Class label:", label.numpy().decode())
# ETL pipeline.
X_y_tensors = (
    images_ds
    .map(process_image)      # Extract and Transform
    .filter(check_shape)     # Filter
    .as_numpy_iterator()     # Load
)
print("\nTechnique 1:")
print("Final X count:", len(list(X_y_tensors)))
X_y_tensors = images_ds.map(process_image)
count = 0
for x, y in X_y_tensors:
    d1, d2, d3 = x.shape
    if d3 > 3:
        continue
    count += 1
print("\nTechnique 2:")
print("Final X count:", count)
Output
TensorFlow version: 2.6.0
Shape: (128, 128, 3)
Class label: cat
Shape received in filter(): (128, 128, 3)
Technique 1:
Final X count: 129
Technique 2:
Final X count: 129
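As an aside (not part of the original fix): if decoding with a fixed channel count were not an option, the filter could compare the runtime shape instead of the static one, since tf.shape() is evaluated per element as data flows through the pipeline and therefore sees the real channel count even when the static shape is (128, 128, None). A minimal sketch:
def check_shape(x, y):
    # tf.shape(x) is the dynamic shape; its last entry is the actual channel count.
    return tf.equal(tf.shape(x)[-1], 3)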

Related

"Invalid argument: indices[0,0,0,0] = 30 is not in [0, 30)"

Error:
InvalidArgumentError: indices[0,0,0,0] = 30 is not in [0, 30)
[[{{node GatherV2}}]] [Op:IteratorGetNext]
History:
I have a custom data loader for a tf.keras based U-Net for semantic segmentation, based on this example. It is written as follows:
def parse_image(img_path: str) -> dict:
    # read image
    image = tf.io.read_file(img_path)
    # image = tfio.experimental.image.decode_tiff(image)
    if xf == "png":
        image = tf.image.decode_png(image, channels = 3)
    else:
        image = tf.image.decode_jpeg(image, channels = 3)
    image = tf.image.convert_image_dtype(image, tf.uint8)
    # image = image[:, :, :-1]

    # read mask
    mask_path = tf.strings.regex_replace(img_path, "X", "y")
    mask_path = tf.strings.regex_replace(mask_path, "X." + xf, "y." + yf)
    mask = tf.io.read_file(mask_path)
    # mask = tfio.experimental.image.decode_tiff(mask)
    mask = tf.image.decode_png(mask, channels = 1)
    # mask = mask[:, :, :-1]
    mask = tf.where(mask == 255, np.dtype("uint8").type(NoDataValue), mask)

    return {"image": image, "segmentation_mask": mask}
train_dataset = tf.data.Dataset.list_files(
    dir_tls(myear = year, dset = "X") + "/*." + xf, seed = zeed)
train_dataset = train_dataset.map(parse_image)

val_dataset = tf.data.Dataset.list_files(
    dir_tls(myear = year, dset = "X_val") + "/*." + xf, seed = zeed)
val_dataset = val_dataset.map(parse_image)
## data transformations--------------------------------------------------------
#tf.function
def normalise(input_image: tf.Tensor, input_mask: tf.Tensor) -> tuple:
    input_image = tf.cast(input_image, tf.float32) / 255.0
    return input_image, input_mask

#tf.function
def load_image_train(datapoint: dict) -> tuple:
    input_image = tf.image.resize(datapoint["image"], (imgr, imgc))
    input_mask = tf.image.resize(datapoint["segmentation_mask"], (imgr, imgc))
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    input_image, input_mask = normalise(input_image, input_mask)
    return input_image, input_mask

#tf.function
def load_image_test(datapoint: dict) -> tuple:
    input_image = tf.image.resize(datapoint["image"], (imgr, imgc))
    input_mask = tf.image.resize(datapoint["segmentation_mask"], (imgr, imgc))
    input_image, input_mask = normalise(input_image, input_mask)
    return input_image, input_mask
## create datasets-------------------------------------------------------------
buff_size = 1000
dataset = {"train": train_dataset, "val": val_dataset}
# -- Train Dataset --#
dataset["train"] = dataset["train"]\
.map(load_image_train, num_parallel_calls = tf.data.experimental.AUTOTUNE)
dataset["train"] = dataset["train"].shuffle(buffer_size = buff_size,
seed = zeed)
dataset["train"] = dataset["train"].repeat()
dataset["train"] = dataset["train"].batch(bs)
dataset["train"] = dataset["train"].prefetch(buffer_size = AUTOTUNE)
#-- Validation Dataset --#
dataset["val"] = dataset["val"].map(load_image_test)
dataset["val"] = dataset["val"].repeat()
dataset["val"] = dataset["val"].batch(bs)
dataset["val"] = dataset["val"].prefetch(buffer_size = AUTOTUNE)
print(dataset["train"])
print(dataset["val"])
Now I wanted to use a weighted version of tf.keras.losses.SparseCategoricalCrossentropy for my model and I found this tutorial, which is rather similar to the example above.
However, they also offered a weighted version of the loss, using:
def add_sample_weights(image, label):
    # The weights for each class, with the constraint that:
    #     sum(class_weights) == 1.0
    class_weights = tf.constant([2.0, 2.0, 1.0])
    class_weights = class_weights / tf.reduce_sum(class_weights)
    # Create an image of `sample_weights` by using the label at each pixel as an
    # index into `class_weights`.
    sample_weights = tf.gather(class_weights, indices=tf.cast(label, tf.int32))
    return image, label, sample_weights
and
weighted_model.fit(
    train_dataset.map(add_sample_weights),
    epochs=1,
    steps_per_epoch=10)
I combined those approaches since the latter tutorial uses previously loaded data, while I want to draw the images from disc (not enough RAM to load all at once).
Resulting in the code from the first example (long code block above) followed by
def add_sample_weights(image, segmentation_mask):
    class_weights = tf.constant(inv_weights, dtype = tf.float32)
    class_weights = class_weights / tf.reduce_sum(class_weights)
    sample_weights = tf.gather(class_weights,
                               indices = tf.cast(segmentation_mask, tf.int32))
    return image, segmentation_mask, sample_weights
(inv_weights are my weights, an array of 30 float64 values) and
model.fit(dataset["train"].map(add_sample_weights),
          epochs = 45, steps_per_epoch = np.ceil(N_img/bs),
          validation_data = dataset["val"],
          validation_steps = np.ceil(N_val/bs),
          callbacks = cllbs)
When I run
dataset["train"].map(add_sample_weights).element_spec
as in the second example, I get an output that looks reasonable to me (similar to the one in the example):
Out[58]:
(TensorSpec(shape=(None, 512, 512, 3), dtype=tf.float32, name=None),
TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None),
TensorSpec(shape=(None, 512, 512, 1), dtype=tf.float32, name=None))
However, when I try to fit the model or run something like
a, b, c = dataset["train"].map(add_sample_weights).take(1)
I will receive the error mentioned above.
So far, I have found quite a few questions regarding this error (e.g., a, b, c, d); however, they all talk about "embedding layers" and things I am not aware of using.
Where does this error come from and how can I solve it?
Picture tf.gather as a fancy way to do indexing. The error you get is akin to the following example in python:
>>> my_list = [1,2,3]
>>> my_list[3]
IndexError: list index out of range
If you want to use tf.gather, the range of values of your indices must not exceed the size of the dimension of the Tensor you are indexing.
In your case, in the call tf.gather(class_weights, indices = tf.cast(segmentation_mask, tf.int32)), with class_weights being a Tensor of shape (30,), the values of segmentation_mask should be between 0 and 29. As far as I can tell from your data pipeline, segmentation_mask has values between 0 and 255. The fix will be problem-dependent.
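For illustration only (the right remapping depends on the data, so this is an assumption, not the asker's final fix): one way to keep every index inside [0, 30) is to remap out-of-range mask values before the gather, here treating anything >= 30 (e.g. 255 "no data" pixels) as class 0:
def add_sample_weights(image, segmentation_mask):
    class_weights = tf.constant(inv_weights, dtype = tf.float32)
    class_weights = class_weights / tf.reduce_sum(class_weights)
    indices = tf.cast(segmentation_mask, tf.int32)
    # Assumption: labels outside [0, 30) are "no data" and are mapped to class 0.
    indices = tf.where(indices < tf.shape(class_weights)[0], indices, tf.zeros_like(indices))
    sample_weights = tf.gather(class_weights, indices = indices)
    return image, segmentation_mask, sample_weights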

How to convert grayscale image shape with 1 channel to coloured image shape with 3 channels?

I want to feed the MNIST dataset into the MobileNet V1 CNN, but then I faced this problem:
ValueError: Error when checking input: expected input_1 to have shape (32, 32, 3) but got array with shape (28, 28, 1)
Below is my code
image_data, label_data = data['image'], data['label']

idx_list = {}
for i in range(10):
    idx_list[i] = np.where(label_data == i)  # returns tuple (row indices, column indices)

selected_test_sample_indices = {}
for label in range(10):
    selected_test_sample_indices[label] = random.sample(set(idx_list[label][0]), int(len(idx_list[label][0]) * 0.2))

selected_train_sample_indicies = {}
for label in range(10):
    selected_train_sample_indicies[label] = list(set(idx_list[label][0]) - set(selected_test_sample_indices[label]))

train_data_indicies, test_data_indicies = [], []
for label, indicies in selected_train_sample_indicies.items():
    train_data_indicies = train_data_indicies + indicies  # merge 2 lists
for label, indicies in selected_test_sample_indices.items():
    test_data_indicies = test_data_indicies + indicies

random.shuffle(train_data_indicies)
random.shuffle(test_data_indicies)

y_train_data = np.array([label_data[idx] for idx in train_data_indicies])
X_train_data = np.array([image_data[idx] for idx in train_data_indicies])
y_test_data = np.array([label_data[idx] for idx in test_data_indicies])
X_test_data = np.array([image_data[idx] for idx in test_data_indicies])

number_of_classes = 10
y_train = y_train_data
y_test = y_test_data
X_train = X_train_data.reshape(X_train_data.shape[0], img_rows, img_cols, 1)
X_test = X_test_data.reshape(X_test_data.shape[0], img_rows, img_cols, 1)
When I tried to reshape, I got the following error:
ValueError: cannot reshape array of size 11146912 into shape (14218,32,32,1)
and when I change it to (4500, 32, 32, 3), the total is lower than 11146912.
It really confused me. Please help me fix this bug.
The MNIST dataset contains grayscale images of size 28x28 pixels. That is why the shape of each image is (28, 28, 1), with each value between 0 and 255. Here's another Stack Overflow question with the same problem. The most helpful answer is to convert the grayscale images into RGB images and then resize them.
After converting the grayscale images to RGB, the shape of your images will change from
28 x 28 x 1 to 28 x 28 x 3
Then you need to resize them to 32 x 32. You can use the OpenCV library for that:
resized_image = cv2.resize(image, (32, 32))
Then your resized_image shape will be 32 x 32 x 3.
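A minimal sketch of that conversion (assuming X_train_data and X_test_data hold the raw 28x28 grayscale arrays; gray_to_rgb32 is just an illustrative helper name):
import cv2
import numpy as np

def gray_to_rgb32(images_28x28):
    # Convert (N, 28, 28) grayscale images into (N, 32, 32, 3) RGB images.
    out = np.empty((len(images_28x28), 32, 32, 3), dtype=np.float32)
    for i, img in enumerate(images_28x28):
        rgb = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_GRAY2RGB)  # (28, 28, 3)
        out[i] = cv2.resize(rgb, (32, 32))                            # (32, 32, 3)
    return out

X_train = gray_to_rgb32(X_train_data)
X_test = gray_to_rgb32(X_test_data)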

U-net training Error: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1

I’m trying to train a U-Net model on the LandCoverNet dataset, which is a satellite imagery dataset that contains input images and corresponding land cover type masks.
I have created a custom dataset to get my images and masks:
# Create custom dataset that accepts 4-channel images
from torch.utils.data import Dataset as BaseDataset, DataLoader, sampler  # Dataset aliased so the class below resolves
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
import rasterio as rio
import cv2    # used in open_mask()
import torch  # used in __getitem__()
from torchvision import transforms, datasets, models

# We have two dirs: inputs (one folder per image) and targets
class LandCoverNetDataset(BaseDataset):

    CLASSES = ['otherland', 'cropland', 'pastureland', 'bare soil', 'openwater', 'forestland']

    def __init__(self, inputs_dir, targets_dir,
                 classes = None,
                 augmentation = None,
                 preprocessing = False,
                 pytorch = True):
        super().__init__()
        self.samples = []
        self.pytorch = pytorch
        self.augmentation = augmentation
        self.preprocessing = preprocessing

        # Convert str names to class values on masks
        self.class_value = [self.CLASSES.index(cls.lower()) for cls in classes]

        # Create dictionary for images and targets
        for sub_dir in os.listdir(inputs_dir):
            files = {
                'img_bands': os.path.join(inputs_dir, sub_dir),
                'target': os.path.join(targets_dir, sub_dir[:13] + "_LC_10m.png")
            }
            self.samples.append(files)

    def __len__(self):
        return len(self.samples)

    def normalize(self, band):
        '''Normalize a numpy array to have values between 0 and 1'''
        band_min, band_max = band.min(), band.max()
        np.seterr(divide='ignore', invalid='ignore')
        normalized_band = (band - band_min) / (band_max - band_min)
        # Remove any NaN value and substitute it with zero
        where_are_NaNs = np.isnan(normalized_band)
        normalized_band[where_are_NaNs] = 0
        return normalized_band

    def open_as_array(self, idx, include_ndvi = False):
        '''
        Merge the 4 bands into one image and normalize the bands
        '''
        # List the individual bands in each image folder and stack them together
        list_bands = []
        for img_file in os.listdir(self.samples[idx]['img_bands']):
            # Get the NDVI band
            if 'NDVI' in img_file:
                ndvi_band = os.path.join(self.samples[idx]['img_bands'], img_file)
            else:
                # Get the RGB bands
                band = rio.open(os.path.join(self.samples[idx]['img_bands'], img_file)).read(1)
                if self.preprocessing:
                    # Preprocess the bands before stacking them (only RGB)
                    band = self.normalize(band)
                list_bands.append(band)

        # Stack the bands
        raw_rgb = np.stack(list_bands, axis=2).astype('float32')

        if include_ndvi:
            # Include the NDVI band in the input images
            ndvi = np.expand_dims(rio.open(ndvi_band).read(1).astype('float32'), 2)
            raw_rgb = np.concatenate([raw_rgb, ndvi], axis=2)

        if self.augmentation:
            transformed = self.augmentation(image = raw_rgb)
            raw_rgb = transformed["image"]

        if self.preprocessing:
            # Transpose to tensor shape
            raw_rgb = raw_rgb.transpose((2, 0, 1)).astype('float32')

        return raw_rgb

    def open_mask(self, idx):
        # Extract certain classes from the mask
        mask = cv2.imread(self.samples[idx]['target'], 0)
        masks = [(mask == v) for v in self.class_value]
        mask = np.stack(masks, axis=-1).astype('long')

        if self.augmentation:
            transformed = self.augmentation(image = mask)
            mask = transformed["image"]

        if self.preprocessing:
            # Preprocess the mask
            mask = self.normalize(mask)
            # Transpose to tensor shape
            mask = mask.transpose((2, 0, 1)).astype('long')

        mask = mask[0, :, :]
        return mask

    def __getitem__(self, idx):
        x = torch.tensor(self.open_as_array(idx, include_ndvi=True), dtype=torch.float)
        y = torch.tensor(self.open_mask(idx), dtype=torch.long)
        return x, y

    def open_as_pil(self, idx):
        arr = 256 * self.open_as_array(idx)
        return Image.fromarray(arr.astype(np.uint8), 'RGB')

    def __repr__(self):
        s = 'Dataset class with {} files'.format(self.__len__())
        return s
The input here is 4 bands.
This is the shape of the first batch for both input/target
torch.Size([16, 4, 224, 224])
torch.Size([16, 224, 224])
I’m using a model from segmentation-models-pytorch library, and here is how I customized it for my case:
ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'softmax2d'
DEVICE = 'cuda'
model = smp.FPN(ENCODER, classes=len(CLASSES), activation=ACTIVATION)
# Replace the model.conv1 to accept 4 channels
# first: copy the layer's weights
weight = model.encoder.layer0.conv1.weight.clone()
model.encoder.layer0.conv1 = nn.Conv2d(4, 64,kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
with torch.no_grad():
    model.encoder.layer0.conv1.weight[:, :3] = weight
    model.encoder.layer0.conv1.weight[:, 3] = model.encoder.layer0.conv1.weight[:, 0]

loss = smp.utils.losses.NLLLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.SGD([
    dict(params=model.parameters(), lr=0.001, weight_decay=1e-8, momentum=0.9),
])
# create epoch runners
# it is a simple loop of iterating over dataloader`s samples
train_epoch = smp.utils.train.TrainEpoch(
    model,
    loss=loss,
    metrics=metrics,
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)
And here is my training loop
# train model for 40 epochs
max_score = 0

for i in range(0, 40):
    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)

    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')

    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')
At first, the target shape was [16, 6, 224, 224], but I got an error and found this thread saying that it should be [batch_size, height, width].
That is why I added the line mask = mask[0, :, :] to the Dataset class, to get rid of the number-of-classes dimension, and this is where things get confusing for me, because the output of my model is torch.Size([10, 6, 224, 224]).
This is the entire error message:
Epoch: 0
train: 0%| | 0/157 [00:00<?, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-215-2ae39e205dee> in <module>()
7
8 print('\nEpoch: {}'.format(i))
----> 9 train_logs = train_epoch.run(train_loader)
10 valid_logs = valid_epoch.run(valid_loader)
11
3 frames
/usr/local/lib/python3.6/dist-packages/segmentation_models_pytorch/utils/functional.py in iou(pr, gt, eps, threshold, ignore_channels)
32 pr, gt = _take_channels(pr, gt, ignore_channels=ignore_channels)
33
---> 34 intersection = torch.sum(gt * pr)
35 union = torch.sum(gt) + torch.sum(pr) - intersection + eps
36 return (intersection + eps) / union
RuntimeError: The size of tensor a (16) must match the size of tensor b (6) at non-singleton dimension 1
Thanks!
OK, I changed the loss function to smp.utils.losses.DiceLoss(), and I was able to start training my model. I also removed mask = mask[0, :, :].
I also had an issue with my normalization. Here is how I did it:
for input (4 bands):
for i in range(raw_rgb.shape[0]):
    raw_rgb[i, :, :] = self.normalize(raw_rgb[i, :, :])
And the same for the masks (3 channels)
This was after converting them to tensor.
I would also still want to know how to prepare my masks for CrossEntropyLoss.
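For reference, a rough sketch of what a CrossEntropyLoss-friendly open_mask() could look like, under the assumptions (not verified against this dataset) that the mask PNG stores one class id per pixel and that NoDataValue is itself a valid class index; the target must then be a long tensor of class indices with shape [batch_size, height, width], not a one-hot stack:
def open_mask(self, idx):
    mask = cv2.imread(self.samples[idx]['target'], 0)  # (H, W), one class id per pixel
    mask = np.where(mask == 255, NoDataValue, mask)     # assumption: 255 means "no data"
    return mask.astype('int64')                         # later wrapped as torch.tensor(..., dtype=torch.long)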

Image Generator for 3D volumes in keras with data augmentation

Since the ImageDataGenerator by keras is not suitable for 3D volumes, I started to write my own generator for keras (semantic segmentation, not classification!).
1) If there is anybody out there who has adapted the ImageDataGenerator code to work with 3D volumes, please share it! This guy has done it for videos.
2) Following this tutorial, I wrote a custom generator.
import glob
import os
import keras
import numpy as np
import skimage
from imgaug import augmenters as iaa
class DataGenerator(keras.utils.Sequence):
    """Generates data for Keras.

    This structure guarantees that the network will only train once on each sample per epoch.
    """

    def __init__(self, list_IDs, im_path, label_path, batch_size=4, dim=(128, 128, 128),
                 n_classes=4, shuffle=True, augment=False):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.im_path = im_path
        self.label_path = label_path
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        # Generate data
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        if self.augment:
            pass
        if not self.augment:
            X = np.empty([self.batch_size, *self.dim])
            Y = np.empty([self.batch_size, *self.dim, self.n_classes])
            # Generate data
            for i, ID in enumerate(list_IDs_temp):
                img_X = skimage.io.imread(os.path.join(im_path, ID))
                X[i,] = img_X
                img_Y = skimage.io.imread(os.path.join(label_path, ID))
                Y[i,] = keras.utils.to_categorical(img_Y, num_classes=self.n_classes)
            X = X.reshape(self.batch_size, *self.dim, 1)
        return X, Y
params = {'dim': (128, 128, 128),
          'batch_size': 4,
          'im_path': "some/path/for/the/images/",
          'label_path': "some/path/for/the/label_images",
          'n_classes': 4,
          'shuffle': True,
          'augment': True}

partition = {}
im_path = "some/path/for/the/images/"
label_path = "some/path/for/the/label_images/"
images = glob.glob(os.path.join(im_path, "*.tif"))
images_IDs = [name.split("/")[-1] for name in images]
partition['train'] = images_IDs

training_generator = DataGenerator(partition['train'], **params)
My images have the size (128, 128, 128), and when I load them I get a 5D tensor of size (batch_size, depth, height, width, number_of_channels), e.g. (4, 128, 128, 128, 1). For the label images (which have the same dimensions and are single-channel coded: value 1 = label 1, value 2 = label 2, value 3 = label 3 and value 0 = label 4 or background), I get a binary representation of the labels with the to_categorical() function from keras and end up with a 5D tensor, e.g. (4, 128, 128, 128, 4). The images and label images have the same names and are located in different folders.
As I only have very few images, I would like to extend the total number of images through image augmentation. How would I do that with this generator? I have successfully tested the imgaug package, but instead of adding images to my set I only transform the existing images (e.g. flip them horizontally).
Edit: I was under a misconception regarding data augmentation. See this article about image augmentation. Images are passed in with random transformations applied on-the-fly. Now I just have to gather enough data and set the parameters with imgaug. I will update this soon; a rough sketch of what this could look like is below.
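For reference, a minimal sketch of what such on-the-fly augmentation could look like inside __data_generation, using plain numpy flips rather than imgaug (augment_pair is an illustrative helper, not tested code; the point is that the same random flip must hit both the image volume and its label volume before to_categorical() is applied):
def augment_pair(img_X, img_Y):
    # Apply identical random flips to the image volume and its label volume.
    if np.random.rand() > 0.5:
        img_X, img_Y = np.flip(img_X, axis=0), np.flip(img_Y, axis=0)
    if np.random.rand() > 0.5:
        img_X, img_Y = np.flip(img_X, axis=1), np.flip(img_Y, axis=1)
    return img_X.copy(), img_Y.copy()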
I found an implementation of a Keras customImageDataGenerator for 3D volumes. Here is a GitHub link. The implementation can easily be expanded to include new augmentation techniques. Here is a minimal working example that I am using in my project (3D volume semantic segmentation), based on the implementation I shared in the link:
from generator import customImageDataGenerator
def generator(images, groundtruth, batch):
    """Load a batch of augmented images"""
    gen = customImageDataGenerator(mirroring=True,
                                   rotation_90=True,
                                   transpose_axes=True)
    for b in gen.flow(x=images, y=groundtruth, batch_size=batch):
        yield (b[0], (b[1]).astype(float))
# images      = (123, 48, 48, 48, 1)
# groundtruth = (123, 48, 48, 48, 1)
history = model.fit(
    x=generator(images, groundtruth, batchSize),
    validation_data=(imagesTest, groundtruthTest),
    steps_per_epoch=len(images) / batchSize,
    epochs=epochs,
    callbacks=[callbacks],
)

The tf.train.batch function is generating tensors batch of shape (8, 8, 299, 299, 3)

I am using Tensorflow 1.5 GPU version on Windows 10.
This is the code.
targets = convert_to_onehot(labels_dir, no_of_features = num_classes)
assert targets.shape == (8, 120), 'THE TARGETS SHAPE IS NOT CORRECT'
targets = tf.constant(targets, dtype = tf.float32)

# To store the resized images in the form of a list, to pass to tf.train.batch()
Images = []

# Initially having a list of 8 images, just for testing purposes
images = glob.glob(images_file_path)
i = 0
for my_img in images:
    image = mpimg.imread(my_img)[:, :, :3]
    # print(image.shape)
    image = tf.constant(image, dtype = tf.float32)
    Images.append(image)
    i = i + 1
    if i == 8:
        break

batch_size = 8
images, labels = tf.train.batch([Images, targets], batch_size = batch_size, num_threads = 1, capacity = batch_size)

with tf.Session() as sess:
    print(images.shape)
    print(labels.shape)
When I run the above code, it prints the shapes (8, 8, 299, 299, 3) and (8, 8, 120) instead of (8, 299, 299, 3) and (8, 120).
The function required me to pass Images as a list, and targets is a numpy array.
Add the argument enqueue_many = True to tf.train.batch (the default is False). This tells TensorFlow that the first dimension is an index over your samples.
Source: tensorflow documentation. https://www.tensorflow.org/api_docs/python/tf/train/batch
The first 8 is your batch size; change it to 4 and you will see it change accordingly.
What you are doing wrong is pre-appending all the images with Images.append(image). This way your input already contains a batch of 8 images, and on top of that tf is batching it again; that is not how this is supposed to be done.
To solve this, pass enqueue_many=True to tf.train.batch:
tf.train.batch([Images, targets], batch_size = batch_size, num_threads = 1, capacity = batch_size, enqueue_many=True)
Read more here: https://www.tensorflow.org/api_docs/python/tf/train/batch
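Putting both points together, a hedged sketch of the corrected call (assuming the 8 images are first stacked into one tensor so that their first dimension is the example index):
images_tensor = tf.stack(Images)  # shape (8, 299, 299, 3)
images, labels = tf.train.batch(
    [images_tensor, targets],
    batch_size = batch_size,
    num_threads = 1,
    capacity = batch_size,
    enqueue_many = True)  # first dim indexes examples, so batches come out as (8, 299, 299, 3) and (8, 120)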
