I am trying to create TFRecords from an image_dataset_from_directory dataset, but when I visualize the images to check that the encoding was correct, they come out distorted.
How I created the tfrecord:
Step 1: create dataset using image_dataset_from_directory
data_dir = 'path to JPG dataset'
load_split = partial(
    tf.keras.preprocessing.image_dataset_from_directory,
    data_dir,
    validation_split=0.2,
    shuffle=True,
    seed=123,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=1,
)
ds_train = load_split(subset='training')
ds_valid = load_split(subset='validation')
Step 2: encoding functions
def process_image(image, label):
    image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
    image = tf.io.encode_jpeg(image)
    label = tf.one_hot(label, NUM_CLASSES)
    return image, label
def make_example(encoded_image, label):
    image_feature = Feature(
        bytes_list=BytesList(value=[
            encoded_image,
        ]),
    )
    label_feature = Feature(
        float_list=FloatList(value=label)
    )
    features = Features(feature={
        'image': image_feature,
        'label': label_feature,
    })
    example = Example(features=features)
    return example.SerializeToString()
Step 3: encoding and creating tfrecord
ds_train_encoded = (
    ds_train
    .unbatch()
    .map(process_image)
)
ds_valid_encoded = (
    ds_valid
    .unbatch()
    .map(process_image)
)
ds_train_encoded_iter = (
    ds_train_encoded
    .as_numpy_iterator()
)
with tf.io.TFRecordWriter(path='train.tfrecord') as f:  # you can pass a gs:// path here :)
    for encoded_image, label in ds_train_encoded_iter:
        example = make_example(encoded_image, label)
        f.write(example)
ds_valid_encoded_iter = (
    ds_valid_encoded
    .as_numpy_iterator()
)
with tf.io.TFRecordWriter(path='/home/et/medai/images/tfrecords/test.tfrecord') as f:
    for encoded_image, label in ds_valid_encoded_iter:
        example = make_example(encoded_image, label)
        f.write(example)
How I tried to visualize the images in the tfrecords
Step 1: decoding functions
def _parse_image_function(example):
    image_feature_description = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([40], tf.float32),
    }
    features = tf.io.parse_single_example(example, image_feature_description)
    image = tf.image.decode_jpeg(features['image'], channels=3)
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    # image = features['image']
    label = features['label']
    return image, label
def read_dataset(filename, batch_size):
    dataset = tf.data.TFRecordDataset(filename)
    dataset = dataset.map(_parse_image_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(500)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    # dataset = dataset.repeat()
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
Step 2: decode and display
x = read_dataset('/home/et/medai/images/tfrecords/tests_train.tfrecord', 32)
plt.figure(figsize=(10, 10))
batch_size = 32
for images, labels in x.take(1):
    for i in range(batch_size):
        # display.display(display.Image(data=images[i].numpy()))
        ax = plt.subplot(6, 6, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.axis("off")
The result is something distorted:
https://i.stack.imgur.com/tCAik.jpg
I am not quite sure where this distortion comes from. Original images look like this:
https://i.stack.imgur.com/Zi4HG.png
Any ideas?
I had a similar issue. I fixed my problem by normalizing the image during preprocessing (in your case, in process_image).
When pixel data is kept in the 0~255 range, it tends to break while the image data is being manipulated, for example when converting to bytes or resizing, because those operations round and rescale the pixel values.
So try normalizing your image pixel data to float values between 0. and 1.
I used OpenCV to fix this, and I hope you can figure out your problem in a similar way. Refer to the code I posted below.
# This line distorted my images.
img = cv2.normalize(img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
# I changed to this line, and it worked.
img = cv2.normalize(img, None, alpha=0., beta=1., norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
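The same idea carries over to TensorFlow. A minimal sketch of the asker's process_image with the scaling made explicit (assuming image_dataset_from_directory is delivering float32 pixels in the 0~255 range, which tf.image.convert_image_dtype does not expect):
def process_image(image, label):
    # image_dataset_from_directory yields float32 pixels in [0, 255];
    # convert_image_dtype assumes float inputs are already in [0, 1],
    # so bring the values into that range first to avoid saturation.
    image = image / 255.0
    image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
    image = tf.io.encode_jpeg(image)
    label = tf.one_hot(label, NUM_CLASSES)
    return image, label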
When I faced that issue, I used:
image = open(content, 'rb').read()
instead of:
image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
image = tf.io.encode_jpeg(image)
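In other words, read the already-encoded JPEG bytes straight from disk and write those into the record, skipping the decode/re-encode round trip. A rough sketch of what that could look like for this question (the file-path argument and helper name are placeholders, not the asker's pipeline):
def make_example_from_file(jpeg_path, one_hot_label):
    # The bytes written here are exactly the bytes of the original .jpg file on disk.
    image_bytes = open(jpeg_path, 'rb').read()
    return Example(features=Features(feature={
        'image': Feature(bytes_list=BytesList(value=[image_bytes])),
        'label': Feature(float_list=FloatList(value=one_hot_label)),
    })).SerializeToString()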
I am studying electrical engineering and doing some ML work at the hardware level.
I downloaded the ImageNet subset using tensorflow_datasets:
ds = tfds.load('imagenet2012_subset',
               data_dir=os.path.join(write_dir, 'data'),
               #split='validation',
               shuffle_files=False,
               download=True,
               as_supervised=True,
               download_and_prepare_kwargs=download_and_prepare_kwargs,
               with_info=False)
Currently I am preparing the data like that:
def resize_with_crop(image, label):
    i = image
    i = tf.cast(i, tf.float32)
    #i = tf.image.resize_with_crop_or_pad(i, 224, 224)
    i = tf.image.resize(i, size=(224, 224))
    #i = tf.expand_dims(i, axis=0)
    i = tf.keras.applications.mobilenet_v2.preprocess_input(i)
    return (i, label)
train = ds['train']
val = ds['validation']
# Preprocess the images
ds_train = train.map(resize_with_crop)
ds_val = val.map(resize_with_crop)
Now comes the ugly part: the way I extract the images and labels.
label = []
img = []
for element in ds_val.take(10000).as_numpy_iterator():
    img.append(element[0])
    label.append(element[1])
img = np.asarray(img)
label = np.asarray(label)
This approach works, but it is ugly and it kills my RAM.
Does anyone know how to extract the labels and images in a clean way?
The model I used is the MobileNetV2 pretrained on Imagenet:
model = keras.applications.MobileNetV2()
Thank you!
I have a video in .mp4 format, eval.mp4. I also have a fine-tuned PyTorch ResNet with which I would like to perform inference on single frames read from the video, or on single .png files saved to disk.
My pre-trained network works fine with .png files that I load from disk and pass through the training/validation transforms. But during inference, rather than writing each frame of the eval.mp4 video to disk as a .png solely for the purpose of inferring on it, I would like to simply transform each captured frame into the format the network can evaluate.
My dataset classes / dataloaders look like:
# create total dataset, no transforms
class MouseDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.mouse_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.mouse_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # img_name is root_dir+file_name
        img_name = os.path.join(self.root_dir,
                                self.mouse_frame.iloc[idx, 0])
        image = Image.open(img_name)
        coordinates = self.mouse_frame.iloc[idx, 1:]
        coordinates = np.array([coordinates])

        if self.transform:
            image = self.transform(image)
        return (image, coordinates)

# break total dataset into subsets for different transforms
class DatasetSubset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        # get image
        image = self.dataset[index][0]

        # transform for input into nn
        if self.transform:
            image = image.convert('RGB')
            image = self.transform(image)
            image = image.to(torch.float)
            #image = torch.unsqueeze(image, 0)

        # get coordinates
        coordinates = self.dataset[index][1]

        # transform for input into nn
        coordinates = coordinates.astype('float').reshape(-1, 2)
        coordinates = torch.from_numpy(coordinates)
        coordinates = coordinates.to(torch.float)
        return (image, coordinates)

# create training / val split
train_split = 0.8
train_count = int(train_split * len(total_dataset))
val_count = int(len(total_dataset) - train_count)
train_subset, val_subset = torch.utils.data.random_split(total_dataset, [train_count, val_count])

# create training / val datasets
train_dataset = DatasetSubset(train_subset, transform=data_transforms['train'])
val_dataset = DatasetSubset(val_subset, transform=data_transforms['val'])

# create train / val dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers)

dataloaders_dict = {}
dataloaders_dict['train'] = train_dataloader
dataloaders_dict['val'] = val_dataloader
My training vs. validation transforms (which are identical for testing purposes):
# Data augmentation and normalization for training
# Just normalization for validation
# required dimensions of input image
input_image_width = 224
input_image_height = 224
# mean and std of RGB pixel intensities
# ImageNet mean [0.485, 0.456, 0.406]
# ImageNet standard deviation [0.229, 0.224, 0.225]
model_mean = [0.485, 0.456, 0.406]
model_std = [0.229, 0.224, 0.225]
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((input_image_height, input_image_width)),
        transforms.ToTensor(),
        transforms.Normalize(model_mean, model_std)
    ]),
    'val': transforms.Compose([
        transforms.Resize((input_image_height, input_image_width)),
        transforms.ToTensor(),
        transforms.Normalize(model_mean, model_std)
    ]),
}
What I've tried to do is read each frame from an OpenCV VideoCapture object, convert it to PIL using this answer, and then infer; but the result is very different from simply reading the frame, saving it as a .png, and then inferring on the .png.
The code that I am testing:
# Standard imports
import cv2
import numpy as np
import torch
import torchvision
from torchvision import models, transforms
from PIL import Image
# load best model for evaluation
BEST_PATH = 'resnet152_best.pt'
model_ft = torch.load(BEST_PATH)
#print(model_ft)
model_ft.eval()
# Data augmentation and normalization for training
# Just normalization for validation
# required dimensions of input image
input_image_width = 224
input_image_height = 224
# mean and std of RGB pixel intensities
# ImageNet mean [0.485, 0.456, 0.406]
# ImageNet standard deviation [0.229, 0.224, 0.225]
model_mean = [0.485, 0.456, 0.406]
model_std = [0.229, 0.224, 0.225]
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((input_image_height, input_image_width)),
        transforms.ToTensor(),
        transforms.Normalize(model_mean, model_std)
    ]),
    'val': transforms.Compose([
        transforms.Resize((input_image_height, input_image_width)),
        transforms.ToTensor(),
        transforms.Normalize(model_mean, model_std)
    ]),
}
# Read image
cap = cv2.VideoCapture('eval.mp4')
total_frames = cap.get(7)  # 7 == cv2.CAP_PROP_FRAME_COUNT
cap.set(1, 6840)           # 1 == cv2.CAP_PROP_POS_FRAMES: seek to frame 6840
ret, frame = cap.read()
cv2.imwrite('eval_6840.png', frame)
png_file = 'eval_6840.png'
# eval png
png_image = Image.open(png_file)
png_image = png_image.convert('RGB')
png_image = data_transforms['val'](png_image)
png_image = png_image.to(torch.float)
png_image = torch.unsqueeze(png_image, 0)
print(png_image.shape)
output = model_ft(png_image)
print(output)
# eval frame
vid_image = Image.fromarray(frame)
vid_image = vid_image.convert('RGB')
vid_image = data_transforms['val'](vid_image)
vid_image = vid_image.to(torch.float)
vid_image = torch.unsqueeze(vid_image, 0)
print(vid_image.shape)
output = model_ft(vid_image)
print(output)
This returns:
torch.Size([1, 3, 224, 224])
tensor([[ 0.0229, -0.0990]], grad_fn=<AddmmBackward0>)
torch.Size([1, 3, 224, 224])
tensor([[ 0.0797, -0.2219]], grad_fn=<AddmmBackward0>)
My questions are:
(1) Why is the OpenCV frame evaluation different from the PNG file evaluation? All of the transformations appear to be identical (including the RGB conversion per the comments).
(2) How can I make the frame evaluation identical to the png evaluation given that both images are captured from the exact same segment of the video?
Here's a fun fact about OpenCV: it works in BGR space rather than RGB.
This might be the reason why you get different results when processing PNG images (read via PIL.Image) vs. video frames (read via OpenCV).
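A minimal sketch of the fix under that assumption, reusing frame from the question:
import cv2
from PIL import Image

# cv2.VideoCapture returns frames in BGR order; reorder the channels
# before handing the array to PIL / torchvision.
vid_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))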
Posting this answer here in case it helps anyone.
The issue is that png_image = Image.open(png_file) creates an object of type PIL.PngImagePlugin.PngImageFile.
A VideoCapture frame, however, is an object of type numpy.ndarray, and the conversion step vid_image = Image.fromarray(frame) creates an object of type PIL.Image.Image.
I tried converting the PIL.Image.Image object to a PIL.PngImagePlugin.PngImageFile and vice versa to make them comparable, but it does not seem possible using PIL's convert method. Others seem to have had this issue as well.
So the solution was to convert back and forth between numpy.ndarray and PIL image types, to make use of the transforms functionality in the PIL image library that PyTorch relies on. Probably not the most efficient method, but the end result is identical input objects and model predictions.
For reference:
# Read image
cap = cv2.VideoCapture('eval.mp4')
total_frames = cap.get(7)
cap.set(1, 6840)
ret, frame = cap.read()
cv2.imwrite('eval_6840.png', frame)
png_file = 'eval_6840.png'
# eval png
png_image = Image.open(png_file)
png_array = np.array(png_image)
png_image = Image.fromarray(png_array)
png_image = data_transforms['val'](png_image)
png_image = png_image.to(torch.float)
png_image = torch.unsqueeze(png_image, 0)
png_image = png_image.to(device)
output = model_ft(png_image)
print(output)
# eval frame
vid_array = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
vid_image = Image.fromarray(vid_array)
vid_image = data_transforms['val'](vid_image)
vid_image = vid_image.to(torch.float)
vid_image = torch.unsqueeze(vid_image, 0)
vid_image = vid_image.to(device)
output = model_ft(vid_image)
print(output)
Yields:
tensor([[ 0.0229, -0.0990]], grad_fn=<AddmmBackward0>)
tensor([[ 0.0229, -0.0990]], grad_fn=<AddmmBackward0>)
I am working on image binarization using a UNet and have a dataset of 150 images together with their binarized versions. My idea is to augment the images randomly so that they look different, so I have made a function which applies any of 4-5 types of noise, skewness, shearing and so on to an image. I could have easily used
ImageDataGenerator(preprocessing_function=my_aug_function) to augment the images, but the problem is that my y target is also an image. Also, I could have used something like:
train_dataset = (
    train_dataset.map(
        encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)
But it has 2 problems:
With a larger dataset it'll blow up the memory, as the data needs to already be in memory.
This is the crucial part: I need to augment the images on the fly to make it look like I have a huge dataset.
Another solution could be saving the augmented images to a directory until there are 30-40K of them and then loading those. That would be a silly thing to do.
Now the idea is that I can use Sequence as the parent class, but how can I keep augmenting and generating new images on the fly together with their respective binarized y images?
My idea is shown in the code below. Can somebody help me with the augmentation and generation of the y images? I have X_DIR and Y_DIR, where the binarized and original images have the same file names but are stored in different directories.
class DataGenerator(tensorflow.keras.utils.Sequence):
    def __init__(self, files_path, labels_path, batch_size=32, shuffle=True, random_state=42):
        'Initialization'
        self.files = files_path
        self.labels = labels_path
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.random_state = random_state
        self.on_epoch_end()

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        # Shuffle the data here

    def __len__(self):
        return int(np.floor(len(self.files) / self.batch_size))

    def __getitem__(self, index):
        # What do I do here?

    def __data_generation(self, files):
        # I think this is responsible for the augmentation, but I have no idea how I should implement it or how it works.
Custom Image Data Generator
Load directory data into a dataframe for the CustomDataGenerator:
def data_to_df(data_dir, subset=None, validation_split=None):
    df = pd.DataFrame()
    filenames = []
    labels = []

    for dataset in os.listdir(data_dir):
        img_list = os.listdir(os.path.join(data_dir, dataset))
        label = name_to_idx[dataset]

        for image in img_list:
            filenames.append(os.path.join(data_dir, dataset, image))
            labels.append(label)

    df["filenames"] = filenames
    df["labels"] = labels

    if subset == "train":
        split_indexes = int(len(df) * validation_split)
        train_df = df[split_indexes:]
        val_df = df[:split_indexes]
        return train_df, val_df

    return df

train_df, val_df = data_to_df(train_dir, subset="train", validation_split=0.2)
Custom Data Generator
import math

import tensorflow as tf
from PIL import Image
import numpy as np
from sklearn.utils import shuffle  # assumed source of the shuffle() used in __len__

class CustomDataGenerator(tf.keras.utils.Sequence):
    ''' Custom DataGenerator to load img

    Arguments:
        data_frame = pandas data frame in filenames and labels format
        batch_size = divide data in batches
        shuffle = shuffle data before loading
        img_shape = image shape in (h, w, d) format
        augmentation = data augmentation to make model robust to overfitting

    Output:
        Img: numpy array of image
        label : output label for image
    '''

    def __init__(self, data_frame, batch_size=10, img_shape=None, augmentation=True, num_classes=None):
        self.data_frame = data_frame
        self.train_len = len(data_frame)
        self.batch_size = batch_size
        self.img_shape = img_shape
        self.num_classes = num_classes
        print(f"Found {self.data_frame.shape[0]} images belonging to {self.num_classes} classes")

    def __len__(self):
        ''' return total number of batches '''
        self.data_frame = shuffle(self.data_frame)
        return math.ceil(self.train_len / self.batch_size)

    def on_epoch_end(self):
        ''' shuffle data after every epoch '''
        # fix on_epoch_end: it's not working, so the shuffle in __len__ is used as an alternative
        pass

    def __data_augmentation(self, img):
        ''' function to apply some data augmentation '''
        img = tf.keras.preprocessing.image.random_shift(img, 0.2, 0.3)
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        return img

    def __get_image(self, file_id):
        """ open image with file_id path and apply data augmentation """
        img = np.asarray(Image.open(file_id))
        img = np.resize(img, self.img_shape)
        img = self.__data_augmentation(img)
        img = preprocess_input(img)  # import preprocess_input from whichever keras.applications model you use
        return img

    def __get_label(self, label_id):
        """ uncomment the line below to convert label into categorical format """
        #label_id = tf.keras.utils.to_categorical(label_id, num_classes)
        return label_id

    def __getitem__(self, idx):
        batch_x = self.data_frame["filenames"][idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.data_frame["labels"][idx * self.batch_size:(idx + 1) * self.batch_size]

        # read your data here using the batch lists, batch_x and batch_y
        x = [self.__get_image(file_id) for file_id in batch_x]
        y = [self.__get_label(label_id) for label_id in batch_y]

        return tf.convert_to_tensor(x), tf.convert_to_tensor(y)
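A hypothetical usage sketch (the model, image shape and class count below are placeholders, not part of the answer):
train_gen = CustomDataGenerator(train_df, batch_size=10, img_shape=(224, 224, 3), num_classes=2)
val_gen = CustomDataGenerator(val_df, batch_size=10, img_shape=(224, 224, 3), num_classes=2)

# model is assumed to be an already-compiled tf.keras model
model.fit(train_gen, validation_data=val_gen, epochs=10)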
You can use libraries like albumentations and imgaug; both are good, but I have heard there are issues with the random seed in albumentations.
Here's an example of imgaug taken from the documentation here:
seq = iaa.Sequential([
    iaa.Dropout([0.05, 0.2]),                     # drop 5% or 20% of all pixels
    iaa.Sharpen((0.0, 1.0)),                      # sharpen the image
    iaa.Affine(rotate=(-45, 45)),                 # rotate by -45 to 45 degrees (affects segmaps)
    iaa.ElasticTransformation(alpha=50, sigma=5)  # apply water effect (affects segmaps)
], random_order=True)

# Augment images and segmaps.
images_aug = []
segmaps_aug = []
for _ in range(len(input_data)):
    images_aug_i, segmaps_aug_i = seq(image=image, segmentation_maps=segmap)
    images_aug.append(images_aug_i)
    segmaps_aug.append(segmaps_aug_i)
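For the binarization use case here, the mask can ride along as a segmentation map so it receives the same geometric transforms as the image. A rough sketch, assuming imgaug >= 0.4 and a 2-D uint8 mask (the helper name is mine, not from the documentation):
import numpy as np
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

def augment_pair(image, mask, seq):
    """Apply one randomly sampled augmentation to an image and its binary mask."""
    segmap = SegmentationMapsOnImage(mask.astype(np.int32), shape=image.shape)
    image_aug, segmap_aug = seq(image=image, segmentation_maps=segmap)
    return image_aug, segmap_aug.get_arr().astype(mask.dtype)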
You are going the right way with the custom generator. In __getitem__, make a batch using batch_x = self.files[index * self.batch_size:(index + 1) * self.batch_size] and the same for batch_y, then augment them using X, y = self.__data_generation(batch_x, batch_y), which should load the images (using any library you like, I prefer OpenCV) and return the augmented pairs (plus any other manipulation).
Your __getitem__ will then return the tuple (X, y), as in the sketch below.
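A rough sketch of that layout for the image/mask case (the path lists, the fixed 256x256 size, and the cv2-based loading are assumptions for illustration, not the asker's code; any paired augmentation can be plugged in where indicated):
import cv2
import numpy as np
import tensorflow as tf

class PairedDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, image_paths, mask_paths, batch_size=32):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.batch_size = batch_size

    def __len__(self):
        return int(np.floor(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_x = self.image_paths[index * self.batch_size:(index + 1) * self.batch_size]
        batch_y = self.mask_paths[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__data_generation(batch_x, batch_y)

    def __data_generation(self, batch_x, batch_y):
        images, masks = [], []
        for img_path, mask_path in zip(batch_x, batch_y):
            img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            # assume one fixed input size so the batch can be stacked
            img = cv2.resize(img, (256, 256))
            mask = cv2.resize(mask, (256, 256))
            # img, mask = augment_pair(img, mask, seq)  # plug in any paired augmentation here
            images.append(img / 255.0)
            masks.append(mask / 255.0)
        return np.asarray(images, dtype=np.float32), np.asarray(masks, dtype=np.float32)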
You can use ImageDataGenerator even if your label is an image.
Here is a simple example of how you can do that:
Code:
# Specifying your data augmentation here for both image and label
image_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
mask_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

# Provide the same seed and keyword arguments to the flow methods
seed = 1

image_generator = image_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

mask_generator = mask_datagen.flow_from_directory(
    data_dir,
    class_mode=None,
    seed=seed)

# Combine the image and label generator.
train_generator = zip(image_generator, mask_generator)
Now, if you iterate over it you will get:
for image, label in train_generator:
    print(image.shape, label.shape)
    break
Output:
(32, 256, 256, 3) (32, 256, 256, 3)
You can use this train_generator with the fit() command.
Code:
model.fit(
    train_generator,
    steps_per_epoch=2000,
    epochs=50)
With flow_from_directory your memory won't be cluttered, and ImageDataGenerator will take care of the augmentation part.
I have a hyperspectral dataset, which is a numpy array with dimensions (num_images, height=7, width=7, num_channels=144) and datatype int32.
The label array has shape (batch_size, num_classes=15). I want to convert it to TFRecords and read it back correctly.
So far I have read many blogs and tried a lot of different approaches, all of which failed. Here is what I have tried.
The problem is that the code does not throw an error when I train a model with it, but the accuracy results make no sense compared to training the same model on the numpy arrays directly.
The question is: where in the code do I make a mistake? Do I make any mistakes in my conversion to TFRecords or in reading them back?
def wrap_int64(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def wrap_bytes(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def convert(images, labels, save_path, save_name):
    """
    :param images: np.ndarray containing images with shape (num_images,
        height, width, num_channels)
    :param labels: np.ndarray containing labels with shape (num_labels,),
        i.e. one_hot=False
    :param save_path: path in which we save the tfrecords
    :return:
    """
    out_path = os.path.join(save_path, save_name)
    print("Converting: " + out_path)

    assert images.dtype == np.int32

    # Number of images
    num_images = len(images)
    print(num_images)

    with tf.python_io.TFRecordWriter(out_path) as writer:
        for i in range(num_images):
            # Load a single image
            img = images[i]
            label = labels[i]

            # Convert the image and its shape to raw bytes.
            #########################################################
            # There is no need to flatten each image!!!
            ###########################################################
            img_bytes = img.tostring()
            image_shape = np.array(np.shape(img)).astype(np.int32)
            img_shape_bytes = image_shape.tostring()

            # Create a dict with the data we want to save in the
            # TFRecords file. You can add more relevant data here.
            data = \
                {
                    'image': wrap_bytes(tf.compat.as_bytes(img_bytes)),
                    'image_shape': wrap_bytes(tf.compat.as_bytes(img_shape_bytes)),
                    'label': wrap_int64(label)
                }

            # Wrap the data as TensorFlow Features.
            feature = tf.train.Features(feature=data)

            # Wrap again as a TensorFlow Example.
            example = tf.train.Example(features=feature)

            # Serialize the data.
            serialized = example.SerializeToString()

            # Write the serialized data to the TFRecords file.
            writer.write(serialized)
#
def parse(serialized, num_classes, normalization_factor):
    features = \
        {
            'image': tf.FixedLenFeature([], tf.string),
            'image_shape': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        }

    # Parse the serialized data so we get a dict with our data.
    parsed_example = \
        tf.parse_single_example(
            serialized=serialized,
            features=features)

    # Get the image, shape and label as raw bytes.
    image_raw = parsed_example['image']
    image_shape_raw = parsed_example['image_shape']
    label = parsed_example['label']

    # Decode the raw bytes so they become typed tensors. They have to be decoded
    # to exactly the same datatype the data had before the conversion to tfrecords.
    image = tf.decode_raw(image_raw, tf.int32)
    image_shape = tf.decode_raw(image_shape_raw, tf.int32)

    # reshape the image back to its original shape
    image_reshaped = tf.reshape(image, image_shape)

    # let's cast the image to tf.float32 and normalize it. Let's
    # change the label to one_hot as well.
    image_normed = normalization_factor * tf.cast(image_reshaped, tf.float32)
    label_one_hot = tf.one_hot(label, num_classes)

    # The image and label are now correct TensorFlow types.
    return image_normed, label_one_hot
#
def input_fn(filenames, num_classes, normalization_factor, train, batch_size=1024, prefetch_buffer_size=5):
    buffer_size = 10 * batch_size

    dataset = tf.data.TFRecordDataset(filenames=filenames)
    dataset = dataset.map(lambda x: parse(x, num_classes, normalization_factor))

    if train:
        dataset = dataset.shuffle(buffer_size=buffer_size)
        # Allow infinite reading of the data.
        num_repeat = None
    else:
        num_repeat = 1

    # Repeat the dataset the given number of times.
    dataset = dataset.repeat(num_repeat)

    # Get a batch of data with the given size.
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=prefetch_buffer_size)

    # Create an iterator for the dataset and the above modifications.
    iterator = dataset.make_one_shot_iterator()

    # Get the next batch of images and labels.
    batch_images_tf, batch_labels_tf = iterator.get_next()

    return batch_images_tf, batch_labels_tf
You will need to use tf.train.Feature, for example
(assuming your labels are ints).
For the int values:
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
and for the bytes:
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
Then:
data = {'label': _int64_feature(label),
        'image': _bytes_feature(tf.compat.as_bytes(img_bytes))}
That should do the trick.
# load training set and test set
batch_images_tf, batch_labels_tf = \
    input_fn(
        filenames_train,
        FLAGS.num_classes,
        normalization_factor=FLAGS.normalization_factor,
        train=True,
        batch_size=FLAGS.batch_size,
        prefetch_buffer_size=5)
Wrong way:
batch_images, batch_labels = sess.run(batch_images_tf), sess.run(batch_labels_tf)
Right way:
batch_images, batch_labels = sess.run([batch_images_tf, batch_labels_tf])
The two separate sess.run calls each advance the input iterator on their own, so the images and labels end up coming from different batches; evaluating both tensors in a single sess.run keeps them in sync.
I'm trying to write variable-size images in TensorFlow's protobuf format with the following code:
img_feature = tf.train.Feature(
    bytes_list=tf.train.BytesList(value=[
        img.flatten().tostring()]))

# Define how the sequence length is stored
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))

# Define how the label list is stored
label_list_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=label_list))

# Define the feature dictionary that defines how the data is stored
feature = {
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature,
    LABEL_LIST_FEATURE_NAME: label_list_feature}

# Create an example object to store
example = tf.train.Example(
    features=tf.train.Features(feature=feature))
Here the image img that I save has a fixed height but a variable width.
Now if I want to parse this image with the following code:
# Define how the features are read from the example
features_dict = {
    IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string),
    SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([1], tf.int64),
    LABEL_LIST_FEATURE_NAME: tf.VarLenFeature(tf.int64),
}
features = tf.parse_single_example(
    serialized_example,
    features=features_dict)

# Decode string to uint8 and reshape to image shape
img = tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8)
img = tf.reshape(img, (self.img_shape, -1))
seq_len = tf.cast(features[SEQ_LEN_FEATURE_NAME], tf.int32)

# Convert list of labels
label_list = tf.cast(features[LABEL_LIST_FEATURE_NAME], tf.int32)
I get the following error:
ValueError: All shapes must be fully defined: [TensorShape([Dimension(28), Dimension(None)]), TensorShape([Dimension(1)]), TensorShape([Dimension(3)])]
Is there a way to store images with variable size (more specifically variable width in my case) and read them with TFRecordReader?
I was able to make it work eventually with the following code to create the protobuf data file:
_, img_png = cv2.imencode('.png', img)
img_png = img_png.tostring()

label_list_feature = [
    tf.train.Feature(bytes_list=tf.train.BytesList(value=[label]))
    for label in label_list]

img_feature = tf.train.Feature(bytes_list=tf.train.BytesList(
    value=[img_png]))

# Define feature for sequence length
seq_len_feature = tf.train.Feature(
    int64_list=tf.train.Int64List(value=[seq_len]))

# Feature list that contains list of labels
feature_list = {
    LABEL_LIST_FEATURE_NAME: tf.train.FeatureList(
        feature=label_list_feature)
}

# Context that contains sequence length and image
context = tf.train.Features(feature={
    IMG_FEATURE_NAME: img_feature,
    SEQ_LEN_FEATURE_NAME: seq_len_feature
})

feature_lists = tf.train.FeatureLists(feature_list=feature_list)

# Add sequence length as context
example = tf.train.SequenceExample(
    feature_lists=feature_lists,
    context=context)
And the following code to read from the protobuf:
# Sequence length is a context feature
context_features = {
    IMG_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.string),
    SEQ_LEN_FEATURE_NAME: tf.FixedLenFeature([], dtype=tf.int64)
}

# Image and target word is a sequence feature
sequence_features = {
    LABEL_LIST_FEATURE_NAME: tf.FixedLenSequenceFeature(
        [], dtype=tf.string)
}

# Parse the example
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
    serialized=serialized_example,
    context_features=context_features,
    sequence_features=sequence_features
)

seq_len = tf.cast(context_parsed[SEQ_LEN_FEATURE_NAME], tf.int32)

# Process the image
img = context_parsed[IMG_FEATURE_NAME]
img = tf.image.decode_png(img, dtype=tf.uint8, channels=nb_channels)
img = tf.reshape(img, (img_height, -1, nb_channels))

labels = sequence_parsed[LABEL_LIST_FEATURE_NAME]

return img, seq_len, labels
Note: in this example I changed my list of integer labels to a list of string labels (which in my case are more natural). I'm also storing the image as a PNG byte string.
First, I was not able to reproduce the error. The following code works just fine:
import tensorflow as tf
import numpy as np

image_height = 100
img = np.random.randint(low=0, high=255, size=(image_height, 200), dtype='uint8')
IMG_FEATURE_NAME = 'image/raw'

with tf.Graph().as_default():
    img_feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[
            img.flatten().tostring()]))
    feature = {IMG_FEATURE_NAME: img_feature}
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    serialized_example = example.SerializeToString()

    features_dict = {IMG_FEATURE_NAME: tf.FixedLenFeature([], tf.string)}
    features = tf.parse_single_example(serialized_example, features=features_dict)
    img_tf = tf.decode_raw(features[IMG_FEATURE_NAME], tf.uint8)
    img_tf = tf.reshape(img_tf, (image_height, -1))

    with tf.Session() as sess:
        img_np = sess.run(img_tf)

print(img_np)
print('Images are identical: %s' % (img == img_np).all())
It outputs:
Images are identical: True
Second, I'd recommend storing the images encoded as PNG instead of raw bytes and reading them using tf.VarLenFeature + tf.image.decode_png. It will save you a lot of space and naturally supports variable-size images.
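A minimal sketch of that PNG-based approach (written with the TF 2.x eager API for brevity, and using a FixedLenFeature string rather than VarLenFeature to keep the parsing simple; the feature name and helper names are placeholders):
import tensorflow as tf

def write_png_example(img, writer):
    """img: uint8 array of shape (height, width, channels); width may vary per image."""
    png_bytes = tf.io.encode_png(img).numpy()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/png': tf.train.Feature(bytes_list=tf.train.BytesList(value=[png_bytes])),
    }))
    writer.write(example.SerializeToString())

def parse_png_example(serialized):
    features = tf.io.parse_single_example(
        serialized, {'image/png': tf.io.FixedLenFeature([], tf.string)})
    # decode_png recovers the height/width from the PNG header itself,
    # so no fixed shape has to be declared here.
    return tf.io.decode_png(features['image/png'], channels=3)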