How to load a large image dataset efficiently? - python

I am trying to work on an image colorizer using autoencoders. The 'input' is a grayscale image and the 'label' is its corresponding color image. I am trying to get it to work on Google Colab on a subset of the dataset. The problem is that the session crashes when I try to convert the list of images to a numpy array.
Here's what I tried:
import os

import numpy as np
from skimage import color, io, transform
from tqdm import tqdm

X = []
y = []
errors = 0
count = 0
image_path = 'drive/My Drive/datasets/imagenette'
for file in tqdm(os.listdir(image_path)):
    try:
        # Load and transform the image
        color_image = io.imread(os.path.join(image_path, file))
        color_image = transform.resize(color_image, (224, 224))
        lab_image = color.rgb2lab(color_image)
        L = lab_image[:, :, 0] / 100.
        ab = lab_image[:, :, 1:] / 128.
        # Stack L three times so the grayscale input has three channels
        gray_scale_image = np.stack((L, L, L), axis=2)
        X.append(gray_scale_image)
        y.append(ab)
        count += 1
        if count == 5000:
            break
    except Exception:
        errors += 1

print(f'Errors encountered: {errors}')
X = np.array(X)
y = np.array(y)
print(X.shape)
print(y.shape)
Is there a way to load only a few batches, feed them and while the model is being trained on them, load another set of batches?
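One common pattern for this (a minimal sketch, not from the question: it assumes a Keras-style model, and the class name ColorizationSequence is illustrative) is a tf.keras.utils.Sequence that reads only one batch from disk per call; Keras worker processes can then prepare the next batches while the current one is being trained on:

import os

import numpy as np
import tensorflow as tf
from skimage import color, io, transform


class ColorizationSequence(tf.keras.utils.Sequence):
    def __init__(self, image_dir, batch_size=32):
        self.paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir)]
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch
        return int(np.ceil(len(self.paths) / self.batch_size))

    def __getitem__(self, idx):
        # Only this batch's images are read from disk and held in memory
        batch = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        X, y = [], []
        for path in batch:
            rgb = transform.resize(io.imread(path), (224, 224))
            lab = color.rgb2lab(rgb)
            L = lab[:, :, 0] / 100.
            X.append(np.stack((L, L, L), axis=2))
            y.append(lab[:, :, 1:] / 128.)
        return np.array(X), np.array(y)


# model.fit(ColorizationSequence(image_path), epochs=10) then streams batches
# from disk instead of materialising all 5000 images up front.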

Related

Why converting npy files (containing video frames) to tfrecords consumes too much disk space?

I am working on a violence detection service. I am trying to develop software based on the code in this repo. My dataset consists of videos residing in two directories, "Violence" and "Non-Violence".
I used this code to generate npy files out of the RGB channels and optical-flow features. The output of this step is two folders containing npy arrays of shape 244x244x5 (np.float32 dtype), so each array holds the video frames in RGB in the first 3 channels (npy[..., :3]) and the optical-flow features in the next two channels (npy[..., 3:]).
Now I am trying to convert them to tfrecords and use tf.data.TFRecordDataset to speed up the training process. Since my model input has to be a cube tensor, each training element has to be 64 frames of a video, which means each data point has shape 64x244x244x5.
So I used this code to convert the npy files to tfrecords.
from pathlib import Path
from os.path import join

import tensorflow as tf
import numpy as np
import cv2
from tqdm import tqdm


def normalize(data):
    mean = np.mean(data)
    std = np.std(data)
    return (data - mean) / std


def random_flip(video, prob):
    s = np.random.rand()
    if s < prob:
        video = np.flip(m=video, axis=2)
    return video


def color_jitter(video):
    # range of the s component: 0-1
    # range of the v component: 0-255
    s_jitter = np.random.uniform(-0.2, 0.2)
    v_jitter = np.random.uniform(-30, 30)
    for i in range(len(video)):
        hsv = cv2.cvtColor(video[i], cv2.COLOR_RGB2HSV)
        s = hsv[..., 1] + s_jitter
        v = hsv[..., 2] + v_jitter
        s[s < 0] = 0
        s[s > 1] = 1
        v[v < 0] = 0
        v[v > 255] = 255
        hsv[..., 1] = s
        hsv[..., 2] = v
        video[i] = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
    return video


def uniform_sample(video: np.ndarray, target_frames: int = 64) -> np.ndarray:
    """Uniformly sample target_frames frames from the video,
    padding with the trailing frames when the video is too short.

    Args:
        video: array of frames, shape (n_frames, H, W, C).
        target_frames: number of frames to return.

    Returns:
        Array of shape (target_frames, H, W, C), dtype float32.
    """
    len_frames = int(len(video))
    interval = int(np.ceil(len_frames / target_frames))
    # init empty list for the sampled video
    sampled_video = []
    for i in range(0, len_frames, interval):
        sampled_video.append(video[i])
    # calculate the number of missing frames and pad with the last ones
    num_pad = target_frames - len(sampled_video)
    if num_pad > 0:
        padding = [video[i] for i in range(-num_pad, 0)]
        sampled_video += padding
    return np.array(sampled_video, dtype=np.float32)


def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


if __name__ == '__main__':
    path = Path('transformed/')
    npy_files = list(path.rglob('*.npy'))[:100]
    aug = True
    # one_hots = to_categorical(range(2), dtype=np.int8)
    path_to_save = 'data_tfrecords'
    tfrecord_path = join(path_to_save, 'all_data.tfrecord')
    with tf.io.TFRecordWriter(tfrecord_path) as writer:
        for file in tqdm(npy_files, desc='files converted'):
            # load the npy file
            npy = np.load(file.as_posix(), mmap_mode='r')
            data = np.float32(npy)
            del npy
            # uniform sampling
            data = uniform_sample(data, target_frames=64)
            # augmentation
            if aug:
                data[..., :3] = color_jitter(data[..., :3])
                data = random_flip(data, prob=0.5)
            # normalization
            data[..., :3] = normalize(data[..., :3])
            data[..., 3:] = normalize(data[..., 3:])
            # label (the one-hot encoding is left commented out)
            label = 1 if file.parent.stem.startswith('F') else 0
            # label = one_hots[label]
            feature = {'image': _bytes_feature(tf.compat.as_bytes(data.tobytes())),
                       'label': _int64_feature(int(label))}
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
The code works fine, but the real problem is that it consumes too much disk space. My whole dataset of 2000 videos takes 12 GB; converted to npy files it became around 80 GB, and now as tfrecords it is over 120 GB. How can I convert them more efficiently to reduce the storage required?
The answer might be too late, but I see you are still saving the raw video frames in your tfrecords file, and those float32 frame bytes in the "image" feature are why the file takes so much space.
Try removing the "image" feature from your feature dict and storing the frames in a more compact form (along with their height, width, channel count, and so forth), keeping only lightweight fields like the label as raw features:
feature = {'label': _int64_feature(int(label))}
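One concrete way to cut the size (a sketch, not from the answer above; the key saving is writing uint8 instead of float32 and letting the writer GZIP-compress the records, with normalization moved into the tf.data pipeline at read time):

import numpy as np
import tensorflow as tf

# Illustrative stand-ins for one sampled clip and its label from the loop above
data = np.random.rand(64, 244, 244, 5).astype(np.float32) * 255
label = 1

# Store uint8 instead of float32 (4x smaller). Real optical-flow channels can
# be negative, so they would need their own scaling to 0-255 before this cast;
# that is glossed over here.
clip_u8 = np.clip(data, 0, 255).astype(np.uint8)

options = tf.io.TFRecordOptions(compression_type='GZIP')
with tf.io.TFRecordWriter('all_data.tfrecord.gz', options=options) as writer:
    feature = {
        'image': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[clip_u8.tobytes()])),
        'label': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(label)])),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    writer.write(example.SerializeToString())

# At read time, decompress and normalize on the fly instead of baking the
# float32 values into the file:
# ds = tf.data.TFRecordDataset('all_data.tfrecord.gz', compression_type='GZIP')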

How to label training data for CNN?

I'm trying to design a neural network that classifies a photo as one of 5 distinct film stocks. I have 5000 total images: 4000 for training and 1000 for testing. I've stored the images in two sub-folders for training and testing data.
training_dir = r'C:\...\Training Set'
test_dir = r'C:\...\Test Set'
I'm able to collect the training images using skimage io.ImageCollection.
import os
from skimage import io

folders = []
for image_path in os.scandir(training_dir):
    img = io.ImageCollection(os.path.join(training_dir, image_path, '*.jpg'))
    folders.append(img)
I then collect the training images based on class and apply a loop to save image data into a list.
ektachrome = folders[0]
HP5 = folders[1]
LomoP = folders[2]
Trix = folders[3]
velvia = folders[4]

images = []
for i in range(0, 800):
    ekta = ektachrome[i]
    images.append(ekta)
    hp5 = HP5[i]
    images.append(hp5)
    lomo = LomoP[i]
    images.append(lomo)
    trix = Trix[i]
    images.append(trix)
    Velvia = velvia[i]
    images.append(Velvia)
When I put the list of training images into an array np.asarray(images).shape, I get a shape of (4000,). I'm having trouble labeling the data. Here are my labels.
label = {'Ektachrome':1, 'HP5':2, 'Lomochrome Purple':3, 'Tri-X':4, 'Velvia 50':5}
How do I label my images?
As per my understanding, your images should be organized into one sub-folder per class name (since you've mentioned folders[0], folders[1], etc.).
In that case, you can use the code below to label the images.
from pathlib import Path

train_dir = Path(r'C:\...\Training Set')   # the training folder from the question

Ektachrome_dir = train_dir / 'Ektachrome'
HP5_dir = train_dir / 'HP5'
Lomochrome_Purple_dir = train_dir / 'Lomochrome Purple'
Tri_X_dir = train_dir / 'Tri-X'
Velvia_50_dir = train_dir / 'Velvia 50'

# Get the list of all the images
Ektachrome_Images = Ektachrome_dir.glob('*.jpeg')
HP5_Images = HP5_dir.glob('*.jpeg')
Lomochrome_Purple_Images = Lomochrome_Purple_dir.glob('*.jpeg')
Tri_X_Images = Tri_X_dir.glob('*.jpeg')
Velvia_50_Images = Velvia_50_dir.glob('*.jpeg')

# An empty list. We will insert the data into this list in (img_path, label) format
train_data = []
for img in Ektachrome_Images:
    train_data.append((img, 1))
for img in HP5_Images:
    train_data.append((img, 2))
for img in Lomochrome_Purple_Images:
    train_data.append((img, 3))
for img in Tri_X_Images:
    train_data.append((img, 4))
for img in Velvia_50_Images:
    train_data.append((img, 5))
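From there, one hypothetical way to turn the (img_path, label) pairs into arrays (a sketch; the (224, 224) resize is an assumption, and it also fixes the ragged (4000,) shape from the question, which happens when images have different sizes):

import numpy as np
from skimage import io, transform

# Resize so every image has the same shape; otherwise np.asarray
# produces a 1-D object array like the (4000,) one in the question
X = np.array([transform.resize(io.imread(str(p)), (224, 224)) for p, _ in train_data])
y = np.array([lbl for _, lbl in train_data])
print(X.shape, y.shape)   # (N, 224, 224, 3), (N,)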

Out of memory converting image files to numpy array

I'm trying to run a loop that iterates through an image folder and returns two numpy arrays: x, which stores the images as numpy arrays, and y, which stores the labels.
A folder can easily have over 40,000 RGB images with dimensions (224, 224).
I have around 12 GB of memory, but after some iterations the used memory just spikes up and everything stops.
What can I do to fix this issue?
import glob

import cv2
import numpy as np

def create_set(path, quality):
    x_file = glob.glob(path + '*')
    x = []
    for i, img in enumerate(x_file):
        image = cv2.imread(img, cv2.IMREAD_COLOR)
        x.append(np.asarray(image))
        if i % 50 == 0:
            print('{} - {} images processed'.format(path, i))
    x = np.asarray(x)
    x = x/255
    y = np.zeros((x.shape[0], 2))
    if quality == 0:
        y[:, 0] = 1
    else:
        y[:, 1] = 1
    return x, y
You just can't load that many images into memory. You're trying to load every file in a given path into memory, by appending them all to x.
Try processing them in batches, or if you're doing this for a tensorflow application, try writing them to .tfrecords first.
If you want to save some memory, leave the images as np.uint8 rather than casting them to float (which happens automatically when you normalise them in the line x = x/255).
You also don't need np.asarray in your x.append(np.asarray(image)) line. image is already an array; np.asarray is for converting lists, tuples, etc. to arrays.
edit:
a very rough batching example:
def batching_function(imlist, batchsize):
    ims = []
    batch = imlist[:batchsize]
    for image in batch:
        ims.append(image)
        other_processing()
    new_imlist = imlist[batchsize:]
    return ims, new_imlist

def main():
    imlist = all_the_globbing_here()
    for i in range(total_files // batchsize):
        ims, imlist = batching_function(imlist, batchsize)
        process_images(ims)
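For a rough sense of why the memory spikes (back-of-the-envelope arithmetic with the figures from the question; exact sizes will vary):

# 40,000 RGB images at 224x224, one byte per channel value
n, h, w, c = 40_000, 224, 224, 3
as_uint8 = n * h * w * c        # ~6.0 GB of raw pixel bytes
as_float64 = as_uint8 * 8       # x = x/255 promotes to float64: ~48 GB
print(as_uint8 / 1e9, as_float64 / 1e9)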

Python numpy ValueError when I load image data and transfer it to an array

I couldn't find the solution. My image shape is 128*128*3, so it has three channels, but it still causes this error:
File "E:/ML/keras_test/vgg.py", line 30, in load_data data[i,:,:,:] = arr
ValueError: could not broadcast input array from shape (128,128) into
shape (128,128,3)
My code is below:
import os

import numpy as np
from PIL import Image

def load_data(path):
    data = np.empty((12755, 128, 128, 3), dtype="uint8")
    label = np.empty((12755,), dtype="uint8")
    imgs = []
    imgs_name = []
    for each_person in os.listdir(path):
        temp = os.path.join(path, each_person)
        for each_image in os.listdir(temp):
            imgs.append(temp + "\\" + each_image)
            imgs_name.append(each_image)
    num = len(imgs)
    for i in range(num):
        img = Image.open(imgs[i])
        arr = np.asarray(img, dtype="uint8")
        print(arr.shape)
        data[i, :, :, :] = arr
        label[i] = int(imgs_name[i].split('.')[0])
    print('load_data is ok!' + str(data.shape[0]))
    return data, label
You are trying to put data of one shape into a container of another shape, which is impossible: a (128, 128) array cannot be broadcast into a (128, 128, 3) slot, so at least one of your images is grayscale (one channel) rather than RGB (three channels). If you want to work in grayscale instead, you can average over the channel axis of the loaded data and flatten each image. Try
datum = (data.sum(axis=3) / 3).reshape((12755, -1))
The resulting datum is a 12755 x 16384 array.
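If you want to keep three channels instead, a common fix (a sketch, not part of the answer above) is to force RGB when loading, so grayscale files come out as (128, 128, 3):

from PIL import Image
import numpy as np

img = Image.open(imgs[i]).convert('RGB')   # 1-channel files are expanded to 3
arr = np.asarray(img, dtype="uint8")       # now always (128, 128, 3)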

SVM with openCV & Python

I'm trying to build an application that classifies different objects. I have a training folder with a bunch of images I want to use to train my SVM.
Up until now I have followed this (GREAT) answer:
using OpenCV and SVM with images
here is a sample of my code:
import os

import numpy as np
import cv2 as cv   # OpenCV 2.4-era API, matching the constants used below

def getTrainingData():
    address = "..//data//training"
    labels = []
    trainingData = []
    for items in os.listdir(address):
        ## extract the label from the folder name
        name = address + "//" + items
        for it in os.listdir(name):
            path = name + "//" + it
            print(path)
            img = cv.imread(path, cv.CV_LOAD_IMAGE_GRAYSCALE)
            d = np.array(img, dtype=np.float32)
            q = d.flatten()
            trainingData.append(q)
            labels.append(items)
            ######DEBUG######
            #cv.namedWindow(path, cv.WINDOW_NORMAL)
            #cv.imshow(path, img)
    return trainingData, labels

svm_params = dict(kernel_type=cv.SVM_LINEAR,
                  svm_type=cv.SVM_C_SVC,
                  C=2.67, gamma=3)

training, labels = getTrainingData()
train = np.asarray(training)
svm = cv.SVM()
svm.train(train, labels, params=svm_params)
svm.save('svm_data.dat')
But when I try to run it I receive the following error:
svm.train(train, labels, params=svm_params)
TypeError: trainData data type = 17 is not supported
What am I doing wrong?
Thanks a lot!
You should resize your input images so they all have the same dimensions, like this:
img = cv2.resize(img, (64, 64))
The size is up to you. Once every flattened row has the same length, np.asarray(training) can build a proper 2-D float32 matrix instead of the unsupported ragged array that triggers the error.
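Beyond resizing, the labels list probably also needs to become a numeric array before the train call works. A sketch of both changes together, assuming the OpenCV 2.4-era SVM API from the question (the class-id mapping is illustrative, not from the original answer):

import numpy as np

# Map each folder name (a string) to a numeric class id
class_ids = {name: i for i, name in enumerate(sorted(set(labels)))}
responses = np.array([class_ids[l] for l in labels], dtype=np.float32)

# With every image resized to (64, 64), all flattened rows have equal
# length, so this yields a proper 2-D float32 matrix, not an object array
train = np.asarray(training, dtype=np.float32)

svm = cv.SVM()
svm.train(train, responses, params=svm_params)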
