I have a function which applies masking operation on the input images as follows:
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
masks_prediction = np.zeros((2000, 2000, len(file_names)))
for i in range(len(file_names)):
print(i)
image = skimage.io.imread(file_names[i])
predictions = model.detect([image], verbose=1)
p = predictions[0]
masks = p['masks']
merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
for j in range(masks.shape[2]):
merged_mask[masks[:,:,j]==True] = True
masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)
So basically it reads all the images from the directory, creates a mask for each and runs the detection.
However, since the images are of different sizes, it does not work:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-10-764e6229811a> in <module>()
10 for j in range(masks.shape[2]):
11 merged_mask[masks[:,:,j]==True] = True
---> 12 masks_prediction[:,:,i] = merged_mask
13 print(masks_prediction.shape)
ValueError: could not broadcast input array from shape (1518,1077) into shape (2000,2000)
I was thinking of a way to know the size of each image before the mask operation is applied (before line 12 in the error message), thus passing the exact image shape size correctly for the masking operation.
Is this somehow possible in Python?
EDIT: So apparently people somehow didn't get what I wanted to achieve - although I genuinely believe it was written in a very simple way. Nevertheless here is the entire code (copied from ipython notebook) where the function is located:
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import skimage.draw
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from glob import glob
import components
%matplotlib inline
# Directories to be referred
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
IMAGE_DIR = os.path.join(ROOT_DIR, "datasets/components/back/predict")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "datasets/components/front/")
WEIGHTS_PATH = os.path.join(ROOT_DIR, "logs/back/mask_rcnn_components_0100.h5")
config = components.ComponentsConfig()
# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(config.__class__):
# Run detection on one image at a time
GPU_COUNT = 1
IMAGES_PER_GPU = 1
config = InferenceConfig()
config.display()
# Create model in inference mode
with tf.device(DEVICE):
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
config=config)
# Load weights
print("Loading weights ", WEIGHTS_PATH)
model.load_weights(WEIGHTS_PATH, by_name=True)
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
masks_prediction = np.zeros((2000, 2000, len(file_names)))
for i in range(len(file_names)):
print(i)
image = skimage.io.imread(file_names[i])
predictions = model.detect([image], verbose=1)
p = predictions[0]
masks = p['masks']
merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
for j in range(masks.shape[2]):
merged_mask[masks[:,:,j]==True] = True
masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")
accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
name = dataset.image_info[image_id]['id']
file_name = os.path.join(IMAGE_DIR, name)
image_id_pred = file_names.index(file_name)
merged_mask = masks_prediction[:, :, image_id_pred]
annotated_mask = dataset.load_mask(image_id)[0]
merged_annotated_mask = np.zeros((510, 510))
for i in range(annotated_mask.shape[2]):
merged_annotated_mask[annotated_mask[:,:,i]==True] = True
accuracy += np.sum(merged_mask==merged_annotated_mask) / (1200 * 1600)
all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
precision += all_correct / (np.sum(merged_mask))
print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
class_names = ['BG', 'screw', 'lid']
test_image = skimage.io.imread(file_names[random.randint(0,len(file_names)-1)])
predictions = model.detect([test_image], verbose=1) # We are replicating the same image to fill up the batch_size
p = predictions[0]
visualize.display_instances(test_image, p['rois'], p['masks'], p['class_ids'],
class_names, p['scores'])
The image is just a numpy array. So to answer your question "is it possible to know the size of each image": Yes, simply use the shape of the image.
If you are working on many images of different sizes, it might make sense to resize them to a uniform resolution.
skimage has a built-in functionality for that, the skimage.transform.resize method.
Look at the docs here.
If you use resize, you should make sure that no artifacts are introduced to your images. Check the result of the resizing operation before you use it.
The resize of skimage is fairly slow. If you need more performance, you could use opencv. They have a great python API and since there is a conda package, installation has become really easy.
resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i in range(len(file_names)):
print("Resizing: " + str(i))
image = skimage.io.imread(file_names[i])
image_resized = resize(image, (1200, 800),anti_aliasing=True)
resized_images.append(image_resized)
Related
I'm using Midas very like in Huggingface's demo.
My issue is that the RAM usage increase at each depth map computation.
Here is the full code.
#!venv/bin/python3
from pathlib import Path
import psutil
import numpy as np
import torch
import cv2
def make_model():
model_type = "DPT_BEiT_L_512" # MiDaS v3.1 - Large
midas = torch.hub.load("intel-isl/MiDaS", model_type)
device = torch.device("cuda")
midas.to(device)
midas.eval()
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.dpt_transform
return {"transform": transform,
"device": device,
"midas": midas
}
def inference(cv_image, model):
"""Make the inference."""
transform = model['transform']
device = model["device"]
midas = model["midas"]
input_batch = transform(cv_image).to(device)
with torch.no_grad():
prediction = midas(input_batch)
prediction = torch.nn.functional.interpolate(
prediction.unsqueeze(1),
size=cv_image.shape[:2],
mode="bilinear",
align_corners=False,
).squeeze()
output = prediction.cpu().numpy()
formatted = (output * 255 / np.max(output)).astype('uint8')
return formatted
# Create Midas "DPT_BEiT_L_512" - MiDaS v3.1 - Large
model = make_model()
image_dir = Path('.') / "all_images"
for image_file in image_dir.iterdir():
ram_usage = psutil.virtual_memory()[2]
print("image", ram_usage)
cv_image = cv2.imread(str(image_file))
_ = inference(cv_image, model)
In short:
Create the model "DPT_BEiT_L_512"
Define the function inference
loop over the images in the directory all_images
for each: cv2.imread
compute the depthmap (do not keep the result in memory)
I see that the RAM usage keeps raising over and over.
Variation:
I've tried to read only one image (so only one cv2.imread) and, in the loop, only add a random noise on that image. Up to random noise, the inference function always receive the same image.
In that case, the RAM usage is stable.
QUESTIONS:
Where does the memory leak come from ?
Do I have to "reset" something between two inferences ?
EDIT some variations
variation 1: always the same image
replace the iterdir loop by this:
cv_image = cv2.imread("image.jpg")
for i in range(1, 100):
ram_usage = psutil.virtual_memory()[2]
print(i, ram_usage)
_ = get_depthmap(cv_image, model)
Here you get no memory leak.
Variation 2: do not compute the depth map
for image_file in image_dir.iterdir():
ram_usage = psutil.virtual_memory()[2]
print("image", ram_usage)
cv_image = cv2.imread(str(image_file))
# _ = get_depthmap(cv_image, model)
The memory leak does not occurs.
I deduce that cv2.imread itself does not makes the leak.
Variation 3: same image, random noise:
cv_image = cv2.imread("image.jpg")
for i in range(1, 100):
ram_usage = psutil.virtual_memory()[2]
print(i, ram_usage)
noise = np.random.randn(
cv_image.shape[0], cv_image.shape[1], cv_image.shape[2]) * 20
noisy_img = cv_image + noise
noisy_img = np.clip(noisy_img, 0, 255)
_ = get_depthmap(noisy_img, model)
No leak in this version.
Im trying to read images from folders into a dataframe , where each row in the dataframe is all the images for a folder :
import cv2
import os,glob
import matplotlib.pylab as plt
from os import listdir,makedirs
from os.path import isfile,join
import pandas as pd
import PIL
import numpy as np
from scipy.ndimage import imread
pth = 'C:/Users/Documents/myfolder/'
folders = os.listdir(pth)
videos = pd.DataFrame()
for folder in folders:
pth_upd = pth + folder + '/'
allfiles = os.listdir(pth_upd)
files = []
columns = ['data']
index = [folders]
for file in allfiles:
files.append(file) if ('.bmp' in file) else None
samples = np.empty((0,64,64))
for file in files:
img = cv2.imread(os.path.join(pth_upd,file),cv2.IMREAD_GRAYSCALE)
img = img.reshape(1,64,64)
samples = np.append(samples, img, axis=0)
result = pd.DataFrame([samples], index=[folder], columns=['videos'])
videos = videos.append(result)
after reading all the images in each folder into (samples array ) how can I insert images for each folder in a dataframe row
ValueError Traceback (most recent call last)
in
17 samples = np.append(samples, img, axis=0)
18
---> 19 result = pd.DataFrame([samples], index=[folder], columns=['videos'])
20 videos = videos.append(result)
ValueError: Must pass 2-d input
:
It's certainly possible to put strings of the resized images into pandas, but there are much better ways to accomplish CNN training. I adapted your image processing code to show how you could do what you asked:
import io
import pandas as pd
import numpy as np
import sklearn
import requests
import tempfile
import os
import cv2
# Image processing for the df
def process_imgfile(x):
img = cv2.imread(os.path.join(
x.Folder, x.image),cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (64, 64))
img = str(img)
return img
# Simulate folders with images in them
with tempfile.TemporaryDirectory() as f:
f1 = os.path.join(f, "Folder1")
f2 = os.path.join(f, "Folder2")
os.mkdir(f1)
os.mkdir(f2)
print(r.status_code)
for x in range(20):
with open(os.path.join(f1, "f1-{}.jpg".format(x)), "wb") as file1, open(
os.path.join(f2, "f2-{}.jpg".format(x)), "wb") as file2:
r = requests.get(
'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg',
stream=True)
for chunk in r.iter_content(16): # File writing...
file1.write(chunk)
file2.write(chunk)
result = [x for x in os.walk(f)]
folder1 = result[1][2]
folder2 = result[2][2]
# Generate dataframe data
j = {"Folder":[], "image":[]}
for x in folder1:
j["Folder"].append(result[1][0])
j["image"].append(x)
for x in folder2:
j["Folder"].append(result[2][0])
j["image"].append(x)
# Use the process_imgfile function to append image data
df = pd.DataFrame(j)
df["imgdata"] = df.apply(process_imgfile, axis=1)
But on a large set of images this is not going to work. Instead, check out ImageDataGenerator which can let you load images at train and test time. It can also help you apply augmentation or synthesize data.
I have a code which uses a list of image URLs from a CSV file and then performs face detection on those images after which it loads some models and does predictions on those images.
I did some load tests and found that the get_face function in the code takes a major chunk of the time required to produce the results and the extra time is taken by the pickle file created for predictions.
Question: Is there a possibility that by running these processes in threads, time can be reduced and also how this can be done in a multi threading way?
Here is the code example:
from __future__ import division
import numpy as np
from multiprocessing import Process, Queue, Pool
import os
import pickle
import pandas as pd
import dlib
from skimage import io
from skimage.transform import resize
df = pd.read_csv('/home/instaurls.csv')
detector = dlib.get_frontal_face_detector()
img_width, img_height = 139, 139
confidence = 0.8
def get_face():
output = None
data1 = []
for row in df.itertuples():
img = io.imread(row[1])
dets = detector(img, 1)
for i, d in enumerate(dets):
img = img[d.top():d.bottom(), d.left():d.right()]
img = resize(img, (img_width, img_height))
output = np.expand_dims(img, axis=0)
break
data1.append(output)
data1 = np.concatenate(data1)
return data1
get_face()
csv sample
data
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/17883193_940000882769400_8455736118338387968_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/22427207_1737576603205281_7879421442167668736_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/12976287_1720757518213286_1180118177_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/16788491_748497378632253_566270225134125056_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/21819738_128551217878233_9151523109507956736_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/14295447_318848895135407_524281974_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/18160229_445050155844926_2783054824017494016_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/17883193_940000882769400_8455736118338387968_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/22427207_1737576603205281_7879421442167668736_n.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/12976287_1720757518213286_1180118177_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/16788491_748497378632253_566270225134125056_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/21819738_128551217878233_9151523109507956736_n.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/14295447_318848895135407_524281974_a.jpg
https://scontent-frx5-1.cdninstagram.com/t51.2885-19/s320x320/18160229_445050155844926_2783054824017494016_a.jpg
https://scontent-frt3-2.cdninstagram.com/t51.2885-19/s320x320/23101834_1502115223199537_1230866541029883904_n.jpg
Here is how you could try to do it in parallel:
from __future__ import division
import numpy as np
from multiprocessing import Process, Queue, Pool
import os
import pickle
import pandas as pd
import dlib
from skimage import io
from skimage.transform import resize
from csv import DictReader
df = DictReader(open('/home/instaurls.csv')) # DictReader is iterable
detector = dlib.get_frontal_face_detector()
img_width, img_height = 139, 139
confidence = 0.8
def get_face(row):
"""
Here row is dictionary where keys are CSV header names
and values are values from current CSV row.
"""
output = None
img = io.imread(row[1]) # row[1] has to be changed to row['data']?
dets = detector(img, 1)
for i, d in enumerate(dets):
img = img[d.top():d.bottom(), d.left():d.right()]
img = resize(img, (img_width, img_height))
output = np.expand_dims(img, axis=0)
break
return output
if __name__ == '__main__':
pool = Pool() # default to number CPU cores
data = list(pool.imap(get_face, df))
print np.concatenate(data)
Pay attention to get_face and argument that it has. Also, to what it returns. This is what I meant when I said smaller chunks of work. Now get_face processes one row from CSV.
When you run this script, pool will be a reference to a instance of a Pool and you then call get_face for each row/tuple in df.itertuples().
After everything is done, data holds processing data and then you do np.concatenate on it.
I have downloaded Caltech101. Its structure is:
#Caltech101 dir
#class1 dir
#images of class1 jpgs
#class2 dir
#images of class2 jpgs
...
#class100 dir
#images of class100 jpgs
My problem is that I can't keep in memory two np arrays x and y of shape (9144, 240, 180, 3) and (9144). So my solution is to overallocate a h5py dataset, load them in 2 chunks and write them to file one after the other. Precisely:
from __future__ import print_function
import os
import glob
from scipy.misc import imread, imresize
from sklearn.utils import shuffle
import numpy as np
import h5py
from time import time
def load_chunk(images_dset, labels_dset, chunk_of_classes, counter, type_key, prev_chunk_length):
# getting images and processing
xtmp = []
ytmp = []
for label in chunk_of_classes:
img_list = sorted(glob.glob(os.path.join(dir_name, label, "*.jpg")))
for img in img_list:
img = imread(img, mode='RGB')
img = imresize(img, (240, 180))
xtmp.append(img)
ytmp.append(label)
print(label, 'done')
x = np.concatenate([arr[np.newaxis] for arr in xtmp])
y = np.array(ytmp, dtype=type_key)
print('x: ', type(x), np.shape(x), 'y: ', type(y), np.shape(y))
# writing to dataset
a = time()
images_dset[prev_chunk_length:prev_chunk_length+x.shape[0], :, :, :] = x
print(labels_dset.shape)
print(y.shape, y.shape[0])
print(type(y), y.dtype)
print(prev_chunk_length)
labels_dset[prev_chunk_length:prev_chunk_length+y.shape[0]] = y
b = time()
print('Chunk', counter, 'written in', b-a, 'seconds')
return prev_chunk_length+x.shape[0]
def write_to_file(remove_DS_Store):
if os.path.isfile('caltech101.h5'):
print('File exists already')
return
else:
# the name of each dir is the name of a class
classes = os.listdir(dir_name)
if remove_DS_Store:
classes.pop(0) # removes .DS_Store - may not be used on other terminals
# need the dtype of y in order to initialize h5 dataset
s = ''
key_type_y = s.join(['S', str(len(max(classes, key=len)))])
classes = np.array(classes, dtype=key_type_y)
# number of chunks in which the dataset must be divided
nb_chunks = 2
nb_chunks_loaded = 0
prev_chunk_length = 0
# open file and allocating a dataset
f = h5py.File('caltech101.h5', 'a')
imgs = f.create_dataset('images', shape=(9144, 240, 180, 3), dtype='uint8')
labels = f.create_dataset('labels', shape=(9144,), dtype=key_type_y)
for class_sublist in np.array_split(classes, nb_chunks):
# loading chunk by chunk in a function to avoid memory overhead
prev_chunk_length = load_chunk(imgs, labels, class_sublist, nb_chunks_loaded, key_type_y, prev_chunk_length)
nb_chunks_loaded += 1
f.close()
print('Images and labels saved to \'caltech101.h5\'')
return
dir_name = '../Datasets/Caltech101'
write_to_file(remove_DS_Store=True)
This works quite well, and also reading is actually fast enough. The problem is that I need to shuffle the dataset.
Observations:
Shuffling the dataset objects: obviously veeeery slow because they're on disk.
Creating an array of shuffled indices and use advanced numpy indexing. This means slower reading from file.
Shuffling before writing to file would be nice, problem: I have only about half of the dataset in memory each time. I would get an improper shuffling.
Can you think of a way to shuffle before writing? I'm open also to solutions which rethink the writing process, as long as it doesn't use a lot of memory.
You could shuffle the file paths before reading the image data.
Instead of shuffling the image data in memory, create a list of all file paths that belong to the dataset. Then shuffle the list of file paths. Now you can create your HDF5 database as before.
You could for example use glob to create the list of files for shuffling:
import glob
import random
files = glob.glob('../Datasets/Caltech101/*/*.jpg')
shuffeled_files = random.shuffle(files)
You could then retrieve the class label and image name from the path:
import os
for file_path in shuffeled_files:
label = os.path.basename(os.path.dirname(file_path))
image_id = os.path.splitext(os.path.basename(file_path))[0]
I'm tryint to build an application that classifies different objects. I have a training folder with a bunch of images i want to use as training for my SVM.
Up untill now I have followed this (GREAT) answer:
using OpenCV and SVM with images
here is a sample of my code:
def getTrainingData():
address = "..//data//training"
labels = []
trainingData = []
for items in os.listdir(address):
## extracts labels
name = address + "//" + items
for it in os.listdir(name):
path = name + "//" + it
print path
img = cv.imread(path, cv.CV_LOAD_IMAGE_GRAYSCALE)
d = np.array(img, dtype = np.float32)
q = d.flatten()
trainingData.append(q)
labels.append(items)
######DEBUG######
#cv.namedWindow(path,cv.WINDOW_NORMAL)
#cv.imshow(path,img)
return trainingData, labels
svm_params = dict( kernel_type = cv.SVM_LINEAR,
svm_type = cv.SVM_C_SVC,
C=2.67, gamma=3 )
training, labels = getTrainingData()
train = np.asarray(training)
svm = cv.SVM()
svm.train(train, labels, params=svm_params)
svm.save('svm_data.dat')
But when i try to run i recieve the following error:
svm.train(train, labels, params=svm_params)
TypeError: trainData data type = 17 is not supported
What am i doing wrong?
Thanks A lot!
You should resize your input images. like this:
img = cv2.resize(img, (64,64))
Size is up to you.