I am new to Python and machine learning. I have a huge image dataset of cars, with more than 27000 images and labels. I am trying to create a dataset that I can use to train my classifier, but of course handling this amount of data is a real pain for memory, and that's where I am stuck. At first I tried something like this.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import cv2
import gc
import numpy as np
from sklearn.preprocessing import normalize
import gc
import resource
import h5py
# Build in-memory lists of cropped, resized car images and their class labels.
# Label files (*.txt) are searched under `bbox`, images (*.jpg) under
# `imagepath`; a label file and an image are paired when their base names match.
# The third line of a label file is read as "x1 y1 x2 y2" (the crop box).
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
training_data = []
training_labels = []
count = 0
for root, _, files in os.walk(bbox):
    cdp = os.path.abspath(root)
    for rootImage, _, fileImage in os.walk(imagepath):
        # Fixed: the original passed the undefined name `r` here (NameError).
        cdpimg = os.path.abspath(rootImage)
        for f in files:
            name, ext = os.path.splitext(f)
            for fI in fileImage:
                n, e = os.path.splitext(fI)
                if not (name == n and ext == ".txt" and e == ".jpg"):
                    continue
                cip = os.path.join(cdp, f)
                cipimg = os.path.join(cdpimg, fI)
                # `with` closes the label file even if parsing raises.
                with open(cip, "r") as txt:
                    for ct, q in enumerate(txt, start=1):
                        if ct != 3:
                            continue
                        x1, y1, x2, y2 = (int(v) for v in q.split(' ')[:4])
                        try:
                            read_img = mpg.imread(cipimg).astype('float32')
                            read_img_bbox = read_img[y1:y2, x1:x2, :]
                            resize_img = cv2.resize(read_img_bbox, (300, 300))
                            resize_img /= 255.0
                            # Parse the label before appending anything so the
                            # two lists cannot get out of step. Only the '/'
                            # split is kept: the original also appended a
                            # Windows-style split, which doubled the labels.
                            label = int(cipimg.split('/')[8])
                            training_data.append(resize_img)
                            training_labels.append(label)
                            print("len Of Training_data", len(training_data))
                            print("len Of Training Labels", len(training_labels))
                        except Exception as err:
                            print("Error", str(err), cip)
                count = count + 1
                print(count)
# NOTE: np.save first converts each list into a single ndarray, so peak memory
# is roughly the size of the list plus the size of that array.
np.save('/run/media/fdai5182/LAMA MADAN/Training_Data_4000Samples',training_data)
np.save('/run/media/fdai5182/LAMA MADAN/Training_Labels_4000Samples',training_labels)
print("DONE")
But it always gives me a huge Memory error after reading images even on 32gb RAM.
So, for that I want to do some other steps which may be useful taking less memory and get this working.
The Steps I want to do are as follows.
allocate np array X of shape N,150,150,3/300,300,3 of type
float32 (not astype)
iterate through images and fill each row of array X with 150,150,3 image pixels
normalize in-place: X /= 255
Write in file (.npy format)
What I did till now is
import os

import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpg  # fixed typo: was "matplotlib.iamge"
import numpy as np

# Work-in-progress loader: pair each label file with its image, then resize
# the image to 150x150.
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
for root, _, files in os.walk(bbox):
    cdp = os.path.abspath(root)
    for rootImage, _, fileImage in os.walk(imagepath):
        cdpimg = os.path.abspath(rootImage)
        for f in files:
            ct = 0
            name, ext = os.path.splitext(f)
            for fI in fileImage:
                n, e = os.path.splitext(fI)
                if name == n and ext == ".txt" and e == ".jpg":
                    # Allocate float32 directly. `.view('float32')` on a
                    # float64 array reinterprets the bytes and doubles the
                    # last axis instead of changing the dtype.
                    nparrayX = np.zeros((150, 150, 3), dtype='float32')
                    cip = os.path.join(cdp, f)
                    cipImg = os.path.join(cdpimg, fI)
                    # Read the image, not the label text file (`cip`).
                    read_image = mpg.imread(cipImg)
                    resize_image = cv2.resize(read_image, (150, 150))
Am I on the right path?
Also, how can I fill each row of the array with the 150,150,3 image pixels? I don't want to use lists anymore, as they take more memory and are time-consuming.
Please help me through this.
Also, as a new member if the question is not obeying the rules and regulations of StackOverflow please tell me and I will edit it more.
Thank you,
Both tensorflow/keras and pytorch provide data set / generator classes, which you can use to construct memory efficient data loaders.
For tensorflow/keras there is an excellent tutorial created by Stanford's Shervine Amidi.
For PyTorch, you can find a good tutorial on the project's main page.
I would strongly suggest to make use of these frameworks for your implementation since they allow you to avoid writing boiler-plate code and make your training scalable.
Thank you for your help . But I wanted to do it manually to check How can we do it without using other generators. Below is my Code.
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import numpy as np
import os
# Two-pass loader. Pass 1 counts the .jpg files so that a single float32 array
# can be allocated up front; pass 2 crops each image to the box found on the
# third line of its label file, resizes and blurs it, and stores it in place.
N = 0
training_labels = []
bbox = "D:/Morethan4000samples/data/labels"
imagepath = "D:/Morethan4000samples/data/image/"
for root, _, files in os.walk(imagepath):
    for f in files:
        if os.path.splitext(f)[1] == ".jpg":
            N += 1
print(N)
imageX = np.zeros((N, 227, 227, 3), dtype='float32')
i = 0
for root, _, files in os.walk(imagepath):
    cdp = os.path.abspath(root)
    print(cdp)
    for f in files:
        name, ext = os.path.splitext(f)
        if ext != ".jpg":
            continue
        cip = os.path.join(cdp, f)
        read = mpg.imread(cip)
        # The label file mirrors the image path: .../labels/... and .txt.
        cipLabel = cip.replace('image', 'labels').replace('.jpg', '.txt')
        # `with` guarantees the label file is closed (the original leaked it).
        with open(cipLabel, 'r') as boxes:
            for ct, q in enumerate(boxes, start=1):
                if ct != 3:
                    continue
                x1, y1, x2, y2 = (int(v) for v in q.split(' ')[:4])
                readimage = read[y1:y2, x1:x2]
                resize = cv2.resize(readimage, (227, 227))
                resize = cv2.GaussianBlur(resize, (5, 5), 0)
                imageX[i] = resize
                # The class label is the first directory below the image root.
                # Deriving it from the relative path works on any OS, unlike
                # a hard-coded index into a '/'- or '\\'-split absolute path.
                rel = os.path.relpath(cip, os.path.abspath(imagepath))
                training_labels.append(int(rel.split(os.sep)[0]))
                print(len(training_labels), len(imageX))
                i += 1
                print(i)
                break
# Drop rows that were never filled (images without a third label line) so
# images and labels stay the same length. Slicing is a view, not a copy.
imageX = imageX[:i]
imageX /= 255.0
plt.imshow(imageX[10])
plt.show()
print(imageX.shape)
print(len(training_labels))
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/training_images", imageX)
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/trainin_labels",training_labels)
Saving each of your images as one row of a preallocated array, with all rows having the same dimensions, is the most efficient way to do that.
Related
I have the following code in which I am loading a single DICOM file and checking if there are sagittal and coronal view present or not.
I want to modify this to read all DICOM files from the folder.
print there is no sagittal and coronal view if sag_aspect,cor_aspect value is zero
How do I do this?
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import sys
import glob
# Read the DICOM file(s) matched by the glob pattern.
print('glob: {}'.format(sys.argv[1]))
files = []
for fname in glob.glob('dicom/3.dcm', recursive=False):
    print("loading: {}".format(fname))
    files.append(pydicom.dcmread(fname))
print("file count: {}".format(len(files)))

# Keep only datasets that carry a SliceLocation (scout views, for example, do not).
slices = [ds for ds in files if hasattr(ds, 'SliceLocation')]
skipcount = len(files) - len(slices)
print("skipped, no SliceLocation: {}".format(skipcount))

# Order the slices by their position.
slices.sort(key=lambda ds: ds.SliceLocation)

# Pixel aspect ratios, taken from the first slice (all slices assumed alike).
first_slice = slices[0]
ps = first_slice.PixelSpacing
ss = first_slice.SliceThickness
ax_aspect = ps[1] / ps[0]
sag_aspect = ps[1] / ss
cor_aspect = ss / ps[0]

# Stack the 2D images into one 3D volume: (rows, columns, slice index).
img_shape = [*first_slice.pixel_array.shape, len(slices)]
img3d = np.zeros(img_shape)
for index, ds in enumerate(slices):
    img3d[:, :, index] = ds.pixel_array

# Show the three orthogonal cuts through the middle of the volume.
print(img3d.shape)
print(img_shape)
mid_row, mid_col, mid_slice = (extent // 2 for extent in img_shape)

a1 = plt.subplot(2, 2, 1)
plt.imshow(img3d[:, :, mid_slice])
a1.set_title("transverse view")
a1.set_aspect(ax_aspect)

a2 = plt.subplot(2, 2, 2)
plt.imshow(img3d[:, mid_col, :])
a2.set_title("sagital view")
a2.set_aspect(sag_aspect)

a3 = plt.subplot(2, 2, 3)
plt.imshow(img3d[mid_row, :, :].T)
a3.set_title("coronal view")
a3.set_aspect(cor_aspect)

plt.show()
For reading multiple dicom files from a folder you can use the code below.
import os
from pathlib import Path
import pydicom
# Recursively read every DICOM file found below `dir_path` into `dicom_set`.
dir_path = r"path\to\dicom\files"
dicom_set = []
for root, _, filenames in os.walk(dir_path):
    for filename in filenames:
        dcm_path = Path(root, filename)
        # Compare case-insensitively so files such as "IMG001.DCM" are included.
        if dcm_path.suffix.lower() != ".dcm":
            continue
        try:
            dicom = pydicom.dcmread(dcm_path, force=True)
        except IOError as e:
            # Report why the file was skipped instead of discarding the error.
            print(f"Can't import {dcm_path.stem}: {e}")
        else:
            dicom_set.append(dicom)
I have leveraged the pathlib library which I strongly suggest to use whenever dealing with folder/file paths. I have also added an exception, but you can modify it to meet your needs.
My code below is intended to get a batch of images and convert them to RGB. But I keep getting an error which says to convert to type uint8. I have seen other questions regarding the conversion to uint8, but none directly from an array to uint8. Any advice on how to make that happen is welcome, thank you!
from skimage import io
import numpy as np
import glob, os
from tkinter import Tk
from tkinter.filedialog import askdirectory
import cv2
# Wavelengths in microns.
MWIR = 4.5
R = .692
G = .582
B = .140

# Each wavelength as a fraction of the sum of the three.
rgb_sum = R + G + B
NRed = R / rgb_sum
NGreen = G / rgb_sum
NBlue = B / rgb_sum

path = askdirectory(title='Select PNG Folder') # shows dialog box and return the path
outpath = askdirectory(title='Select SAVE Folder')

for file in os.listdir(path):
    if not file.endswith(".png"):
        continue
    imIn = io.imread(os.path.join(path, file))
    imOut = np.zeros(imIn.shape)
    n_rows, n_cols = imIn.shape[0], imIn.shape[1]
    for i in range(n_rows): # Assuming Rayleigh-Jeans law
        for j in range(n_cols):
            # Every output channel is derived from input channel 0.
            imOut[i, j, 0] = imIn[i, j, 0] / ((NRed / MWIR) ** 4)
            imOut[i, j, 1] = imIn[i, j, 0] / ((NGreen / MWIR) ** 4)
            imOut[i, j, 2] = imIn[i, j, 0] / ((NBlue / MWIR) ** 4)
    io.imsave(os.path.join(outpath, file) + '_RGB.png', imOut)
the code I am trying to integrate into my own (found in another thread, used to convert type to uint8) is:
# Rescale `data` to the 0-255 range, cast it to uint8 and display it.
# `data` is not defined in this snippet; presumably it is the integer image
# array to convert (np.iinfo is given its dtype) -- confirm before reuse.
info = np.iinfo(data.dtype) # Get the information of the incoming image type
data = data.astype(np.float64) / info.max # normalize the data to 0 - 1
data = 255 * data # Now scale by 255
img = data.astype(np.uint8)
cv2.imshow("Window", img)
thank you!
Normally imIn is of type uint8. After your scaling, imOut is a floating-point array (np.zeros creates float64 by default, and the division produces floats). You must convert it back to uint8 before saving it to a PNG file:
# Cast the float result to uint8 when saving.
# NOTE(review): the cast does not rescale the values -- confirm imOut already
# lies within 0-255 before relying on the output.
io.imsave(os.path.join(outpath, file) + '_RGB.png', imOut.astype(np.uint8))
Note that the two loops are not necessary, you can use numpy vector operations instead:
# Vectorised replacement for the per-pixel loops: compute the three channel
# divisors once, then let NumPy broadcasting apply them to the whole image.
# `path`, `outpath`, `io`, `os` and `np` come from the question's script.
MWIR = 4.5
R = .692
G = .582
B = .140
# Must be an ndarray: a plain list has no .sum() and cannot be divided.
vector = np.array([R, G, B])
vector = vector / vector.sum()
vector = vector / MWIR
# np.power is available in every NumPy release (np.pow only exists in 2.x).
vector = np.power(vector, 4)
for file in os.listdir(path):
    if file.endswith(".png"):
        imgIn = io.imread(os.path.join(path, file))
        # As in the loop version, every output channel is input channel 0
        # divided by that channel's factor. The (H, W, 1) slice broadcasts
        # against the 3-element vector to give an (H, W, 3) result.
        imgOut = imgIn[:, :, :1] / vector
        io.imsave(
            os.path.join(outpath, file) + '_RGB.png',
            imgOut.astype(np.uint8))
I am trying to read `.mat` files from the main folder, which contains seven subfolders. Each subfolder is named with its class number.
import glob
import os
import hdf5storage
import numpy as np
# Find every .mat file below the dataset root and derive its integer class
# label from the name of the folder that contains it.
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
files = glob.glob(DATASET_PATH + "**/*.mat", recursive=True)
class_labels = [int(mat_path.split(os.sep)[-2]) for mat_path in files]
files variable contains the following:
Class labels contains the following:
I want to ask couple of things:
1) When I read the .mat files, each one comes in as a dict, and each dict contains a different variable name. How can I read the key and assign its value to the array?
# For every path in `files`, load the .mat file and store an array built from
# the loaded dict's keys.
array_store=[]
for f in files:
    mat = hdf5storage.loadmat(f)
    # NOTE(review): this wraps the key view itself, not the values stored
    # under those keys -- presumably not what is wanted; see the question.
    arrays = np.array(mat.keys())
    array_store.append(arrays)
2) files = glob.glob(DATASET_PATH + "**/*.mat", recursive= True) Is it possible to randomly read the specific amount of files from each folder inside the main folder? like 60% for training and 40% testing?
UPDATE
I have tried what @vopsea suggested in the answer.
The output looks like that for train variable.
How do I make the final array of images from the files for each key 1 - 7 (an array of 256 x 256 x 11 x total number of images) and the labels (total number of images x 1)? The labels will be the same as the key values; for example, all the files associated with key 1 (188 files) will have label 1 (188 x 1).
UPDATE
resolving issue of making label and accessing key without key name.
import os
import random
import hdf5storage
import numpy as np
# Shuffle the files of every class folder and split them into a training part
# (the first `percent_train` fraction) and a test part. Images are grouped per
# class (one inner list per folder); labels are kept in flat lists.
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images, test_images = [], []
train_label, test_label = [], []
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for class_name in class_folders:
    file_names = os.listdir(os.path.join(DATASET_PATH, class_name))
    random.shuffle(file_names)
    split_at = int(len(file_names) * percent_train)
    # Load in shuffled order; the first `split_at` entries form the training part.
    contents = [
        hdf5storage.loadmat(os.path.join(DATASET_PATH, class_name, fn)).values()
        for fn in file_names
    ]
    train_images.append(contents[:split_at])
    test_images.append(contents[split_at:])
    train_label.extend([class_name] * split_at)
    test_label.extend([class_name] * (len(file_names) - split_at))
1) Could you explain a bit more what you want in question 1? What is being appended? I might be misunderstanding, but it's easy to read unknown key, value pairs
# Print every key together with its value, without knowing the key names.
for key, value in mat.items():
    print(key, value)
2) I did this without glob. Shuffle the class files and slice them into two lists according to training percent. Probably best to have the same number of files for each class (or close) so training doesn't favor one especially.
import os
import random
# Map each class folder name to its shuffled file names, split into a
# training list (first `percent_train` fraction) and a test list (the rest).
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train = {}
test = {}
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for class_name in class_folders:
    names = os.listdir(os.path.join(DATASET_PATH, class_name))
    random.shuffle(names)
    cut = int(len(names) * percent_train)
    train[class_name], test[class_name] = names[:cut], names[cut:]
EDIT 2:
Is this what you mean?
import os
import random
import hdf5storage
import numpy as np
# Load every .mat file of every class folder into flat train/test lists, with
# a parallel label list (the folder name) for each.
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images, test_images = [], []
train_label, test_label = [], []
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for class_name in class_folders:
    file_names = os.listdir(os.path.join(DATASET_PATH, class_name))
    random.shuffle(file_names)
    split_at = int(len(file_names) * percent_train)
    for position, fn in enumerate(file_names):
        mat = hdf5storage.loadmat(os.path.join(DATASET_PATH, class_name, fn))
        # The first `split_at` shuffled files go to training, the rest to test.
        if position < split_at:
            images, labels = train_images, train_label
        else:
            images, labels = test_images, test_label
        images.append(mat.values())
        labels.append(class_name)
EDIT 3: Using dict for simplicity
Notice how simple it is to run through the images at the end. The alternative is storing two lists (data and labels), one of which will have many duplicate items. You then have to loop through them both at the same time.
Although depending on what you're doing with this later, two lists could be the right choice.
import os
import random
import hdf5storage
import numpy as np
# Split the .mat files of every class folder into train/test dictionaries that
# map the class name to a list holding the contents of each loaded file.
DATASET_PATH = "D:/Dataset/Multi-resolution_data/Visual/High/"
train_images = {}
test_images = {}
percent_train = 0.4
class_folders = next(os.walk(DATASET_PATH))[1]
for x in class_folders:
    files = os.listdir(os.path.join(DATASET_PATH, x))
    random.shuffle(files)
    n = int(len(files) * percent_train)
    for i, f in enumerate(files):
        abs_path = os.path.join(DATASET_PATH, x, f)
        mat = hdf5storage.loadmat(abs_path)
        target = train_images if i < n else test_images
        # Append rather than assign: `target[x] = ...` replaced the previous
        # file on every iteration, so only the last file per class survived.
        target.setdefault(x, []).append(list(mat.values()))
for img_class, img_data in train_images.items():
    print(img_class, img_data)
I'm trying to read images from folders into a dataframe, where each row in the dataframe holds all the images for one folder:
import cv2
import os,glob
import matplotlib.pylab as plt
from os import listdir,makedirs
from os.path import isfile,join
import pandas as pd
import PIL
import numpy as np
from scipy.ndimage import imread
# For each folder below `pth`, stack its .bmp images into one array and try to
# add that array to the `videos` dataframe as a single row.
pth = 'C:/Users/Documents/myfolder/'
folders = os.listdir(pth)
videos = pd.DataFrame()
for folder in folders:
    pth_upd = pth + folder + '/'
    allfiles = os.listdir(pth_upd)
    files = []
    columns = ['data']
    index = [folders]
    # Keep only names that contain '.bmp'.
    for file in allfiles:
        files.append(file) if ('.bmp' in file) else None
    # Start from an empty (0, 64, 64) array and append one image at a time.
    samples = np.empty((0,64,64))
    for file in files:
        img = cv2.imread(os.path.join(pth_upd,file),cv2.IMREAD_GRAYSCALE)
        img = img.reshape(1,64,64)
        samples = np.append(samples, img, axis=0)
    # This is the call that raises "ValueError: Must pass 2-d input".
    result = pd.DataFrame([samples], index=[folder], columns=['videos'])
    videos = videos.append(result)
after reading all the images in each folder into (samples array ) how can I insert images for each folder in a dataframe row
ValueError Traceback (most recent call last)
in
17 samples = np.append(samples, img, axis=0)
18
---> 19 result = pd.DataFrame([samples], index=[folder], columns=['videos'])
20 videos = videos.append(result)
ValueError: Must pass 2-d input
:
It's certainly possible to put strings of the resized images into pandas, but there are much better ways to accomplish CNN training. I adapted your image processing code to show how you could do what you asked:
import io
import pandas as pd
import numpy as np
import sklearn
import requests
import tempfile
import os
import cv2
# Image processing for the df
def process_imgfile(x):
    """Return the 64x64 grayscale image named by a dataframe row, as a string.

    Args:
        x: A row with a ``Folder`` attribute (directory path) and an ``image``
            attribute (file name inside that directory).

    Returns:
        The text representation of the resized image array, containing every
        pixel value.
    """
    img = cv2.imread(os.path.join(
        x.Folder, x.image), cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (64, 64))
    # str(img) abbreviates arrays of more than 1000 elements with "...",
    # which would drop most of the 4096 pixels. Raising the threshold to the
    # array size keeps the same format but prints every value.
    return np.array2string(img, threshold=img.size)
# Simulate folders with images in them
with tempfile.TemporaryDirectory() as f:
    f1 = os.path.join(f, "Folder1")
    f2 = os.path.join(f, "Folder2")
    os.mkdir(f1)
    os.mkdir(f2)
    # Download the sample image once and reuse its bytes for all 40 files.
    # The original printed r.status_code before `r` existed (NameError) and
    # repeated the same request on every loop iteration.
    r = requests.get(
        'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg')
    print(r.status_code)
    r.raise_for_status()
    for x in range(20):
        with open(os.path.join(f1, "f1-{}.jpg".format(x)), "wb") as file1, open(
                os.path.join(f2, "f2-{}.jpg".format(x)), "wb") as file2:
            file1.write(r.content)
            file2.write(r.content)
    # Each os.walk entry is (dirpath, dirnames, filenames); entry 0 is the
    # temporary root, entries 1 and 2 are the two folders created above.
    result = list(os.walk(f))
    folder1 = result[1][2]
    folder2 = result[2][2]
    # Generate dataframe data
    j = {"Folder": [], "image": []}
    for x in folder1:
        j["Folder"].append(result[1][0])
        j["image"].append(x)
    for x in folder2:
        j["Folder"].append(result[2][0])
        j["image"].append(x)
    # Use the process_imgfile function to append image data. This must run
    # inside the `with` block: the images are deleted when it exits.
    df = pd.DataFrame(j)
    df["imgdata"] = df.apply(process_imgfile, axis=1)
But on a large set of images this is not going to work. Instead, check out ImageDataGenerator which can let you load images at train and test time. It can also help you apply augmentation or synthesize data.
I have a function which applies masking operation on the input images as follows:
# Run detection on every .jpg in IMAGE_DIR and store one merged mask per image.
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
# One fixed 2000x2000 plane per image.
masks_prediction = np.zeros((2000, 2000, len(file_names)))
for i in range(len(file_names)):
    print(i)
    image = skimage.io.imread(file_names[i])
    predictions = model.detect([image], verbose=1)
    p = predictions[0]
    masks = p['masks']
    # Merge the per-instance masks (last axis) into a single 2D mask.
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:,:,j]==True] = True
    # This assignment raises the ValueError quoted below whenever the merged
    # mask is not 2000x2000.
    masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)
So basically it reads all the images from the directory, creates a mask for each and runs the detection.
However, since the images are of different sizes, it does not work:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-10-764e6229811a> in <module>()
10 for j in range(masks.shape[2]):
11 merged_mask[masks[:,:,j]==True] = True
---> 12 masks_prediction[:,:,i] = merged_mask
13 print(masks_prediction.shape)
ValueError: could not broadcast input array from shape (1518,1077) into shape (2000,2000)
I was thinking of a way to know the size of each image before the mask operation is applied (before line 12 in the error message), thus passing the exact image shape size correctly for the masking operation.
Is this somehow possible in Python?
EDIT: So apparently people somehow didn't get what I wanted to achieve - although I genuinely believe it was written in a very simple way. Nevertheless here is the entire code (copied from ipython notebook) where the function is located:
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import skimage.draw
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from glob import glob
import components
%matplotlib inline
# Directories to be referred
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
IMAGE_DIR = os.path.join(ROOT_DIR, "datasets/components/back/predict")
ANNOTATION_DIR = os.path.join(ROOT_DIR, "datasets/components/front/")
WEIGHTS_PATH = os.path.join(ROOT_DIR, "logs/back/mask_rcnn_components_0100.h5")
config = components.ComponentsConfig()
# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(config.__class__):
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()
config.display()
# Create model in inference mode
# NOTE(review): DEVICE is not defined anywhere in the visible code.
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=config)
# Load weights
print("Loading weights ", WEIGHTS_PATH)
model.load_weights(WEIGHTS_PATH, by_name=True)
# Run detection on every .jpg in IMAGE_DIR and store one merged mask per image.
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
# One fixed 2000x2000 plane per image.
masks_prediction = np.zeros((2000, 2000, len(file_names)))
for i in range(len(file_names)):
    print(i)
    image = skimage.io.imread(file_names[i])
    predictions = model.detect([image], verbose=1)
    p = predictions[0]
    masks = p['masks']
    # Merge the per-instance masks (last axis) into a single 2D mask.
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:,:,j]==True] = True
    # Raises the ValueError from the question when the merged mask is not
    # 2000x2000.
    masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)
# Compare each predicted mask with the annotated mask of the same image.
dataset = components.ComponentsDataset()
dataset.load_components(ANNOTATION_DIR, "predict")
accuracy = 0
precision = 0
for image_id in range(len(dataset.image_info)):
    name = dataset.image_info[image_id]['id']
    file_name = os.path.join(IMAGE_DIR, name)
    # Find the prediction plane that belongs to this annotated image.
    image_id_pred = file_names.index(file_name)
    merged_mask = masks_prediction[:, :, image_id_pred]
    annotated_mask = dataset.load_mask(image_id)[0]
    # NOTE(review): the fixed sizes here (510x510) and below (1200 * 1600)
    # differ from the 2000x2000 prediction planes -- confirm the intended sizes.
    merged_annotated_mask = np.zeros((510, 510))
    for i in range(annotated_mask.shape[2]):
        merged_annotated_mask[annotated_mask[:,:,i]==True] = True
    accuracy += np.sum(merged_mask==merged_annotated_mask) / (1200 * 1600)
    all_correct = np.sum(merged_annotated_mask[merged_mask == 1])
    precision += all_correct / (np.sum(merged_mask))
print('accuracy:{}'.format(accuracy / len(file_names)))
print('precision:{}'.format(precision / len(file_names)))
# Visualise the detections on one randomly chosen image.
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
class_names = ['BG', 'screw', 'lid']
test_image = skimage.io.imread(file_names[random.randint(0,len(file_names)-1)])
predictions = model.detect([test_image], verbose=1) # We are replicating the same image to fill up the batch_size
p = predictions[0]
visualize.display_instances(test_image, p['rois'], p['masks'], p['class_ids'],
                            class_names, p['scores'])
The image is just a numpy array. So to answer your question "is it possible to know the size of each image": Yes, simply use the shape of the image.
If you are working on many images of different sizes, it might make sense to resize them to a uniform resolution.
skimage has a built-in functionality for that, the skimage.transform.resize method.
Look at the docs here.
If you use resize, you should make sure that no artifacts are introduced to your images. Check the result of the resizing operation before you use it.
The resize of skimage is fairly slow. If you need more performance, you could use opencv. They have a great python API and since there is a conda package, installation has become really easy.
# Resize every .jpg in IMAGE_DIR to one common shape so the results can be
# stored in arrays of a single fixed size.
# `resize` was never imported in the original snippet; import it here.
from skimage.transform import resize

resized_images = []
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
for i, file_name in enumerate(file_names):
    print("Resizing: " + str(i))
    image = skimage.io.imread(file_name)
    # anti_aliasing smooths the image before downscaling to limit artifacts.
    image_resized = resize(image, (1200, 800), anti_aliasing=True)
    resized_images.append(image_resized)