Loop over images in directories/Subdirectories for data processing

Loop over images in directories/Subdirectories for data processing - python

I am trying to do image processing on my dataset. The dataset is divided into 346 folders in the following manner.
What I want to do is:
Loop over the 346 folders.
Enter each folder and process the images within.
Process each image by converting it to grayscale, resizing it, and normalizing it (these three steps should be applied to my whole dataset).
I want to keep the same folder/file names and not change them when I run my data processing.
The folder/ files name are as follows
video_0001/ 00000.png, 00001.png, .....
video_0002/ 00000.png, 00001.png,
The number of files varies from folder to folder, and the last folder is video_0346.
P.S. When I try to normalize the images by dividing by 255, I get black images.
I am still new to python. Here's what I was able to accomplish
I appreciate your help
# Intended output size for the resized images.
# NOTE(review): the resize call below uses `height` and `width`, which this
# snippet never defines; Img_height / Img_width are not referenced again.
Img_height = 512
Img_width = 512
srcdir = "C:\\Users\\Desktop\\_dataset"
# os.walk yields srcdir itself and then every directory beneath it.
for subdir, dirs, files in os.walk(srcdir):
    # NOTE(review): this repeats the listing below 346 times for every
    # directory that os.walk yields.
    for i in range(346):
        for file in os.listdir(subdir):
            print(file )
            # NOTE(review): `file` was listed from `subdir`, but the path is
            # built from `srcdir`, so it does not point into the sub-folder.
            img = cv2.imread(os.path.join(srcdir,file))
            print("image", img)
            gray_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            # NOTE(review): `resize` is not defined in this snippet --
            # presumably skimage.transform.resize; confirm.
            Image_sample_resized = resize(gray_img, (height, width))
            plt.imshow( Image_sample_resized, cmap = "gray")
            # Rebinding i does not change what range() yields next.
            i = i+1

I am still trying to understand how you want to save your formatted images, but the code below simply saves them to a new directory that you specify, so you can batch them while training if you want.
import os
import cv2
import matplotlib.image as mpimg
def resize_image(image, size=(512, 512)):
    """
    Scale ``image`` to ``size`` and return the scaled copy.

    ``size`` is handed to ``cv2.resize`` unchanged; ``main`` passes it as
    ``(width, height)``.
    """
    resized = cv2.resize(image, size)
    return resized
def convert_to_grayscale(image):
    """
    Return a grayscale version of ``image``.

    The conversion uses OpenCV's ``COLOR_RGB2GRAY`` code, i.e. the input is
    treated as RGB-ordered.
    """
    conversion_code = cv2.COLOR_RGB2GRAY
    return cv2.cvtColor(image, conversion_code)
def normalize_image(image):
    """
    Min-max rescale ``image`` so its values span 0 to 255
    """
    lower, upper = 0, 255
    return cv2.normalize(image, None, lower, upper, cv2.NORM_MINMAX)
def save_image(image, directory, filename):
    """
    Save an image to a new directory

    ``directory`` is created first when it does not exist yet.

    Raises:
        OSError: if OpenCV reports that the file could not be written.
    """
    # cv2.imwrite does not create missing folders, so make sure the target
    # directory exists before writing.
    os.makedirs(directory, exist_ok=True)
    target = os.path.join(directory, filename)
    # imwrite signals failure through its return value; surface it instead
    # of silently losing the image.
    if not cv2.imwrite(target, image):
        raise OSError("Could not write image to {!r}".format(target))
def main():
    """
    Main function

    Prompt for a source directory and a destination directory, then resize,
    grayscale and normalize every .jpg/.jpeg/.png image found under the
    source. Each result is written below the destination with the same
    sub-folder and file name it had under the source, so equally named
    frames from different folders do not overwrite each other.
    """
    # Get the directory to process
    directory = input("Enter the directory to process: ")
    # Get the directory to save the images
    save_directory = input("Enter the directory to save the images: ")
    # Size to resize the images
    img_width, img_height = 512, 512
    image_extensions = {".jpg", ".jpeg", ".png"}
    save_root = os.path.abspath(save_directory)
    # Iterate through the directory
    for root, dirs, files in os.walk(directory):
        # If the destination lives inside the source tree, do not walk into
        # it; otherwise freshly written images would be processed again.
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) != save_root
        ]
        # Mirror the current sub-folder below the destination
        # (e.g. <source>/video_0001 -> <destination>/video_0001).
        target_directory = os.path.normpath(
            os.path.join(save_directory, os.path.relpath(root, directory))
        )
        for file in files:
            # Check if the file is an image (extension compared
            # case-insensitively so ".PNG" is accepted too)
            extension = os.path.splitext(file)[1].lower()
            if extension not in image_extensions:
                continue
            # Load the image
            image = mpimg.imread(os.path.join(root, file))
            # Resize the image
            image = resize_image(image, (img_width, img_height))
            # Convert the image to grayscale
            image = convert_to_grayscale(image)
            # Normalize the image
            image = normalize_image(image)
            # Save the image under its original name in the mirrored folder
            os.makedirs(target_directory, exist_ok=True)
            save_image(image, target_directory, file)


if __name__ == "__main__":
    main()

Related

How would I read a folder of images on my computer into a dataframe on Jupyter Notebooks?

This is my code right now, I have used a for loop to go through all the images in the folder and use the PIL library to read it into an array.
import cv2
import os
folder = '/Users/x/x/x/x'
def load_images_from_folder(folder):
    """Read every entry of ``folder`` with cv2.imread and return them as a list.

    Entries for which cv2.imread returns None are skipped.
    """
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(img)
    return images
# NOTE(review): `Image` and `images_array` are not defined in this snippet,
# and load_images_from_folder is never called here.
img = Image.fromarray(images_array,'RGB', dtype = object)

How to read multiple image from different folders using python with Opencv

I am trying to read multiple images from 3 different folders. Each folder contains 100 images. I tried 2 different pieces of code, but I have no idea why they do not work. Can anyone help? Thanks
For example:
Cropped Matrix:Matrix1.png, Matrix2.png,...
Cropped Marks:Mark1.png, Mark2.png,...
Cropped Marks1:Mark1Q1.png, Mark1Q2.png,...
Output: Matrix1.png + Mark1.png + Mark1Q1.png
Code 1:
#1st
# NOTE(review): path1..path3 and the joined `folder` below are not used by
# the rest of this snippet.
path1 = os.path.abspath("C:/Users/TSL/Desktop/Crop/Cropped Matrix/*.png")
path2 = os.path.abspath("C:/Users/TSL/Desktop/Crop/Cropped Marks/*.png")
path3 = os.path.abspath("C:/Users/TSL/Desktop/Crop/Cropped Marks1/*.png")
folder= os.path.join(path1, path2, path3)
def load(folder):
    """Return the .png files of ``folder`` that cv2.imread could read, as a list."""
    images = []
    for filename in os.listdir(folder):
        if filename.endswith(".png"):
            img = cv2.imread(os.path.join(folder, filename))
            if img is not None:
                images.append(img)
    return images
root = 'C:/Users/TSL/Desktop/Crop'
folders = [os.path.join(root, x) for x in ('Cropped Matrix', 'Cropped Marks', 'Cropped Marks1')]
# Flat list holding the images of all three folders.
# NOTE(review): the name `all` shadows the built-in function all().
all = [img for folder in folders for img in load(folder)]
# NOTE(review): imshow receives the whole list here rather than one image.
cv2.imshow("Join img", all)
cv2.waitKey(0)
Code 2
#2nd
# NOTE(review): path1..path3 and the joined `folder` below are not used by
# the rest of this snippet.
path1 = os.path.abspath('Cropped Matrix')
path2 = os.path.abspath('Cropped Marks')
path3 = os.path.abspath('Cropped Marks1')
folder= os.path.join(path1, path2, path3)
def load(folder):
    """Return the .png files of ``folder`` that cv2.imread could read, as a list."""
    images = []
    for filename in os.listdir(folder):
        if any([filename.endswith(x) for x in [".png"]]):
            img = cv2.imread(os.path.join(folder, filename))
            if img is not None:
                images.append(img)
    return images
# NOTE(review): only two folder names are listed; 'Cropped Marks1' is absent.
folders = ['Cropped Matrix', 'Cropped Marks',]
for folder in folders:
    images = load(folder)
    # NOTE(review): `images` is the list returned by load(), yet it is passed
    # to cv2.imread in place of a file name.
    read = cv2.imread(images)
    cv2.imshow("Join images", read)
    cv2.waitKey(0)

all is a list of images, and you are trying to show the whole list using imshow. To show all the images one by one, you can loop through all and show each image with imshow.
Also, as @gold_cy correctly points out, all is a built-in Python function, so you should avoid using it as a variable name. Change it to something like all_images.
# Gather the images of every folder into one flat list.
all_images = [
    picture
    for directory in folders
    for picture in load(directory)
]
# Display the images one after another in the same window.
for picture in all_images:
    cv2.imshow("Image", picture) #or however you want
    cv2.waitKey(10) #or any suitable number

How to continuously update video from images that keep coming in python?

I am working on a robotics project in which a robot creates images in .pgm format (not camera images) continuously which I'm converting to .jpeg and saving to a directory. However, this directory keeps updating as more images keep getting added while I want to convert these images continuously in a video. I can already create a video from the images saved in one instance but what I want is to update the video continuously as a single file (streaming onto a URL) while images keep getting saved. Here is my code but it doesn't work in parallel for images as well as for videos. Anyone can help?
from PIL import Image
import os, glob
import cv2
import numpy as np
from os.path import isfile, join
# Delete every .yaml file found directly inside the map directory.
directory = "/home/user/catkin_ws/src/ros_map/map_pgms"
for entry in os.listdir(directory):
    if entry.endswith(".yaml"):
        os.remove(os.path.join(directory, entry))
def batch_image(in_dir, out_dir):
    """Re-save every file found in ``in_dir`` as a .jpg inside ``out_dir``.

    ``out_dir`` is created (including missing parent folders) when it does
    not exist. Each output name is the first nine characters of the input
    file name plus ``.jpg``.

    Returns:
        -1 if ``in_dir`` does not exist, otherwise the number of images
        that were written.
    """
    if not os.path.exists(out_dir):
        print(out_dir, 'is not existed.')
        # makedirs also handles nested output paths such as "a/b".
        os.makedirs(out_dir)
    if not os.path.exists(in_dir):
        print(in_dir, 'is not existed.')
        return -1
    count = 0
    # Sorted so files are converted in a reproducible order.
    for source_path in sorted(glob.glob(os.path.join(in_dir, '*'))):
        filename = os.path.basename(source_path)
        # NOTE(review): keeps only the first nine characters of the name --
        # presumably the map file stem; confirm against real file names.
        out_file = filename[0:9] + '.jpg'
        new_path = os.path.join(out_dir, out_file)
        print(count, ',', new_path)
        # The context manager closes the source file once it is saved.
        with Image.open(source_path) as im:
            im.save(new_path)
        count = count + 1
    return count
if __name__ == '__main__':
    batch_image('/home/user/catkin_ws/src/ros_map/map_pgms', './batch')
# convert the .jpgs to video
# The writer is created lazily from the first readable frame, so an empty
# ./batch folder no longer fails on an undefined frame size and the frames
# do not all have to be held in memory at once.
out = None
# Sorted so the frames are written in a reproducible (name) order.
for filename in sorted(glob.glob('./batch/*.jpg')):
    img = cv2.imread(filename)
    if img is None:
        # Skip files OpenCV could not read.
        continue
    if out is None:
        height, width = img.shape[:2]
        size = (width, height)
        out = cv2.VideoWriter('project.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
    out.write(img)
if out is not None:
    out.release()

Limiting the amount of images to be loaded from a folder

I have 50 images inside a folder, but for testing I'll need to choose between different sizes; for instance, sometimes I'll need just 10 images from the folder, and sometimes I'll need 11 or 15. But what I am doing right now is loading all images inside a given folder path. This, in turn, makes me create a new folder with the desired amount of images every time I would like to load a different amount of images, which is not optimal.
This is the current way im doing to load all images:
def loadImages(path):
    """Open every entry of ``path`` and return the images as a list of arrays.

    Each entry name is appended directly to ``path``; no separator is
    inserted between them.
    """
    return [np.array(PImage.open(path + entry)) for entry in listdir(path)]
Then i'd go and do
images = loadImages('imgfolder'), which would load everything inside the folder, when i just want the amount i really need.

You could add a second parameter to the function, which is the maximum size in megabytes:
import os
import numpy as np
from PIL import Image
def loadImages(path, max_size):
    """Load images from ``path`` until their file sizes add up to ``max_size``.

    Args:
        path: folder containing the image files.
        max_size: size budget in megabytes (1 MB = 1,000,000 bytes).

    Returns:
        A list of arrays, one per loaded image. Loading stops at the first
        file that would push the running total over the budget.
    """
    byte_limit = max_size * 1000000
    loadedImages = []
    temp_size = 0
    # Sorted so the same files are picked on every run.
    for image in sorted(os.listdir(path)):
        # Build the full path instead of changing the working directory;
        # this also works when ``path`` has no trailing separator.
        image_path = os.path.join(path, image)
        if not os.path.isfile(image_path):
            # Sub-folders cannot be opened as images.
            continue
        temp_size += os.path.getsize(image_path)
        if temp_size > byte_limit:
            break
        # The context manager closes the file once the pixels are copied.
        with Image.open(image_path) as img:
            loadedImages.append(np.array(img))
    return loadedImages
loadImages(path, 50)
If you want to set the number of pictures you can add a second parameter as max_num:
def loadImages(path, max_num):
    """Load at most ``max_num`` images from ``path``.

    Args:
        path: folder containing the image files.
        max_num: maximum number of entries to load.

    Returns:
        A list of arrays, one per loaded image.
    """
    loadedImages = []
    # Uses os.listdir / Image, the names imported at the top of this answer.
    # Sorted so the same files are picked on every run.
    for i, image in enumerate(sorted(os.listdir(path))):
        if i == max_num:
            break
        # os.path.join works whether or not ``path`` ends with a separator.
        with Image.open(os.path.join(path, image)) as img:
            loadedImages.append(np.array(img))
    return loadedImages
loadImages(path, 10)

Images to Video using opencv

I have a root folder /train_images which has subfolders like /train_images/1, /train_images/2, etc. Each of the subfolders has images in it. I want to make videos corresponding to each subfolder if num(images)>4. Below is the code which I wrote for this. It works if there is only 1 subfolder, and stops at the creation of the 2nd VideoWriter object. Please help
import cv2
import os
root_folder = 'train_images'
video_folder = 'output_video'
# Every entry of root_folder is treated as a sub-folder of images.
folders = [folder for folder in os.listdir(root_folder)]
for folder in folders:
    child = os.path.join(root_folder,folder)
    # Only .jpg files are used as frames.
    images = [img for img in os.listdir(child) if img.endswith(".jpg")]
    print('%d images in the path %s' % (len(images),child))
    # A video is written only for folders holding at least 4 images.
    if(len(images)>=4):
        video_path ='{}.avi'.format(folder)
        print(video_path)
        print(os.path.join(video_folder, video_path))
        # NOTE(review): video_folder is not created anywhere in this snippet,
        # and the meaning of the -1 codec argument should be confirmed
        # against the OpenCV VideoWriter documentation.
        video = cv2.VideoWriter(os.path.join(video_folder, video_path), -1, 5, (224, 320))
        for img in images:
            frame = cv2.imread(os.path.join(child, img))
            print('writing from %s' %(os.path.join(child, img)) )
            # Frames are resized to the same (224, 320) size given to the writer.
            frame_new=cv2.resize(frame,(224,320))
            #print(frame_new.shape)
            video.write(frame_new)
        # NOTE(review): no window is opened in this snippet before this call.
        cv2.destroyAllWindows()
        video.release()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Loop over images in directories/Subdirectories for data processing - python

Related

How would I read a folder of images on my computer into a dataframe on Jupyter Notebooks?

How to read multiple image from different folders using python with Opencv

How to continuously update video from images that keep coming in python?

Limiting the amount of images to be loaded from a folder

Images to Video using opencv

Categories

Resources