Writting a file with multiple images in a grid - python

I'm trying to write a file with multiple images (100) in a 10x10 grid. I use 3 for iterations to:
-open the file
-set coordinates (i,j)
The problem is when I look my file, all I can see is the last image multiple times. Maybe the files is overwrite every time that the program enters the for loop. Until now I can't find a solution.
The code is:
import Image
from os import listdir
from os.path import isfile, join
files = [ f for f in listdir("/mnt/hgfs/Documents/Notebooks/test1/") if isfile(join("/mnt/hgfs/Documents/Notebooks/test1/", f)) ]
new_im = Image.new('RGB', (3000,3000))
for i in xrange(0,3000,300):
for j in xrange(0,3000,300):
for ima in files:
#paste the image at location i,j:
im = Image.open(ima)
im.thumbnail((300,300))
new_im.paste(im, (i,j))
new_im.save("hola.png")
Thanks!

Here's a simple bug fix. You only need two for loops, not three.
import Image
from os import listdir
from os.path import isfile, join
files = [ f for f in listdir("/mnt/hgfs/Documents/Notebooks/test1/") if isfile(join("/mnt/hgfs/Documents/Notebooks/test1/", f)) ]
new_im = Image.new('RGB', (3000,3000))
index = 0
for i in xrange(0,3000,300):
for j in xrange(0,3000,300):
im = Image.open(files[index])
im.thumbnail((300,300))
new_im.paste(im, (i,j))
index += 1
new_im.save("hola.png")

This is the Python 3 code to make a squre grid of images file from any directory with images using matplotlib.
Square size calculates dynamicly by count of existing images.
import math
import os
import matplotlib.pyplot as plt
# Config:
images_dir = './your_dir_with_images'
result_grid_filename = './grid.jpg'
result_figsize_resolution = 40 # 1 = 100px
images_list = os.listdir(images_dir)
images_count = len(images_list)
print('Images: ', images_list)
print('Images count: ', images_count)
# Calculate the grid size:
grid_size = math.ceil(math.sqrt(images_count))
# Create plt plot:
fig, axes = plt.subplots(grid_size, grid_size, figsize=(result_figsize_resolution, result_figsize_resolution))
current_file_number = 0
for image_filename in images_list:
x_position = current_file_number % grid_size
y_position = current_file_number // grid_size
plt_image = plt.imread(images_dir + '/' + images_list[current_file_number])
axes[x_position, y_position].imshow(plt_image)
print((current_file_number + 1), '/', images_count, ': ', image_filename)
current_file_number += 1
plt.subplots_adjust(left=0.0, right=1.0, bottom=0.0, top=1.0)
plt.savefig(result_grid_filename)
Images in the directory screenshot
result grid image file

Related

How to read multiple DICOM files from a folder?

I have the following code in which I am loading a single DICOM file and checking if there are sagittal and coronal view present or not.
I want to modify this to read all DICOM files from the folder.
print there is no sagittal and coronal view if sag_aspect,cor_aspect value is zero
How do I do this?
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import sys
import glob
# load the DICOM files
files = []
print('glob: {}'.format(sys.argv[1]))
for fname in glob.glob('dicom/3.dcm', recursive=False):
print("loading: {}".format(fname))
files.append(pydicom.dcmread(fname))
print("file count: {}".format(len(files)))
# skip files with no SliceLocation (eg scout views)
slices = []
skipcount = 0
for f in files:
if hasattr(f, 'SliceLocation'):
slices.append(f)
else:
skipcount = skipcount + 1
print("skipped, no SliceLocation: {}".format(skipcount))
# ensure they are in the correct order
slices = sorted(slices, key=lambda s: s.SliceLocation)
# pixel aspects, assuming all slices are the same
ps = slices[0].PixelSpacing
ss = slices[0].SliceThickness
ax_aspect = ps[1]/ps[0]
sag_aspect = ps[1]/ss
cor_aspect = ss/ps[0]
# create 3D array
img_shape = list(slices[0].pixel_array.shape)
img_shape.append(len(slices))
img3d = np.zeros(img_shape)
# fill 3D array with the images from the files
for i, s in enumerate(slices):
img2d = s.pixel_array
img3d[:, :, i] = img2d
# plot 3 orthogonal slices
print(img3d.shape)
print(img_shape)
a1 = plt.subplot(2, 2, 1)
plt.imshow(img3d[:, :, img_shape[2]//2])
a1.set_title("transverse view")
a1.set_aspect(ax_aspect)
a2 = plt.subplot(2, 2, 2)
#print(img3d[:, img_shape[1]//2, :].shape)
plt.imshow(img3d[:, img_shape[1]//2, :])
a2.set_title("sagital view")
a2.set_aspect(sag_aspect)
a3 = plt.subplot(2, 2, 3)
plt.imshow(img3d[img_shape[0]//2, :, :].T)
a3.set_title("coronal view")
a3.set_aspect(cor_aspect)
plt.show()
For reading multiple dicom files from a folder you can use the code below.
import os
from pathlib import Path
import pydicom
dir_path = r"path\to\dicom\files"
dicom_set = []
for root, _, filenames in os.walk(dir_path):
for filename in filenames:
dcm_path = Path(root, filename)
if dcm_path.suffix == ".dcm":
try:
dicom = pydicom.dcmread(dcm_path, force=True)
except IOError as e:
print(f"Can't import {dcm_path.stem}")
else:
dicom_set.append(dicom)
I have leveraged the pathlib library which I strongly suggest to use whenever dealing with folder/file paths. I have also added an exception, but you can modify it to meet your needs.

Vectorise nested for loops

I'm wondering if it is possible to vectorise the nested for loop in my code to speed up the code. I am basically trying to split (or trim) an image into smaller bits.
import numpy as np
import os
from PIL import Image
image_path = 'sample.png'
number_of_rows = 4
input_np_arr_image = np.asarray(Image.open(image_path))
height, width = input_np_arr_image.shape[0:2]
height_trimmed = (height // number_of_rows) * number_of_rows
width_trimmed = (width // number_of_rows) * number_of_rows
trimmed_image = input_np_arr_image[:height_trimmed, :width_trimmed]
image_pieces = [np.hsplit(segment, 4)
for segment in np.vsplit(trimmed_image, 4)
]
I then try to save the result using this nested for loop
for row in range(len(image_pieces)):
for col in range(len(image_pieces[row])):
# create output image name
segment_name = f"{image_path[:len(image_path)-4]}_{row}_{col}{image_path[-4:]}"
# convert array to image
label_image = Image.fromarray(image_pieces[row][col])
# make output directory for saving
output_directory = "split_images"
os.makedirs(output_directory, exist_ok = True)
label_image.save(output_directory +"/"+ segment_name)
I'm curious if it's possible to do this without using the nested for loops. Thanks

convert numpy array to uint8 using python

My code below is intended to get a batch of images and convert them to RGB. But I keep getting an error which says to convert to type uint8. I have seen other questions regarding the conversion to uint8, but none directly from an array to uint8. Any advice on how to make that happen is welcome, thank you!
from skimage import io
import numpy as np
import glob, os
from tkinter import Tk
from tkinter.filedialog import askdirectory
import cv2
# wavelength in microns
MWIR = 4.5
R = .692
G = .582
B = .140
rgb_sum = R + G + B;
NRed = R/rgb_sum;
NGreen = G/rgb_sum;
NBlue = B/rgb_sum;
path = askdirectory(title='Select PNG Folder') # shows dialog box and return the path
outpath = askdirectory(title='Select SAVE Folder')
for file in os.listdir(path):
if file.endswith(".png"):
imIn = io.imread(os.path.join(path, file))
imOut = np.zeros(imIn.shape)
for i in range(imIn.shape[0]): # Assuming Rayleigh-Jeans law
for j in range(imIn.shape[1]):
imOut[i,j,0] = imIn[i,j,0]/((NRed/MWIR)**4)
imOut[i,j,1] = imIn[i,j,0]/((NGreen/MWIR)**4)
imOut[i,j,2] = imIn[i,j,0]/((NBlue/MWIR)**4)
io.imsave(os.path.join(outpath, file) + '_RGB.png', imOut)
the code I am trying to integrate into my own (found in another thread, used to convert type to uint8) is:
info = np.iinfo(data.dtype) # Get the information of the incoming image type
data = data.astype(np.float64) / info.max # normalize the data to 0 - 1
data = 255 * data # Now scale by 255
img = data.astype(np.uint8)
cv2.imshow("Window", img)
thank you!
Normally imInt is of type uint8, after your normalisation it is of type float32 because of the casting cause by the division. you must convert back to uint8 before saving to PNG file:
io.imsave(os.path.join(outpath, file) + '_RGB.png', imOut.astype(np.uint8))
Note that the two loops are not necessary, you can use numpy vector operations instead:
MWIR = 4.5
R = .692
G = .582
B = .140
vector = [R, G, B]
vector = vector / vector.sum()
vector = vector / MWIR
vector = np.pow(vector, 4)
for file in os.listdir(path):
if file.endswith((".png"):
imgIn = ...
imgOut = imgIn * vector
io.imsave(
os.path.join(outpath, file) + '_RGB.png',
imgOut.astype(np.uint8))

Reading the huge image data for training classifiers

I am new to python and Machine Learning. I have a huge image dataset of cars having more than 27000 images and labels. I am trying to create a dataset so I can use it in my training classifier, but ofcourse handling this amount of data will be a real pain for the Memory, and that's where I am stuck. At first I was trying to do something like this.
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import cv2
import gc
import numpy as np
from sklearn.preprocessing import normalize
import gc
import resource
import h5py
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
training_data = []
training_labels = []
count = 0
for root, _, files in os.walk(bbox):
cdp = os.path.abspath(root)
for rootImage , _ , fileImage in os.walk(imagepath):
cdpimg = os.path.abspath(r)
for f in files:
ct = 0
name,ext = os.path.splitext(f)
for fI in fileImage:
n , e = os.path.splitext(fI)
if name == n and ext == ".txt" and e == ".jpg":
cip = os.path.join(cdp,f)
cipimg = os.path.join(cdpimg,fI)
txt = open(cip,"r")
for q in txt:
ct = ct + 1
if ct == 3:
x1 = int(q.rsplit(' ')[0])
y1 = int(q.rsplit(' ')[1])
x2 = int(q.rsplit(' ')[2])
y2 = int(q.rsplit(' ')[3])
try:
read_img = mpg.imread(cipimg)
read_img = read_img.astype('float32')
read_img_bbox = read_img[y1:y2, x1:x2,:]
resize_img = cv2.cv2.resize(read_img_bbox,(300,300))
resize_img /= 255.0
training_labels.append(int(cipimg.split('\\')[4]))
training_data.append(resize_img)
print("len Of Training_data",len(training_data))
training_labels.append(int(cipimg.split('/')[8]))
del resize_img
print("len Of Training Labels", len(training_labels))
gc.collect()
except Exception as e:
print("Error",str(e), cip)
count = count + 1
print(count)
txt.flush()
txt.close()
np.save('/run/media/fdai5182/LAMA MADAN/Training_Data_4000Samples',training_data)
np.save('/run/media/fdai5182/LAMA MADAN/Training_Labels_4000Samples',training_labels)
print("DONE")
But it always gives me a huge Memory error after reading images even on 32gb RAM.
So, for that I want to do some other steps which may be useful taking less memory and get this working.
The Steps I want to do are as follows.
allocate np array X of shape N,150,150,3/300,300,3 of type
float32 (not astype)
iterate through images and fill each row of array X with 150,150,3 image pixels
normalize in-place: X /= 255
Write in file (.npy format)
What I did till now is
import cv2
import matplotlib.pyplot as plt
import matplotlib.iamge as mpg
import numpy as np
bbox = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/labels"
imagepath = "/run/media/fdai5182/LAMAMADAN/Morethan4000samples/data/image"
for root, _, files in os.walk(bbox):
cdp = os.path.abspath(root)
for rootImage, _, fileImage in os.walk(imagepath):
cdpimg = os.path.abspath(rootImage)
for f in files:
ct = 0
name,ext = os.path.splitext(f)
for fI in fileImage:
n , e = os.path.splitext(fI)
if name == n and ext == ".txt" and e == ".jpg":
nparrayX = np.zeros((150,150,3)).view('float32')
cip = os.path.join(cdp,f)
cipImg = os.path.join(cdpimg,fI)
read_image = mpg.imread(cip)
resize_image = cv2.cv2.resize(read_image,(150,150))
Am I on the right path?
Also, How can I fill each row of imageformat with 150,150,3 image pixels. I don't want to use list anymore as they take more Memory and time consuming.
Please help me through this.
Also, as a new member if the question is not obeying the rules and regulations of StackOverflow please tell me and I will edit it more.
Thank you,
Both tensorflow/keras and pytorch provide data set / generator classes, which you can use to construct memory efficient data loaders.
For tensorflow/keras there is an excellent tutorial created by Stanford's Shervine Amidi.
For pytorch you find a good tutorial on the project's man page.
I would strongly suggest to make use of these frameworks for your implementation since they allow you to avoid writing boiler-plate code and make your training scalable.
Thank you for your help . But I wanted to do it manually to check How can we do it without using other generators. Below is my Code.
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpg
import numpy as np
import os
N = 0
training_labels = []
bbox = "D:/Morethan4000samples/data/labels"
imagepath = "D:/Morethan4000samples/data/image/"
for root, _, files in os.walk(imagepath):
cdp = os.path.abspath(root)
for f in files:
name, ext = os.path.splitext(f)
if ext == ".jpg":
cip = os.path.join(cdp,f)
N += 1
print(N)
imageX = np.zeros((N,227,227,3), dtype='float32')
i = 0
for root, _ , files in os.walk(imagepath):
cdp = os.path.abspath(root)
print(cdp)
for f in files:
ct = 0
name, ext = os.path.splitext(f)
if ext == ".jpg":
cip = os.path.join(cdp,f)
read = mpg.imread(cip)
cipLabel = cip.replace('image','labels')
cipLabel = cipLabel.replace('.jpg','.txt')
nameL , extL = os.path.splitext(cipLabel)
if extL == '.txt':
boxes = open(cipLabel, 'r')
for q in boxes:
ct = ct + 1
if ct == 3:
x1 = int(q.rsplit(' ')[0])
y1 = int(q.rsplit(' ')[1])
x2 = int(q.rsplit(' ')[2])
y2 = int(q.rsplit(' ')[3])
readimage = read[y1:y2, x1:x2]
resize = cv2.cv2.resize(readimage,(227,227))
resize = cv2.cv2.GaussianBlur(resize, (5,5),0)
imageX[i] = resize
#training_labels.append(int(cip.split('\\')[4]))
training_labels.append(int(cip.split('/')[8]))
print(len(training_labels), len(imageX))
i += 1
print(i)
imageX /= 255.0
plt.imshow(imageX[10])
plt.show()
print(imageX.shape)
print(len(training_labels))
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/training_images", imageX)
np.save("/run/media/fdai5182/LAMA MADAN/Morethan4000samples/227227/trainin_labels",training_labels)
To save each of your image in a row of matrix of same dimensions is the most efficient way to do that.

How can I add a list of saved images into an existing dataframe in pandas?

I was hoping somebody would be able to help me. I am trying to store a list of saved images from MatPlotLib as a dataframe (or a list) and then add it to an existing dataframe (effectively creating small barcharts for each entry in the dataframe e.g. databars).
I have managed to save the images successfully with a loop. There are 242 images. How can I show these images in a column in a dataframe. I want it to be easy to append it to my existing dataframe to show visually the number of zero values in this dataset. My code gives errors that it NoneType object is not iterable.
This is my code. (Top half just here for clarification as to what q1 and q2 are.)
Thanks.
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
q1 = pandas.read_csv("data\q1.csv") #dataframe
q1.apply(lambda x: x.str.strip() if x.dtype == "object" else x) #strip whitespace
q1 = q1.dropna()
code = q1.loc[:,"Code"]
region = q1.loc[:,"Region"]
name = q1.loc[:,"Name"]
heads = list(q1.columns.values) #creates list of header values
nz = (q1 == 0).sum(axis=1) #count number of zero values in rows
q2 = q1[['Code','Region','Name']]
q2 = q2.assign(nz=nz.values)
samples=[]
y=1
for val in q2['nz']:
val = val/q2['nz'].max() * 100
plt.barh(val, width = val, color="blue")
plt.xlim((0,100))
plt.yticks([0])
plt.axis('off')
x = plt.savefig("value" + str(y) + ".png", bbox_inches='tight')
samples.append(x)
plt.close()
y = y + 1
imgdf = pandas.DataFrame.from_records(samples)
q3 = q2.append(imgdf)
If you are working in a jupyter notebook, then you can use the HTML display to show the images.
# Some imports
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from IPython.display import HTML
pd.set_option('display.max_colwidth', -1)
def get_thumbnail(path):
"""
Output a 150x150 sized PIL Image
"""
i = Image.open(path)
i.thumbnail((150, 150), Image.LANCZOS)
return i
def image_base64(im):
"""
Convert to base64 to be given as the src field of img in HTML
"""
if isinstance(im, str):
im = get_thumbnail(im)
with BytesIO() as buffer:
im.save(buffer, 'jpeg')
return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'
# Skipping some of your code
image_paths = []
for val in q2['nz']:
#... Do somethings here
x = plt.savefig("value" + str(y) + ".png", bbox_inches='tight')
plt.close()
image_paths.append("value" + str(y) + ".png")
y = y + 1
q2["images_paths"] = pd.Series(image_paths).values
q2["image"] = q2.image_paths.map(lambda f: get_thumbnail(f))
# Display PIL Images embedded in the dataframe
HTML(q2.to_html(formatters={"image": image_formatter}, escape=False))

Categories