Batch processing of Tiff files using skimage (Python) - python

I am looking to open, process and save multiple TIFFs in Python.
I have the following code to open, process and save 1 (one) TIFF, but I have trouble with multiple files:
import skimage.io
import skimage.viewer
import skimage
import skimage.io
# Load a single TIFF from disk.
img = skimage.io.imread(fname=path)
img[2, 1] = 1.0
# Collapse to grayscale, then smooth with a Gaussian filter to denoise
# (a larger sigma removes more noise).
smoothed = skimage.filters.gaussian(skimage.color.rgb2gray(img), sigma=5)
# Binarise: every pixel whose smoothed value is below the threshold is True.
threshold = 0.8
mask = smoothed < threshold
# Write the mask out.
skimage.io.imsave(fname=path, arr=mask)
Any help is appreciated!

Here's a multiprocessing approach that may help:
import skimage
from concurrent.futures import ProcessPoolExecutor
from glob import glob
import os.path
source_dir = '<your source directory>'
target_dir = '<your target directory>'
filetype = '*.tif'
def process(path):
    """Threshold one image into a binary mask and save it under ``target_dir``.

    The image at *path* is read, converted to grayscale, smoothed with a
    Gaussian filter (sigma=5) and compared against 0.8. The resulting boolean
    mask is written to ``target_dir`` under the input's base file name.

    Args:
        path: Path of the source image to process.
    """
    # Import the submodules explicitly: a bare ``import skimage`` is not
    # guaranteed to expose ``io``, ``color`` and ``filters`` on every release.
    import skimage.color
    import skimage.filters
    import skimage.io

    image = skimage.io.imread(fname=path)
    image[2, 1] = 1.0
    gray_image = skimage.color.rgb2gray(image)
    blurred_image = skimage.filters.gaussian(gray_image, sigma=5)
    outpath = os.path.join(target_dir, os.path.basename(path))
    arr = blurred_image < 0.8
    skimage.io.imsave(fname=outpath, arr=arr)
def main():
    """Process every file in ``source_dir`` that matches ``filetype``.

    The matching files are handed to ``process`` through a
    ``ProcessPoolExecutor`` so that they are handled by worker processes.

    Raises:
        Exception: Whatever ``process`` raised in a worker is re-raised here.
    """
    filelist = glob(os.path.join(source_dir, filetype))
    with ProcessPoolExecutor() as executor:
        # ``Executor.map`` only re-raises a worker's exception when the
        # corresponding result is retrieved, so drain the iterator; otherwise
        # failures would pass silently.
        for _ in executor.map(process, filelist):
            pass


if __name__ == '__main__':
    main()
Use glob to identify all the files matching the *.tif pattern, then use the ProcessPoolExecutor's map function to distribute the files across a pool of worker processes. As the processing is mainly CPU-intensive, multiprocessing is likely to be the best fit for this.

Is it necessary that this be parallelized? It's not a huge amount of processing that you are performing. If you don't need parallel processing, you can just run a for loop over your images:
import skimage.io
import skimage.viewer
import skimage
import skimage.io
import os
import glob
# The processing below uses these submodules, so import them explicitly.
import skimage.color
import skimage.filters

# Set up an input and an output directory. Raw strings keep the backslashes
# literal (in a normal string '\f' would become a form feed).
in_dir = r'directory\with\images'
out_dir = r'directory\for\procecessed\images'

# Make a list of all of the raw image files. Full paths are used throughout,
# so the working directory never has to change.
filelist = glob.glob(os.path.join(in_dir, '*.png'))  # change to whatever file pattern you need here

for file_iter in filelist:
    image = skimage.io.imread(fname=file_iter)
    image[2, 1] = 1.0
    # Process the file (make binary)
    gray_image = skimage.color.rgb2gray(image)
    # Blur the image to denoise (larger sigma = more noise removed)
    blurred_image = skimage.filters.gaussian(gray_image, sigma=5)
    # Adding threshold, t:
    t = 0.8
    binary_mask = blurred_image < t
    # Save the file to another location, naming it after the input file
    # (splitext copes with extensions of any length).
    stem = os.path.splitext(os.path.basename(file_iter))[0]
    out_filename = stem + 'processed.png'
    skimage.io.imsave(fname=os.path.join(out_dir, out_filename), arr=binary_mask)

Related

How can I save an image with Image.thumbnail using PIL

Basically, I copied and pasted a script and merged it with another script, and now the script doesn't work; it raises the error "NoneType object has no attribute save" (screenshot).
And here's the script:
` from PIL import Image
import PIL
import os
from glob import glob
# Recursively collect every path matching '*.png' below the current directory.
imgs = [y for x in os.walk(".") for y in glob(os.path.join(x[0], '*.png'))]
# Bounding box passed to thumbnail().
size = 32, 32
# Last directory that was printed, so each directory is reported only once.
lastdir = None
for file in imgs:
img = Image.open(file)
# NOTE(review): Image.thumbnail() resizes the image in place and returns None,
# so this rebinding sets img to None; this is the cause of the reported
# "NoneType object has no attribute save" error.
img = img.thumbnail(size, resample=PIL.Image.NEAREST)
# Build the output path by replacing the first 'img' in the path with 'icon'.
file = file.replace('img', 'icon', 1)
dir = os.path.dirname(file)
# Create the output directory; any error (e.g. it already exists) is ignored.
try:
os.makedirs(dir)
except:
pass
if dir!=lastdir:
print(dir)
lastdir = dir
# ".png" is appended to a path that already matched '*.png'.
img.save(file + ".png", "PNG")`
Resize various images on various directories and save them, but the images are not saving.
The thumbnail() method modifies its object rather than returning a new one. So, this line:
img = img.thumbnail(size, resample=PIL.Image.NEAREST)
should be:
img.thumbnail(size, resample=PIL.Image.NEAREST)

PIL Image Open is not able to open some files in a zip folder

I have about 300000 image files in a zip folder. Some of those files have paths starting with '__'. The PIL function Image.open() is not able to open these files. Please suggest a way to open them. My code is below:
import pandas as pd
import numpy as np
from zipfile import ZipFile
from io import BytesIO
from PIL import Image
from PIL import UnidentifiedImageError
# Archive members that Pillow could not identify as images.
problem_files = []
# Archive members whose name contains '.png'.
file_paths = []
# One flattened (1, N) uint8 array per successfully decoded image.
img_list = []
img_size = (128, 128)
with ZipFile('/XXX/YYY/ZZZ/AI_ML/Project2/words.zip') as myzip:
    contents = myzip.namelist()
    # Iterate over the names directly: range(0, len(contents) - 1) would
    # silently skip the last archive member.
    for name in contents:
        if '.png' in name:
            file_paths.append(name)
    for path in file_paths:
        img = myzip.read(path)
        try:
            img_data = Image.open(BytesIO(img))
        except UnidentifiedImageError:
            # NOTE(review): '__MACOSX/.../._*' members look like the metadata
            # sidecar files macOS adds to archives rather than real images,
            # which would explain why they cannot be decoded. Confirm.
            problem_files.append(path)
            # Skip the rest of the loop body; otherwise the previous image
            # (or an undefined name) would be processed for this entry.
            continue
        img_data = img_data.convert('L')
        img_data = img_data.resize(img_size)
        image_as_array = np.array(img_data, np.uint8)
        image_as_array = np.reshape(image_as_array, (1, -1))
        img_list.append(image_as_array)
This puts all the files with path starting with '__' into problem_files list
problem_files[-10:]
['__MACOSX/words/j04/j04-070/._j04-070-08-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-07.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-06.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-03.png',
'__MACOSX/words/j04/j04-070/._j04-070-06-01.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-04.png',
'__MACOSX/words/j04/j04-070/._j04-070-04-05.png',
'__MACOSX/words/j04/j04-070/._j04-070-08-05.png']
There are about 100000 images in problem_files list

How to loop images sequence?

I want to continuously loop through an image gallery (folder) until I press a key.
So I have a folder with 3 images: 1, 2 and 3. I want to display them in order and then repeat.
I've used a while loop, but I didn't manage to make it work.
import Image
# Open and display each picture once, in order. Each show() is called on the
# image that was just opened (the name `image` was never defined).
image1 = Image.open('image1.jpg')
image1.show()
image2 = Image.open('image2.jpg')
image2.show()
image3 = Image.open('image3.jpg')
image3.show()
See if this works. I'm sure there is an easier way, but this is what I could think of.
from os import listdir
from os.path import isfile, join, abspath, dirname, splitext
import time
import subprocess

# Folder that contains this script; abspath(__file__) alone is the script
# file itself, so take its directory.
mypath = dirname(abspath(__file__))
files_in_folder = [f for f in listdir(mypath) if isfile(join(mypath, f))]

# get images (sorted so they are shown in a stable order)
imgs = []
for f in sorted(files_in_folder):
    _, file_extension = splitext(f)
    if file_extension == ".jpg":
        # Keep the full path so the viewer finds the file from any working
        # directory.
        imgs.append(join(mypath, f))

# run loop until keyboard interrupt
try:
    # `while imgs` instead of `while True`: with no images there is nothing
    # to show, and an empty inner loop would spin forever.
    while imgs:
        for img in imgs:
            viewer = subprocess.Popen(['some_viewer', img])
            try:
                # Leave the image on screen before closing the viewer.
                time.sleep(3)
            finally:
                # Also runs on Ctrl+C, so no viewer process is left behind.
                viewer.terminate()
                viewer.wait()
except KeyboardInterrupt:
    pass
Here is how you can use the glob module:
import Image
from glob import glob
path = 'C:\\Users\\User\\Desktop\\Folder'
#images = []
for img in glob(path+'\\*.jpg'):
    # Open the file that glob matched, not the constant 'image1.jpg'.
    image = Image.open(img)
    #images.append(image)
    image.show()
The commented lines of code are for if you want to be able to access the images later in the code.

how to add multiple pdfs to be converted into excel?

I have a program which converts a PDF to Excel. Now I want to add multiple inputs, i.e. multiple PDFs to be converted one by one.
my code is below:
from PIL import Image
import io
import pytesseract
from wand.image import Image as wi
import os
import cv2
import pandas as pd
import re
import numpy as np
import os
# Render the PDF at 300 dpi and convert its pages to JPEG.
pdf = wi(filename="pdfs/jaalna.pdf", resolution=300)
pdfImage = pdf.convert("jpg")

# One JPEG blob per page.
imageBlobs = []
for img in pdfImage.sequence:
    imgPage = wi(image=img)
    #img.filter(ImageFilter.EDGE_ENHANCE_MORE )
    imageBlobs.append(imgPage.make_blob('jpg'))

# OCR every page.
recognized_text = []
for imgBlob in imageBlobs:
    im = Image.open(io.BytesIO(imgBlob))
    text = pytesseract.image_to_string(im, lang='eng1+mar1')
    recognized_text.append(text)

# `with` guarantees the file is flushed and closed. UTF-8 is set explicitly
# because the OCR output may contain non-ASCII text that the platform default
# encoding cannot represent.
with open('aama.txt', 'w', encoding='utf-8') as newfile:
    newfile.write(",".join(recognized_text))
#add a folder as input.
you can use loop
# Run the conversion once per listed PDF.
for name in ["pdfs/jaalna.pdf", "other/file.pdf"]:
    pdf = wi(filename=name, resolution=300)
    # rest of code
or you can use sys.argv to get names as
script.py pdfs/jaalna.pdf other/file.pdf other/third.pdf
and code
import sys
# sys.argv[0] is the script itself; every later argument is a PDF path.
for name in sys.argv[1:]:
    pdf = wi(filename=name, resolution=300)
    # rest of code
Try the code below. This will loop through every PDF file in the folder you define. Be sure to update file_path to point to where your PDFs are saved, making sure you use double backslashes in place of single backslashes.
from PIL import Image
import io
import pytesseract
from wand.image import Image as wi
import cv2
import pandas as pd
import re
import numpy as np
import os
file_path = "C:\\Users\\..."
for file in os.listdir(file_path):
    if not file.endswith(".pdf"):
        continue
    # listdir() yields bare names, so join with the folder. The path must be
    # passed as `filename=`, because the first positional parameter of wand's
    # Image is `image`.
    pdf = wi(filename=os.path.join(file_path, file), resolution=300)
    pdfImage = pdf.convert("jpg")

    # One JPEG blob per page.
    imageBlobs = []
    for img in pdfImage.sequence:
        imgPage = wi(image=img)
        #img.filter(ImageFilter.EDGE_ENHANCE_MORE )
        imageBlobs.append(imgPage.make_blob('jpg'))

    # OCR every page.
    recognized_text = []
    for imgBlob in imageBlobs:
        im = Image.open(io.BytesIO(imgBlob))
        text = pytesseract.image_to_string(im, lang='eng1+mar1')
        recognized_text.append(text)

    # One text file per PDF, named after it. `with` closes the handle, and
    # UTF-8 is set explicitly so non-ASCII OCR output can be written.
    with open(file + '.txt', 'w', encoding='utf-8') as newfile:
        newfile.write(",".join(recognized_text))

histogram.cpp:3915: error: (-215) _src.type() == CV_8UC1 in function cv::equalizeHist

# Collect the names of the regular files found in dstpath.
files2 = [f for f in listdir(dstpath) if isfile(join(dstpath,f))]
for image in files2:
# NOTE(review): imread() is called without a flag here; a colour (3-channel)
# result would violate the `_src.type() == CV_8UC1` requirement reported by
# equalizeHist().
img = cv2.imread(os.path.join(dstpath,image))
equ = cv2.equalizeHist(img)
# The output path is the same dstpath/image the input was read from.
dstPath2 = join(dstpath,image)
cv2.imwrite(dstPath2,equ)
I have a folder consisting of grayscale images in JPG format, but when I run the above code for histogram equalization it gives me the above-mentioned error. Please help.
imread loads images in color mode by default. Try using img = cv2.imread(your_image_path, cv2.IMREAD_GRAYSCALE) instead.
#author: Quantum
"""
import cv2
import os
from os import listdir,makedirs
from os.path import isfile,join
path = r'' # Source Folder
dstpath = r'' # Destination Folder

# Create the destination folder; an OSError (e.g. it already exists) is
# reported and processing continues.
try:
    makedirs(dstpath)
except OSError:
    print ("Directory already exist, images will be written in asme folder")

files = [f for f in listdir(path) if isfile(join(path,f))]
for image in files:
    try:
        # Load as single-channel 8-bit, which is what equalizeHist() requires.
        img = cv2.imread(os.path.join(path,image),cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread() returns None instead of raising when it cannot
            # read or decode the file.
            print ("{} is not converted".format(image))
            continue
        imgnew = cv2.equalizeHist(img)
        dstPath = join(dstpath,image)
        cv2.imwrite(dstPath,imgnew)
    except Exception:
        # Best effort: report the file and carry on with the rest. Unlike a
        # bare `except:`, this still lets Ctrl+C stop the run.
        print ("{} is not converted".format(image))
All I did was add the histogram-equalization call (equalizeHist) at the point where my files are converted to grayscale.

Categories