How to iterate/loop over a folder of images in opencv? - python

I am trying to loop over a folder of images in OpenCV. Each image is a line of text. I would like to segment each line into words and then save each word as an image. The following code works well on a single image, but when I try to apply it to all the files in the folder, it exports the words of only one image instead of looping over all of them.
I'll appreciate your help and thank you in advance.
ayaasiin
from PIL import Image
import cv2
import numpy as np
from pathlib import Path
import glob
# Split every text-line image found in ``data/`` into word crops.
#
# Fixes compared with the first attempt:
#   * the whole pipeline is inside the ``for`` body, so it runs for every file;
#   * ``img.copy()`` referred to an undefined name -> ``image.copy()``;
#   * the sorted contour list is the one that is iterated (it was discarded);
#   * every output name carries the source file's stem, so the results of one
#     image no longer overwrite those of the previous one.
corpus_dir = Path('data')
# sorted() gives a reproducible processing order.
files = sorted(corpus_dir.glob(pattern='*.jpg'))

for f in files:
    # cv2.imread returns None (it does not raise) when a file cannot be read.
    image = cv2.imread(str(f))
    if image is None:
        print('Skipping unreadable image: {}'.format(f))
        continue

    # Inverted Otsu threshold on a blurred grayscale copy.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    ret, thresh1 = cv2.threshold(
        blur, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    cv2.imwrite('{}_threshold_image.jpg'.format(f.stem), thresh1)

    # Dilate so neighbouring strokes merge into blobs that can be boxed.
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 30))
    dilation = cv2.dilate(thresh1, rect_kernel, iterations=6)
    cv2.imwrite('{}_dilation_image.jpg'.format(f.stem), dilation)

    contours, hierarchy = cv2.findContours(
        dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE(review): the key (y, w, h) orders boxes top-to-bottom; if the words
    # should be numbered left-to-right, use cv2.boundingRect(c)[0] instead.
    ordered = sorted(contours, key=lambda c: cv2.boundingRect(c)[1:])

    # im2 only receives the drawn boxes; crops are taken from the untouched
    # ``image`` so the green outlines never end up inside a saved word.
    im2 = image.copy()
    crop_number = 1
    for cnt in ordered:
        x, y, w, h = cv2.boundingRect(cnt)
        # Ignore small blobs (noise, punctuation).
        if h <= 50 or w <= 50:
            continue

        # Draw the bounding box on the text area
        cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Crop the bounding box area
        cropped = image[y:y + h, x:x + w]
        cv2.imwrite(
            '{} sawir {}.jpeg'.format(f.stem, crop_number), cropped)
        crop_number += 1

    # Preview of all accepted boxes for this file.
    cv2.imwrite('{}_boxes.jpg'.format(f.stem), im2)

Related

Python - How to process a binary image to align sparse letters in a row

I'm trying to align letters from an image in order to obtain the full word with tesseract OCR:
import cv2
import numpy as np
# Read the captcha with flag 0 (single-channel result, see the two-value
# shape unpack below) and enlarge it five-fold.
img = cv2.imread("captcha.png", 0)
h1, w1 = img.shape
img = cv2.resize(img, (w1*5, h1*5))
# Threshold the image and find the contours
_, thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(
thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Create a white background image to paste the letters on
# NOTE(review): the canvas is a fixed 200x200 while `img` was enlarged 5x
# above, so a bounding box taken from `img` can be larger than the room left
# on the canvas.
bg = np.zeros((200, 200), np.uint8)
bg[:] = 255
# Column in `bg` where the next letter will be pasted.
left = 5
# Iterate through the contours
for contour, h in zip(contours, hierarchy[0]):
# Ignore inside parts (circle in a 'p' or 'b')
if h[3] == -1:
# Get the bounding rectangle
# NOTE: `h` is rebound here from the hierarchy row to the box height.
x, y, w, h = cv2.boundingRect(contour)
# Paste it onto the background
bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
# Advance past the pasted letter plus a 5-pixel gap.
left += (w + 5)
cv2.imshow('thresh', bg)
cv2.waitKey()
And the image that I want to process is this one
However, I got this message:
>Traceback (most recent call last):
File ".\img.py", line 24, in <module>
bg[5:5+h, left:left+w] = img[y:y+h, x:x+w]
ValueError: could not broadcast input array from shape (72,750) into shape (72,195)
Just with tesseract OCR I got "acba" without the zero and four so I need to reorder the letters to obtain it. Any suggestions?
You are trying to paste a larger image region into a smaller area of the background, but both slices must have the same shape.
You can get the shapes of both slices, take the min() of their widths and heights, and use those:
# Shapes of the destination and source slices; they can differ when a slice
# runs past the edge of its array.
h1, w1 = bg[5:5+h, left:left+w].shape
h2, w2 = img[y:y+h, x:x+w].shape
# Use the common (smaller) extent so both sides of the assignment match.
min_h = min(h1, h2)
min_w = min(w1, w2)
bg[5:5+min_h, left:left+min_w] = img[y:y+min_h, x:x+min_w]
EDIT:
OR maybe you should use x,y instead of 5 and left (also 5)
# Alternative: paste each letter at its original (x, y) position, so the
# source and destination slices are taken with the same indices.
bg[y:y+h, x:x+w] = img[y:y+h, x:x+w]
And maybe you should create bg with the same size as img (after resizing)
# Size the canvas from the (already resized) image so that it has the same
# dimensions as `img`.
h1, w1 = img.shape
# NOTE: np.zeros gives an all-zero (black) canvas; fill it with 255 for white.
bg = np.zeros((h1, w1), np.uint8)
EDIT:
Full working code with other changes.
I read the image in color so that I can display the contours that were found, because it seems they are different than you might expect.
import cv2
import numpy as np
# Show which OpenCV build is running.
print('CV:', cv2.__version__)

# Load the picture as stored on disk and enlarge it five-fold.
img_color = cv2.imread("ZzSgt.png", cv2.IMREAD_UNCHANGED)
rows, cols = img_color.shape[:2]
print('original shape (W,H):', cols, rows)
img_color = cv2.resize(img_color, (cols * 5, rows * 5))
rows, cols = img_color.shape[:2]
print('resized shape (W,H) :', cols, rows)
img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

# Inverted binary threshold, then contours together with their hierarchy.
thresh = cv2.threshold(img, 123, 255, cv2.THRESH_BINARY_INV)[1]
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# White canvases: one (grayscale) for the re-aligned letters, one (colour)
# for displaying the detected contours.
bg = np.full((rows, cols), 255, np.uint8)
img_contours = np.full((rows, cols, 3), 255, np.uint8)

# Column at which the next letter is pasted.
cursor = 5
for contour, node in zip(contours, hierarchy[0]):
    # Skip inner parts (e.g. the hole of a 'p' or 'b'): only entries whose
    # fourth hierarchy field is -1 are processed.
    if node[3] != -1:
        continue

    bx, by, bw, bh = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', bx, by, bw, bh)

    # Clip the paste to the extent shared by destination and source slices.
    target = bg[5:5 + bh, cursor:cursor + bw]
    source = img[by:by + bh, bx:bx + bw]
    fit_h = min(target.shape[0], source.shape[0])
    fit_w = min(target.shape[1], source.shape[1])
    bg[5:5 + fit_h, cursor:cursor + fit_w] = img[by:by + fit_h, bx:bx + fit_w]
    cursor += bw + 5

    # Copy the colour region and outline the contour in red (BGR).
    img_contours[by:by + bh, bx:bx + bw] = img_color[by:by + bh, bx:bx + bw]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()
contours
background
EDIT:
I get a better result if I reverse (invert) the image with img = ~img and change the threshold from 123 to 30.
thresh
contours
background (and now I see it could have size even (75, 255) or safer (100, 300))
import cv2
import numpy as np
# Show which OpenCV build is running.
print('CV:', cv2.__version__)

# Alternative input file name: "captcha.png"
source_name = "ZzSgt.png"
img_color = cv2.imread(source_name, cv2.IMREAD_UNCHANGED)
height, width = img_color.shape[:2]
print('original shape (W,H):', width, height)

# Enlarge five-fold and re-read the size from the result.
img_color = cv2.resize(img_color, (width * 5, height * 5))
height, width = img_color.shape[:2]
print('resized shape (W,H) :', width, height)

# Grayscale, then bitwise inversion (same as ~img).
img = np.invert(cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY))

# Inverted binary threshold at 30, shown for inspection, then contours.
thresh = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow('thresh', thresh)
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# White canvases for the re-aligned letters and for the contour preview.
bg = np.full((height, width), 255, np.uint8)
img_contours = np.full((height, width, 3), 255, np.uint8)

# Column at which the next letter is pasted.
paste_col = 5
for contour, relation in zip(contours, hierarchy[0]):
    # Only entries whose fourth hierarchy field is -1 are used; this drops
    # inner parts such as the hole of a 'p' or 'b'.
    if relation[3] != -1:
        continue

    cx, cy, cw, ch = cv2.boundingRect(contour)
    print('contour (X,Y,W,H):', cx, cy, cw, ch)

    # Clip the paste to what fits in both the canvas and the image.
    dst_h, dst_w = bg[5:5 + ch, paste_col:paste_col + cw].shape
    src_h, src_w = img[cy:cy + ch, cx:cx + cw].shape
    use_h = dst_h if dst_h < src_h else src_h
    use_w = dst_w if dst_w < src_w else src_w
    bg[5:5 + use_h, paste_col:paste_col + use_w] = img[cy:cy + use_h, cx:cx + use_w]
    paste_col += cw + 5

    # Copy the colour region and outline the contour in red (BGR).
    img_contours[cy:cy + ch, cx:cx + cw] = img_color[cy:cy + ch, cx:cx + cw]
    img_contours = cv2.drawContours(img_contours, [contour], 0, (0, 0, 255))

cv2.imshow('contours', img_contours)
cv2.imshow('background', bg)
cv2.waitKey()
cv2.destroyAllWindows()

How to generalize contouring handwritten characters using OpenCV in Python?

I am trying to detect and crop handwritten characters from an image. Some characters are recognized and enclosed in a rectangle, but for others the same parameters do not work. How can I generalize it?
Raw Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Box every contour found in 'mission.png' and save the large ones as crops.
im = cv2.imread('mission.png', 0)
img_blured = cv2.GaussianBlur(im, (5, 5), 7)
# NOTE(review): the third argument of morphologyEx is the structuring element
# itself, not its size. To close with a 31x31 element, build it with
# cv2.getStructuringElement(cv2.MORPH_RECT, (31, 31)). Left unchanged here
# because the threshold below was chosen against the current output.
closing = cv2.morphologyEx(img_blured, cv2.MORPH_CLOSE, (31, 31))
thresh = 195
ret, bw_img = cv2.threshold(closing, thresh, 255, cv2.THRESH_BINARY)

# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in 2.x and 4.x; taking the last two items works on all.
contours, hierarchy = cv2.findContours(
    bw_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# One pass: outline every contour on `im`, and save the big ones. Crops are
# cut from `bw_img`, so the outlines drawn on `im` never appear in them.
i = 0
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 3)
    if w > 150 and h > 150:
        cv2.imwrite(str(i) + ".jpg", bw_img[y:y + h, x:x + w])
        i = i + 1

plt.imshow(im)
plt.show()
cv2.imwrite("output.png", im)
Processed Image
Maybe this can help you:
# Import preprocessors
import os
import cv2
import numpy as np
# Read image
# Find character-sized blocks in 'nvCXT.png', save each as its own image and
# write annotated overview images next to this script.
#
# `base_dir` replaces the former `dir`, which shadowed the builtin.
base_dir = os.path.abspath(os.path.dirname(__file__))
im = cv2.imread(os.path.join(base_dir, 'nvCXT.png'))

# Add padding around the original image
pad = 5
h, w = im.shape[:2]
# np.full(..., 255) is pure white; the former ~np.ones(...) produced 254.
im2 = np.full((h + pad * 2, w + pad * 2, 3), 255, dtype=np.uint8)
im2[pad:pad + h, pad:pad + w] = im
im = im2

# Blur it to remove noise
im = cv2.GaussianBlur(im, (5, 5), 5)

# Gray and B/W version
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
bw = cv2.threshold(im, 200, 255, cv2.THRESH_BINARY)[1]

# Find contours and sort them by position (left to right).
# [-2] picks the contour list on every OpenCV version (3.x returns three
# values). sorted() is used because recent versions return a tuple, which has
# no .sort() method.
cnts = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
cnts = sorted(cnts, key=lambda c: cv2.boundingRect(c)[0])

# Find and save blocks
# Size limits are derived from the width of the original (unpadded) image.
max_extent, min_extent = w / 2, w / 10
i = 0
prev_right = 0  # right edge (x + w) of the last accepted block
for cnt in cnts:
    x, y, w, h = cv2.boundingRect(cnt)
    size_ok = min_extent < w + h < max_extent
    # Accept only blocks that start to the right of the previous accepted one.
    if size_ok and (i == 0 or prev_right < x):
        i += 1
        cv2.imwrite(os.path.join(base_dir, '_' + str(i) + ".jpg"),
                    im[y:y + h, x:x + w])
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
        prev_right = x + w

# Save the processed images
cv2.imwrite(os.path.join(base_dir, 'out.png'), im)
cv2.imwrite(os.path.join(base_dir, 'out_bw.png'), bw)

How do I get Tesseract to read the license plate in the this Python OpenCV project?

My OpenCV code works just fine. It find the license plate, extracts a black and white version of it, using the contours, and then when I pass it to pytesseract, it won't read any of the letters. I've tracked the program throughout each line of the code and the OpenCV works fine, but pytesseract won't extract the text from the image. There are no errors, it just doesn't read any text. The license plate is mine.
import cv2
# pip install imutils
import imutils
import pytesseract
# Locate a license plate in 'LP.jpg' as the largest four-cornered contour,
# run Tesseract on the grayscale crop and show the annotated image.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'

# Read the image file
image = cv2.imread('LP.jpg')
# image = imutils.resize(image, width=500)

# Convert to Grayscale Image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Removes Noise
gray_image = cv2.bilateralFilter(gray_image, 11, 17, 17)

# Canny Edge Detection
canny_edge = cv2.Canny(gray_image, 100, 200)

# Find contours based on Edges
# findContours returns three values in OpenCV 3.x and two in 2.x/4.x, which
# is what causes "too many values to unpack" when the count is hard-coded.
# Index [-2] is the contour list on every version.
contours = cv2.findContours(
    canny_edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Keep the 30 largest contours, biggest first.
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:30]

# Initialize license Plate contour and x,y coordinates
contour_with_license_plate = None
license_plate = None
x = y = w = h = None

# Find the contour with 4 potential corners and create a Region of Interest around it
for contour in contours:
    # Find Perimeter of contour and it should be a closed contour
    perimeter = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
    # This checks if it's a rectangle
    if len(approx) == 4:
        contour_with_license_plate = approx
        x, y, w, h = cv2.boundingRect(contour)
        license_plate = gray_image[y:y + h, x:x + w]
        break

# Without this guard a missing plate leaves license_plate/x/y/w/h as None and
# the OCR and drawing calls below fail with an unrelated-looking error.
if license_plate is None:
    raise SystemExit("No four-cornered contour found; cannot locate a license plate.")

# approximate_contours = cv2.drawContours(image, [contour_with_license_plate], -1, (0, 255, 0), 3)

# Text Recognition
text = pytesseract.image_to_string(license_plate, lang='eng')
print(text)

# Draw License Plate and write the Text
image = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 3)
image = cv2.putText(image, text, (x - 100, y - 50), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 6, cv2.LINE_AA)
print("License Plate: ", text)
cv2.imshow("License Plate Detection", image)
cv2.waitKey(0)
Here is my partial answer, maybe you can perfect it.
Apply adaptive-threshold + bitwise-not operations to the license_plate variable.
The result will be:
Now if you read it:
# `bnt` is the thresholded and inverted plate image built in the code listing
# further down.
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)
Result:
277 BOY
Unfortunately Q is recognized as O.
Code: (just replace the part under the "# Text Recognition" comment with the code below)
# Binarise the plate crop with a mean-based adaptive threshold, inverted
# output (`license_plate` comes from the question's script).
thr = cv2.adaptiveThreshold(license_plate, 252, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV, 91, 93)
# Flip the result back with a bitwise NOT before handing it to Tesseract.
bnt = cv2.bitwise_not(thr)
# Run OCR with page segmentation mode 6 passed through `config`.
txt = pytesseract.image_to_string(bnt, config="--psm 6")
print(txt)

Python: Showing every Object of an image in its own window

I've written some code, to crop an object (in this case the Data Matrix Code) from an image:
import numpy as np
import cv2
# Crop one object (the Data Matrix code) out of 'datamatrixc.png' and show it
# with a white border.
image = cv2.imread("datamatrixc.png")
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
img_height, img_width = image.shape[:2]
WHITE = [255, 255, 255]

# Threshold filter
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV)

# Get Contours
# [-2] is the contour list whether findContours returns two values
# (OpenCV 2.x/4.x) or three (3.x).
contours = cv2.findContours(
    thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
if not contours:
    raise SystemExit("No contours found in datamatrixc.png")

# Get Last element of the contours object
# (contours[-1] replaces the former `max` index, which shadowed the builtin).
cnt = contours[-1]

# Get coordinates for the bounding box
x, y, w, h = cv2.boundingRect(cnt)

# The row range is derived from the image height and the box height, not
# from the box's own y coordinate.
row_offset = ((img_height / 2) - h) / 2
image_region = image[int(row_offset):int(row_offset + h), int(x):int(x + w)]

dmc = cv2.copyMakeBorder(image_region, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=WHITE)
cv2.imshow("Test", dmc)
cv2.waitKey(0)
cv2.destroyAllWindows()
The code works fine and I received as result:
However, the next image is a little more complicated.
I receive the same result as in the previous image, but I have no idea how to detect the two other objects.
Is there an easier way every object showing in its window?
For this specific image, take the biggest contours you have and check whether each object is a four-sided shape. If the midpoint between each pair of the bounding box's corners (see pairs below) is in the contour array, then voilà, problem solved.
Pairs: TopRight-TopLeft, TopRight-BottomRight, TopLeft-BottomLeft, BottomLeft-BottomRight
Or you could check whether there are pixels that are not black/white inside the bounding box.
And to show each object individually, just wrap a for loop around what you already have.
How about this?
import numpy as np
import cv2
# Show every sufficiently large object of 'datamatrixc.png' one after another.
image = cv2.imread("datamatrixc.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Close small gaps before labelling.
kernel = np.ones((3, 3), np.uint8)
closing = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE, kernel, iterations=4)

# Label the *closed* mask. The closing result was previously computed and then
# ignored, because the raw `bin_img` was labelled instead.
n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(closing)

size_thresh = 5000
# The range starts at 1, skipping label 0 as before.
for i in range(1, n_labels):
    area = stats[i, cv2.CC_STAT_AREA]
    if area < size_thresh:
        continue
    print(area)
    x = stats[i, cv2.CC_STAT_LEFT]
    y = stats[i, cv2.CC_STAT_TOP]
    w = stats[i, cv2.CC_STAT_WIDTH]
    h = stats[i, cv2.CC_STAT_HEIGHT]
    cv2.imshow('img', image[y:y + h, x:x + w])
    # Wait for a key press before moving on to the next object.
    cv2.waitKey(0)

File is replicated without reason (Python, OpenCV)

First, it's hard to explain. If someone have a better title, feel free to edit/suggest.
So, I am using the next code to delete ROI's from given images.
import cv2
import os
import numpy as np
import shutil
# Folder holding the numbered input images.
src = (os.path.expanduser('~\\Desktop\\output\\'))
causali = os.listdir(src) # build the list of "causali" files
# Sort numerically by the part of the file name before the first dot.
causali.sort(key=lambda x: int(x.split('.')[0]))
for file in enumerate(causali): # file is an (index, name) pair
#import image
image = cv2.imread(os.path.expanduser('~\\Desktop\\output\\{}'.format(file[1])))
cv2.imshow('orig',image)
cv2.waitKey(0)
#grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
#cv2.imshow('gray',gray)
#cv2.waitKey(0)
#binary
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
#cv2.imshow('second',thresh)
#cv2.waitKey(0)
#dilation
# 1x80 kernel: dilates along the horizontal direction only.
kernel = np.ones((1,80), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#cv2.imshow('dilated',img_dilation)
#cv2.waitKey(0)
#find contours
# NOTE(review): unpacking three values matches the OpenCV 3.x return shape.
im2,ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
# Get bounding box
x, y, w, h = cv2.boundingRect(ctr)
# Getting ROI
roi = image[y:y+h, x:x+w]
# NOTE(review): `clean` is assigned only inside this branch. For an image in
# which no contour is lower than 25 px, `clean` still refers to the previous
# file's result, and that is what the imwrite below stores under the new index.
if h < 25:
# Paint the region white (filled rectangle, thickness -1).
clean = cv2.rectangle(image,(x,y),( x + w, y + h ),(255,255,255),-1)
cv2.imwrite(os.path.expanduser('~\\Desktop\\output2\\{}.png').format(file[0]), clean)
I put a condition if h < 25 to delete the ROI I don't want to be visible in the final image.
This is source folder..
and this is the output the program give out..
As you can see, file n°8 came out the same as n°7. This is because the program doesn't find any ROI in that image that satisfies the condition, which is fine.
The problem is that I don't understand why it replicates the last file it worked on (7 ---> 8). How can I fix this?
In case no ROI is found, it should just copy the file, not overwrite it with the previous one.
Thanks
I rewrote the code to make a copy before processing each image and to fill the regions with color, so now it's clearer:
import cv2
import os
import numpy as np
# For every numbered image in "causali/", paint over all regions lower than
# 25 px and write the result to "output/<index>.png".
causali = os.listdir("causali")
# Sort numerically by the part of the file name before the first dot.
causali.sort(key=lambda x: int(x.split('.')[0]))
print(causali)

for idx, fname in enumerate(causali):
    fname = os.path.expanduser("causali/" + fname)
    print(fname)
    img = cv2.imread(fname)

    # Inverted binary mask, dilated horizontally with a 1x80 kernel.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    kernel = np.ones((1, 80), np.uint8)
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    # [-2] is the contour list on every OpenCV version.
    cnts = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    cnts = sorted(cnts, key=lambda cnt: cv2.boundingRect(cnt)[0])

    # Start from a fresh copy for every file, so an image without any small
    # region is saved unchanged instead of inheriting the previous result.
    clean = img.copy()
    for cnt in cnts:
        x, y, w, h = cv2.boundingRect(cnt)
        if h < 25:
            # Draw on the copy itself (previously the rectangle went onto
            # `img` and `clean` was rebound to it, which made the copy
            # pointless). Use (255, 255, 255) instead of green to erase.
            cv2.rectangle(clean, (x, y), (x + w, y + h), (0, 255, 0), -1)

    # save the "clean" image, once per input file
    cv2.imwrite(os.path.expanduser("output/{}.png").format(idx), clean)
This is the result:

Categories