How to loop through different image crops? - python

So what I want to do is loop through two image crops and then see how many white pixels there are on both of these images. If a certain amount is detected on one image you print out something and if another amount is detected on the other you print out something etc etc.
I currently have this:
import numpy as np
import cv2
#img = cv2.imread('FIFA_Full_Crop_2_button.jpg')
img = cv2.imread('FIFA2.jpg')
#img = cv2.imread('FIFA_Full_Crop_2_button_3.jpg')
mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
rect = (1512, 20, 180, 185) # boundary of interest
cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
img = img * mask2[:, :, np.newaxis]
cv2.imwrite('Image_mask.jpg', img)
mx = (1510, 22, 110, 185)
x, y, h, w = mx
# x,y coordinates for specified "fixed" location
# Left button
# mx = (1525, 58, 27, 22)
# x, y, h, w = mx
# Circle button 2
# mz = (1664, 58, 27, 22)
# x, y, h, w = mz
# Output to files
crop = img[y:y+h, x:x+w]
cv2.imwrite('Image_crop.jpg', crop)
cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),2)
cv2.imwrite('Image_cont.jpg', img)
# Detect white pixels from cropped image
img = cv2.imread('Image_crop.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,gray = cv2.threshold(gray, 150,255,0)
gray2 = gray.copy()
cv2.imshow('IMG',gray2)
# Example left button detection 72 pixels
# n_white_pix = np.sum(gray2 == 255)
# print('Number of white pixels:', n_white_pix)
# if crop.sum() >= 72:
# print("Left")
# else:
# print("No button detected")
n_white_pix = np.sum(gray2 == 255)
print('Number of white pixels:', n_white_pix)
if n_white_pix > 0:
print("White pixel detected")
else:
print("Nothing detected")
cv2.waitKey(0)
cv2.destroyAllWindows()
As seen above I have commented out the left button and circle part as I can only do one image crop at a time, how would I go about in doing things if I want to check both crops at the same time?
An ideal solution would be looping through the button crops and then checking them with n_white_pix to see if they exceed a certain amount.
So the main problem at hand now is, how do I loop through two image crops, # Left button and # Circle button 2.

Related

How to make adaptive Threshold at part on image in Opencv python?

Maybe my question is strange something but I need to make an adaptive Threshold on part of the image that the user selects with his mouse and that's my code
import cv2
img = cv2.imread("test.png")
# img2 = cv2.imread("flower.jpg")
# variables
ix = -1
iy = -1
drawing = False
def draw_reactangle_with_drag(event, x, y, flags, param):
global ix, iy, drawing, img
if event == cv2.EVENT_LBUTTONDOWN:
drawing = True
ix = x
iy = y
elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True:
img2 = cv2.imread("test.png")
cv2.rectangle(img2, pt1=(ix, iy), pt2=(x, y),
color=(0, 255, 255), thickness=1)
img = img2
elif event == cv2.EVENT_LBUTTONUP:
drawing = False
img2 = cv2.imread("test.png")
cv2.rectangle(img2, pt1=(ix, iy), pt2=(x, y),
color=(0, 255, 255), thickness=1)
img = img2
gray = cv2.cvtColor(img2[y: iy, x: ix], cv2.COLOR_BGR2GRAY)
th = cv2.adaptiveThreshold(gray,
255, # maximum value assigned to pixel values exceeding the threshold
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, # gaussian weighted sum of neighborhood
cv2.THRESH_BINARY, # thresholding type
5, # block size (5x5 window)
3) # constant
img = th
cv2.namedWindow(winname="Title of Popup Window")
cv2.setMouseCallback("Title of Popup Window", draw_reactangle_with_drag)
while True:
cv2.imshow("Title of Popup Window", img)
if cv2.waitKey(10) == 27:
break
cv2.destroyAllWindows()
and that's what I got at attached screen
What am I missing?
Here is one solution for the desired region in Python/OpenCV. It is to use division normalization rather than adaptive thresholding. (This may or may not work for other regions.)
Read the input
Specify crop coordinates for rectangle
Crop the image
Blur the cropped image
Divide the input by the blurred image
Save the result
Input:
import cv2
import numpy as np
# read the input
img = cv2.imread('equador.png')
# specify the crop rectangle
# 364 396 359 453 (y iy x ix)
x1 = 359
y1 = 364
x2 = 453
y2 = 396
# crop the input
crop = img[y1:y2, x1:x2]
# blur
blur = cv2.GaussianBlur(crop, (0,0), sigmaX=99, sigmaY=99)
# divide
divide = cv2.divide(crop, blur, scale=255)
# put the divide back into the input
result = img.copy()
result[y1:y2, x1:x2] = divide
# save results
cv2.imwrite('equador_crop.png', crop)
cv2.imwrite('equador_crop_blur.png', blur)
cv2.imwrite('equador_crop_divide.png', divide)
cv2.imwrite('equador_crop_divide_result.png', result)
# show results
cv2.imshow('crop', crop)
cv2.imshow('blur', blur)
cv2.imshow('divide', divide)
cv2.imshow('result', result)
cv2.waitKey(0)
Cropped Image:
Blurred Image:
Division Normalized Crop:
Division Normalized Replace:
Note: you may prefer to convert the cropped image to grayscale before blurring and then divide the grayscale version by the blurred image.

Using mask of image to find contours openCV

I have the task of find the contours of a red boundary drawn on a site location map. From the contours detected, I need to find the coordinates and save these to an array. I am able to filter for the red boundary and draw the contours, however I don't know how use this new image in my coordinate extraction. As a temporary solution I have screenshotted the mask generated, saved this, then in a new program have used this screenshot to find the coordinates. Is there a way to join all this code together?
This is the code for drawing the contours:
import cv2
img = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cornwall_cropped.png')
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Gen lower mask (0-5) and upper mask (175-180) of RED
mask1 = cv2.inRange(img_hsv, (0,50,20), (5,255,255))
mask2 = cv2.inRange(img_hsv, (175,50,20), (180,255,255))
# Merge the mask and crop the red regions
mask = cv2.bitwise_or(mask1, mask2 )
cropped = cv2.bitwise_and(img, img, mask=mask)
## Display
cv2.imshow("mask", mask)
cv2.imshow("cropped", cropped)
cv2.waitKey()
This is the code used to extract the coordinates - the image I read in is a screen shot of the 'cropped' image from the above code
# Reading image
font = cv2.FONT_HERSHEY_COMPLEX
img2 = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cropped.png', cv2.IMREAD_COLOR)
# Reading same image in another
# variable and converting to gray scale.
img = cv2.imread(r'C:\Users\abbys\OneDrive\Pictures\agileapp\cropped.png', cv2.IMREAD_GRAYSCALE)
# Converting image to a binary image
# ( black and white only image).
_, threshold = cv2.threshold(img, 110, 255, cv2.THRESH_BINARY)
# Detecting contours in image.
contours, _= cv2.findContours(threshold, cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
# Going through every contour found in the image.
for cnt in contours :
approx = cv2.approxPolyDP(cnt, 0.009 * cv2.arcLength(cnt, True), True)
# draws boundary of contours.
cv2.drawContours(img2, [approx], 0, (0, 0, 255), 5)
# Used to flatted the array containing
# the co-ordinates of the vertices.
n = approx.ravel()
i = 0
for j in n :
if(i % 2 == 0):
x = n[i]
y = n[i + 1]
# String containing the co-ordinates.
string = str(x) + " " + str(y)
if(i == 0):
# text on topmost co-ordinate.
cv2.putText(img2, "Arrow tip", (x, y),
font, 0.5, (255, 0, 0))
else:
# text on remaining co-ordinates.
cv2.putText(img2, string, (x, y),
font, 0.5, (0, 255, 0))
i = i + 1
# Showing the final image.
cv2.imshow('image2', img2)
# Exiting the window if 'q' is pressed on the keyboard.
if cv2.waitKey(0) & 0xFF == ord('q'):
cv2.destroyAllWindows()

how to extract self-defined ROI with dlib facelandmarks?

I don't know how to extract the irregular area surrounded by green lines. i.e., the left cheek and the right cheek of a face.
from collections import OrderedDict
import numpy as np
import cv2
import dlib
import imutils
CHEEK_IDXS = OrderedDict([("left_cheek", (1, 2, 3, 4, 5, 48, 31)),
("right_cheek", (11, 12, 13, 14, 15, 35, 54))
])
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
img = cv2.imread('Tom_Cruise.jpg')
img = imutils.resize(img, width=600)
overlay = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
detections = detector(gray, 0)
for k, d in enumerate(detections):
shape = predictor(gray, d)
for (_, name) in enumerate(CHEEK_IDXS.keys()):
pts = np.zeros((len(CHEEK_IDXS[name]), 2), np.int32)
for i, j in enumerate(CHEEK_IDXS[name]):
pts[i] = [shape.part(j).x, shape.part(j).y]
pts = pts.reshape((-1, 1, 2))
cv2.polylines(overlay, [pts], True, (0, 255, 0), thickness=2)
cv2.imshow("Image", overlay)
cv2.waitKey(0)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cv2.destroyAllWindows()
I know if just simply extract a rectangular area from the face as cheeks, the code can be like this
ROI1 = img[shape[29][1]:shape[33][1], shape[54][0]:shape[12][0]] #right cheeks
ROI1 = img[shape[29][1]:shape[33][1], shape[4][0]:shape[48][0]] #left cheek
but I want to extract the irregular area for subsequent processing, how can i do it ?
You can accomplish this by two simple steps:
Create a mask using the point coordinates you have
Execute bitwise_and operation (crop)
Code:
cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
output = cv2.bitwise_and(img, img, mask=mask)
Output:
Additionally, if you want to focus on the cropped polygons, you can create a bounding rectangle to the polygons then crop from the output frame like tihs:
# Create a bounding rects list at global level
bounding_rects = []
# Calculate Bounding Rects for each pts array inside the for loop
bounding_rects.append(cv2.boundingRect(pts))
# Assign geometrical values to variables to crop (Use a range(len(bounding_boxes)) for loop here)
enter code here
x1,y1,w1,h1 = bounding_rects[0]
x2,y2,w2,h2, = bounding_rects[1]
# At the end of the program, crop the bounding boxes from output
cropped1= output[y1:y1+h1, x1:x1+w1]
cropped2= output[y2:y2+h2, x2:x2+w2]
Output:

pytesseract detects the wrong integer values

I'm trying to detects the numbers found in my sqares, and I thought I could use the libary pytesseract, but for some reason I read the wrong values.
This is the console output:
And here I have all my pictures (they are seperated, this is just to show them all)
import numpy as np
import cv2
import re
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
lower = (0,240,160)
upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result1 = img.copy()
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
cv2.drawContours(result1,[c],0,(0,0,0),2)
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
# draw rotated rectangle on copy of img
cv2.drawContours(result2,[box],0,(0,0,0),2)
# Gør noget hvis arealet er større end 1.
# Whats the area of the component?
areal = cv2.contourArea(c)
if(areal > 1):
# get the center of mass
M = cv2.moments(c)
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
center = (cx, cy)
print("\nx: ",cx,"\ny: ",cy)
color = (0, 0, 255)
cv2.circle(result2, center, 3, color, -1)
cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
# LOOK AT THIS PART
x,y,w,h = cv2.boundingRect(c)
ROI = 255 - thresh[y:y+h, x:x+w]
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
print("Number ", Number)
ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thought I could write Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') print(Number)
and then get the number from the image, but I don't, how can that be?
EDIT NEW ERROR
how do i solve it with this picture?
from PIL import Image
from operator import itemgetter
import numpy as np
import easyocr
import cv2
import re
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
#Define empty array
Cubes = []
def getNumber(ROI):
img = cv2.imread(ROI)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
#cv2.imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)
## Get contours
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10
tmp_img = img.copy()
for cnt in contours:
approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True)
if cv2.contourArea(cnt) > lp_area:
# Draw box corners and minimum area rectangle
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
#cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
#cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
#cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
#cv2.imshow(tmp_img)
#cv2.imshow('Minimum Area Rectangle', tmp_img)
#cv2.waitKey(0)
## Correct orientation and crop
# Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
# Run OCR on cropped image
# If the predicted value is digit print else rotate first
result = reader.readtext(warped)
print(result)
predicted_digit = result[0][1]
if np.char.isdigit(predicted_digit) == True:
cv2.imshow("warped " + ROI,warped)
else:
rot_img = warped.copy()
for i in range(0, 3):
rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
result = reader.readtext(rotated_image)
#if np.array(result).size == 0:
# continue
if not result:
rot_img = rotated_image
continue
#if len(result) == 0:
# continue
predicted_digit = result[0][1]
#print(result)
#print(predicted_digit)
#cv2.imshow(rotated_image)
if np.char.isdigit(predicted_digit) == True:
cv2.imshow("Image " + ROI, rotated_image)
break
rot_img = rotated_image
return predicted_digit
def sortNumbers(Cubes):
Cubes = sorted(Cubes, key=lambda x: int(x[2]))
#Cubes.sort(key=itemgetter(2)) # In-place sorting
#Cubes = sorted(Cubes, key=itemgetter(2)) # Create a new list
return Cubes
#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('webcam7.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
#Change these if cube colours changes?
lower =(20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
cv2.drawContours(result2,[c],0,(0,0,0),2)
# get rotated rectangle from contour
rot_rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rot_rect)
box = np.int0(box)
# draw rotated rectangle on copy of img
cv2.drawContours(result2,[box],0,(0,0,0),2)
# Gør noget hvis arealet er større end 1.
# Whats the area of the component?
areal = cv2.contourArea(c)
if(areal > 1):
# get the center of mass
M = cv2.moments(c)
cx = int(M['m10']/M['m00'])
cy = int(M['m01']/M['m00'])
center = (cx, cy)
print("\nx: ",cx,"\ny: ",cy)
color = (0, 0, 255)
cv2.circle(result2, center, 3, color, -1)
cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
x,y,w,h = cv2.boundingRect(c)
ROI = 255 - thresh[y:y+h, x:x+w]
cv2.drawContours(mask, [c], -1, (255,255,255), -1)
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
#Read saved image (number)
result = getNumber('ROI_{}.png'.format(ROI_number))
print("ROI_number: ", result)
Cubes.append([cx, cy, result])
ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()
I get the following error (it can't detect a number)
Traceback (most recent call last): File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module> result = getNumber('ROI_{}.png'.format(ROI_number)) File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber predicted_digit = result[0][1] IndexError: list index out of range
This is implementation of my comment. Since, I do not have individual images this code will work with given grid like processed image.
For OCR I used EasyOCR instead of Tesserect. You could also try pytesserect on each output cropped images. Instead of rotating 4 times by 90 degrees by confidence, I went with digit detection on OCR result. If a detection is not a number then only rotate and retry.
Tested on google colab. Replace cv2_imshow(...) with cv2.imshow(...) for working locally. Also remove from google.colab.patches import cv2_imshow import.
This is modified version of my answer on card orientation correction here, OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card. All previous code is left as comment.
Code
!pip install easyocr
import easyocr
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
img = cv2.imread('1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
#cv2_imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)
## Get contours
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10
#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################
def order_points(pts):
# initialzie a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# return the ordered coordinates
return rect
def four_point_transform(image, pts):
# obtain a consistent order of the points and unpack them
# individually
rect = order_points(pts)
(tl, tr, br, bl) = rect
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordiates or the top-right and top-left x-coordinates
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
# now that we have the dimensions of the new image, construct
# the set of destination points to obtain a "birds eye view",
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# compute the perspective transform matrix and then apply it
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# return the warped image
return warped
#################################################################
#print(len(contours))
tmp_img = img.copy()
for cnt in contours:
approx = cv2.approxPolyDP(cnt,0.01 * cv2.arcLength(cnt, True), True)
## calculate number of vertices
#print(len(approx))
## Get the largest contours only
## Side count cannot be used since contours are not all rectangular
if cv2.contourArea(cnt) > lp_area:
#if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:
# print("\n\n")
# print("#################################################")
# print("rectangle")
# print("#################################################")
# print("\n\n")
#tmp_img = img.copy()
#cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
#cv2_imshow(tmp_img)
#cv2.imshow('Contour Borders', tmp_img)
#cv2.waitKey(0)
# tmp_img = img.copy()
# cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
# cv2_imshow(tmp_img)
# #cv2.imshow('Contour Filled', tmp_img)
# #cv2.waitKey(0)
# # Make a hull arround the contour and draw it on the original image
# tmp_img = img.copy()
# mask = np.zeros((img.shape[:2]), np.uint8)
# hull = cv2.convexHull(cnt)
# cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
# cv2_imshow(mask)
# #cv2.imshow('Convex Hull Mask', mask)
# #cv2.waitKey(0)
# # Draw minimum area rectangle
# #tmp_img = img.copy()
# rect = cv2.minAreaRect(cnt)
# box = cv2.boxPoints(rect)
# box = np.int0(box)
# cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Minimum Area Rectangle', tmp_img)
# #cv2.waitKey(0)
# Draw box corners and minimum area rectangle
#tmp_img = img.copy()
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
#print(rect)
#print(box)
cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
#cv2_imshow(tmp_img)
#cv2.imshow('Minimum Area Rectangle', tmp_img)
#cv2.waitKey(0)
## Correct orientation and crop
# Link, https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
#cv2_imshow(warped)
# Run OCR on cropped image
# If the predicted value is digit print else rotate first
result = reader.readtext(warped)
predicted_digit = result[0][1]
print("Detected Text:")
if np.char.isdigit(predicted_digit) == True:
print(result)
print(predicted_digit)
cv2_imshow(warped)
else:
rot_img = warped.copy()
for i in range(0, 3):
rotated_image = cv2.rotate(rot_img, cv2.cv2.ROTATE_90_CLOCKWISE)
result = reader.readtext(rotated_image)
#if np.array(result).size == 0:
# continue
if not result:
rot_img = rotated_image
continue
#if len(result) == 0:
# continue
predicted_digit = result[0][1]
#print(result)
#print(predicted_digit)
#cv2_imshow(rotated_image)
if np.char.isdigit(predicted_digit) == True:
print(result)
print(predicted_digit)
cv2_imshow(rotated_image)
break
rot_img = rotated_image
# # Draw bounding rectangle
# #tmp_img = img.copy()
# x, y, w, h = cv2.boundingRect(cnt)
# cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Bounding Rectangle', tmp_img)
# #cv2.waitKey(0)
# # Bounding Rectangle and Minimum Area Rectangle
# #tmp_img = img.copy()
# rect = cv2.minAreaRect(cnt)
# box = cv2.boxPoints(rect)
# box = np.int0(box)
# cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
# x, y, w, h = cv2.boundingRect(cnt)
# cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
# #cv2_imshow(tmp_img)
# #cv2.imshow('Bounding Rectangle', tmp_img)
# #cv2.waitKey(0)
# # determine the most extreme points along the contour
# # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
# tmp_img = img.copy()
# extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
# extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
# extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
# extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
# cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
# cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
# cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
# cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
# cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)
# print("Corner Points: ", extLeft, extRight, extTop, extBot)
# cv2_imshow(tmp_img)
# #cv2.imshow('img contour drawn', tmp_img)
# #cv2.waitKey(0)
# #cv2.destroyAllWindows()
# ## Perspective Transform
# tmp_img = img.copy()
# pts = np.array([extLeft, extRight, extTop, extBot])
# warped = four_point_transform(tmp_img, pts)
# cv2_imshow(tmp_img)
# #cv2.imshow("Warped", warped)
# #cv2.waitKey(0)
cv2_imshow(tmp_img)
#cv2.destroyAllWindows()
Output Prediction
Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1
Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2
Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4
Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3
White Region Detection With Corners
Alternate methods,
Try pretrained digit classification model trained from MNIST and others on each large contours exceeding certain area.
Use multitask object detection with rotation. One output of network will be detections another angle regression to predict orientation.
Use text detector like, East and run OCR on each detected text.

How to find the coordinates of the outside corners of these 4 squares? (morphological closing/opening does not conserve squares if image is rotated)

One of the first processing steps in a tool I'm coding is to find the coordinates of the outside corners of 4 big black squares. They will then be used to do a homographic transform, in order to deskew / unrotate the image (a.k.a perspective transform), to finally get a rectangular image. Here is an example of - rotated and noisy - input (download link here):
To keep the big squares only, I'm using morphological transformations like closing/opening:
import cv2, numpy as np
img = cv2.imread('rotatednoisy-cropped.png', cv2.IMREAD_GRAYSCALE)
kernel = np.ones((30, 30), np.uint8)
img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
cv2.imwrite('output.png', img)
Input file (download link):
Output, after morphological transform:
Problem: the output squares are not square anymore, and therefore the coordinates of the top left corner of the square will be not precise at all!
I could reduce the kernel size, but then it would keep more unwanted small elements.
Question: how to get a better detection of the corners of the squares?
Note:
As a morphological closing is just a dilatation + an erosion, I found the culprit:
import cv2, numpy as np
img = cv2.imread('rotatednoisy-cropped.png', cv2.IMREAD_GRAYSCALE)
kernel = np.ones((30, 30), np.uint8)
img = cv2.dilate(img, kernel, iterations = 1)
After this step, it's still ok:
Then
img = cv2.erode(img, kernel, iterations = 1)
gives
and it's not ok anymore!
See this link for detailed explanation on how to de-skew an image.
import cv2
import numpy as np
def corners(box):
cx,cy,w,h,angle = box[0][0],box[0][1],box[1][0],box[1][1],box[2]
CV_PI = 22./7.
_angle = angle*CV_PI/180.;
b = np.cos(_angle)*0.5;
a = np.sin(_angle)*0.5;
pt = []
pt.append((int(cx - a*h - b*w),int(cy + b*h - a*w)));
pt.append((int(cx + a*h - b*w),int(cy - b*h - a*w)));
pt.append((int(2*cx - pt[0][0]),int(2*cy - pt[0][1])));
pt.append((int(2*cx - pt[1][0]),int(2*cy - pt[1][1])));
return pt
if __name__ == '__main__':
image = cv2.imread('image.jpg',cv2.IMREAD_UNCHANGED)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
n = 3
sigma = 0.3 * (n/2 - 1) + 0.8
gray = cv2.GaussianBlur(gray, ksize=(n,n), sigmaX=sigma)
ret,binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)
_,contours,_ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours.sort(key=lambda x: len(x), reverse=True)
points = []
for i in range(0,4):
shape = cv2.approxPolyDP(contours[i], 0.05*cv2.arcLength(contours[i],True), True)
if len(shape) == 4:
points.append(shape)
points = np.array(points,dtype=np.int32)
points = np.reshape(points, (-1,2))
box = cv2.minAreaRect(points)
pt = corners(box)
for i in range(0,4):
image = cv2.line(image, (pt[i][0],pt[i][1]), (pt[(i+1)%4][0],pt[(i+1)%4][1]), (0,0,255))
(h,w) = image.shape[:2]
(center) = (w//2,h//2)
angle = box[2]
if angle < -45:
angle = (angle+90)
else:
angle = -angle
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(image, M, (w,h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)
cv2.imshow('image', image)
cv2.imshow('rotated', rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
You could try by searching and filtering out your specific contours (black rectangles) and sorting them with a key. Then select the extreme point for each contour (left, right, top, bottom) and you will get the points. Note that this approach is ok for this picture only and if the picture was roteted in other direction, you would have to change the code accordingly. I am not an expert but I hope this helps a bit.
import numpy as np
import cv2
img = cv2.imread("rotate.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, threshold = cv2.threshold(gray,150,255,cv2.THRESH_BINARY)
im, contours, hierarchy = cv2.findContours(threshold,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
contours.sort(key=lambda c: np.min(c[:,:,1]))
j = 1
if len(contours) > 0:
for i in range(0, len(contours)):
size = cv2.contourArea(contours[i])
if 90 < size < 140:
if j == 1:
c1 = contours[i]
j += 1
elif j == 2:
c2 = contours[i]
j += 1
elif j == 3:
c3 = contours[i]
j += 1
elif j == 4:
c4 = contours[i]
break
Top = tuple(c1[c1[:, :, 1].argmin()][0])
Right = tuple(c2[c2[:, :, 0].argmax()][0])
Left = tuple(c3[c3[:, :, 0].argmin()][0])
Bottom = tuple(c4[c4[:, :, 1].argmax()][0])
cv2.circle(img, Top, 2, (0, 255, 0), -1)
cv2.circle(img, Right, 2, (0, 255, 0), -1)
cv2.circle(img, Left, 2, (0, 255, 0), -1)
cv2.circle(img, Bottom, 2, (0, 255, 0), -1)
cv2.imshow("Image", img)
cv2.waitKey(0)
Result:
You can extract the squares as single blobs after binarization with a suitable threshold, and select the appropriate ones based on size. You can also first denoise with a median filter if you want.
Then a tight rotated bounding rectangle will give you the corners (you can obtain it by running Rotating Calipers on the Convex Hull).

Categories