I'm trying to detect the numbers found in my squares, and I thought I could use the library easyocr, but for some reason I read the wrong values.
This is the console output:
And here I have all my pictures (they are separated, this is just to show them all)
How can it be that I don't find any numbers?
from PIL import Image
from operator import itemgetter
import numpy as np
import easyocr
import cv2
import re
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
#Define empty array
Cubes = []
def getNumber(ROI):
    img = cv2.imread(ROI)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray, 127, 255, 0)
    #cv2.imshow('Thresholded original', thresh)
    #cv2.waitKey(0)

    ## Get contours
    contours, h = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    ## Only process contours that have big areas
    imx = img.shape[0]
    imy = img.shape[1]
    lp_area = (imx * imy) / 10

    tmp_img = img.copy()
    for cnt in contours:
        approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
        if cv2.contourArea(cnt) > lp_area:
            # Draw box corners and minimum area rectangle
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            #cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
            #cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
            #cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
            #cv2.imshow('Minimum Area Rectangle', tmp_img)
            #cv2.waitKey(0)

            ## Correct orientation and crop
            # Link: https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
            width = int(rect[1][0])
            height = int(rect[1][1])
            src_pts = box.astype("float32")
            dst_pts = np.array([[0, height - 1],
                                [0, 0],
                                [width - 1, 0],
                                [width - 1, height - 1]], dtype="float32")
            M = cv2.getPerspectiveTransform(src_pts, dst_pts)
            warped = cv2.warpPerspective(img, M, (width, height))

            # Run OCR on the cropped image.
            # If the predicted value is a digit show it, else rotate first.
            result = reader.readtext(warped)
            print(result)
            predicted_digit = result[0][1]
            if np.char.isdigit(predicted_digit) == True:
                cv2.imshow("warped " + ROI, warped)
            else:
                rot_img = warped.copy()
                for i in range(0, 3):
                    rotated_image = cv2.rotate(rot_img, cv2.ROTATE_90_CLOCKWISE)
                    result = reader.readtext(rotated_image)
                    #if np.array(result).size == 0:
                    #    continue
                    if not result:
                        rot_img = rotated_image
                        continue
                    #if len(result) == 0:
                    #    continue
                    predicted_digit = result[0][1]
                    #print(result)
                    #print(predicted_digit)
                    if np.char.isdigit(predicted_digit) == True:
                        cv2.imshow("Image " + ROI, rotated_image)
                        break
                    rot_img = rotated_image
    return predicted_digit
def sortNumbers(Cubes):
    Cubes = sorted(Cubes, key=lambda x: int(x[2]))
    #Cubes.sort(key=itemgetter(2))            # In-place sorting
    #Cubes = sorted(Cubes, key=itemgetter(2)) # Create a new list
    return Cubes
#img = cv2.imread('gulRecNum.jpg')
img = cv2.imread('boxes.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
#Change these if cube colours changes?
lower =(20, 100, 100)
upper = (30, 255, 255)
#lower = (0,240,160)
#upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
    cv2.drawContours(result2, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        # Read saved image (number)
        result = getNumber('ROI_{}.png'.format(ROI_number))
        print("ROI_number: ", result)
        Cubes.append([cx, cy, result])
        ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
#cv2.imshow('mask', mask)
#cv2.imshow('thresh', thresh)
SortedCubes = sortNumbers(Cubes)
print("\nFound array [x, y, Cube_num] = ", Cubes)
print("Sorted array [x, y, Cube_num] = ", SortedCubes)
cv2.waitKey(0)
cv2.destroyAllWindows()
EDIT
I also got these two pictures to try with
EDIT 2
y: 160
shape: rectangle
areal: 723.0
['ta'] : image should have been 4
x: 269
y: 155
shape: rectangle
areal: 637.5
['1'] : image should have been 1
x: 64
y: 149
shape: rectangle
areal: 748.5
['20'] : image should have been 2
x: 125
y: 141
shape: rectangle
areal: 714.5
['5.'] : image should have been 5
x: 222
y: 127
shape: rectangle
areal: 662.5
['3'] : image should have been 3
x: 165
y: 101
shape: rectangle
areal: 666.5
['40'] : image should have been 7
x: 58
y: 96
shape: rectangle
areal: 782.5
['L', 'RJ'] : image should have been 8
x: 279
y: 90
shape: rectangle
areal: 653.5
['EJ'] : image should have been 9
x: 107
y: 84
shape: rectangle
areal: 717.5
['C'] : image should have been 6
All the images are cut from this image
You may need a different processing method, and you may need to set the page-segmentation mode (psm) for tesseract.
Here, when you apply adaptive-thresholding (at) with the bitwise_not operation:
Now, when you read the image with psm mode 6 (assume a single uniform block of text), the result will be:
['34', '215', '28', '7', '5']
Unfortunately, 6 and 9 are not detected; maybe if you change the at parameters you can find a better result.
Code:
import cv2
import pytesseract
img = cv2.imread("u3ZTw.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY_INV, 59, 88)
bnt = cv2.bitwise_not(thr)
txt = pytesseract.image_to_string(bnt, config="--psm 6 digits")
txt = txt.strip().split("\n")
print(txt)
cv2.imshow("bnt", bnt)
cv2.waitKey(0)
For photos in the Edit:
You must change the adaptive-threshold parameters for each different image.
To be more specific, you must change the blockSize and C params to get the expected result.
For instance, for numbers 4 and 5, I changed:
blockSize: 31 (previously 59)
C: 30 (previously 88)
import cv2
import pytesseract
img = cv2.imread("num5.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY_INV, 31, 30)
bnt = cv2.bitwise_not(thr)
txt = pytesseract.image_to_string(bnt, config="--psm 6 digits")
txt = txt.strip().split("\n")
print(txt)
cv2.imshow("bnt", bnt)
cv2.waitKey(0)
Images:
Result:
['4']
['5']
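Since each image seems to need its own blockSize and C, one option is to brute-force a small grid of candidates and keep the first pair that yields clean digits. This is only a hedged sketch, not part of the original answer; the candidate ranges are arbitrary assumptions:
import cv2
import pytesseract

def ocr_with_param_sweep(path):
    # Try several adaptive-threshold parameter pairs until OCR returns a digit.
    gry = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
    for block_size in (31, 41, 51, 59):  # blockSize must be odd and > 1
        for c in (20, 30, 60, 88):       # constant subtracted from the mean
            thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                        cv2.THRESH_BINARY_INV, block_size, c)
            bnt = cv2.bitwise_not(thr)
            txt = pytesseract.image_to_string(bnt, config="--psm 6 digits").strip()
            if txt.isdigit():
                return txt, (block_size, c)
    return None, None

# e.g. digit, params = ocr_with_param_sweep("num5.png")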
Related
I'm trying to detect the numbers found in my squares, and I thought I could use the library pytesseract, but for some reason I read the wrong values.
This is the console output:
And here I have all my pictures (they are separated, this is just to show them all)
import numpy as np
import cv2
import re
from PIL import Image
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
img = cv2.imread('gulRecNum.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# convert to HSV, since red and yellow are the lowest hue colors and come before green
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# create a binary thresholded image on hue between red and yellow
lower = (0,240,160)
upper = (30,255,255)
thresh = cv2.inRange(hsv, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9,9))
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
clean = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
result1 = img.copy()
result2 = img.copy()
mask = np.zeros(result2.shape, dtype=np.uint8)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
ROI_number = 0
for c in contours:
    cv2.drawContours(result1, [c], 0, (0, 0, 0), 2)
    # get rotated rectangle from contour
    rot_rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rot_rect)
    box = np.int0(box)
    # draw rotated rectangle on copy of img
    cv2.drawContours(result2, [box], 0, (0, 0, 0), 2)
    # Do something if the area is greater than 1.
    # What's the area of the component?
    areal = cv2.contourArea(c)
    if areal > 1:
        # get the center of mass
        M = cv2.moments(c)
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
        center = (cx, cy)
        print("\nx: ", cx, "\ny: ", cy)
        color = (0, 0, 255)
        cv2.circle(result2, center, 3, color, -1)
        cv2.putText(result2, "center", (int(cx) - 10, int(cy) - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 2)
        # LOOK AT THIS PART
        x, y, w, h = cv2.boundingRect(c)
        ROI = 255 - thresh[y:y+h, x:x+w]
        cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
        cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
        Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
        print("Number ", Number)
        ROI_number += 1
# save result
cv2.imwrite("4cubes_result2.png",result2)
# display result
imS = cv2.resize(result2, (600, 400))
cv2.imshow("result2", imS)
cv2.waitKey(0)
cv2.destroyAllWindows()
I thought I could write Number = pytesseract.image_to_string(ROI, config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789') and then print(Number) to get the number from the image, but I don't. How can that be?
EDIT NEW ERROR
How do I solve it with this picture?
(The code here is identical to the full listing at the top of this question; the only change is the input image, img = cv2.imread('webcam7.png') instead of 'boxes.jpg'.)
I get the following error (it can't detect a number)
Traceback (most recent call last):
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 169, in <module>
    result = getNumber('ROI_{}.png'.format(ROI_number))
  File "c:/Users/Mads/OneDrive/Universitet/7. semester/ROB1/python/objectDetectiong.py", line 70, in getNumber
    predicted_digit = result[0][1]
IndexError: list index out of range
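A hedged note on the traceback: the IndexError means reader.readtext returned an empty list for that crop, so result[0] does not exist. Guarding the first read the same way the rotation loop already guards its reads avoids the crash, for example:
# Inside getNumber, guard the first OCR call before indexing into its result.
result = reader.readtext(warped)
if not result:
    predicted_digit = None  # nothing detected on the unrotated crop
else:
    predicted_digit = result[0][1]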
This is an implementation of my comment. Since I do not have the individual images, this code works with the given grid-like processed image.
For OCR I used EasyOCR instead of Tesseract. You could also try pytesseract on each cropped output image. Instead of rotating 4 times by 90 degrees and comparing confidences, I went with digit detection on the OCR result: only if a detection is not a number do I rotate and retry.
Tested on Google Colab. Replace cv2_imshow(...) with cv2.imshow(...) to work locally, and remove the from google.colab.patches import cv2_imshow import.
This is a modified version of my answer on card orientation correction here: OpenCV: using Canny and Shi-Tomasi to detect round corners of a playing card. All previous code is left as comments.
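The core rotate-and-retry idea, distilled into a short hedged sketch (read_digit is a hypothetical helper name, not part of the full code below):
import cv2
import numpy as np

def read_digit(reader, crop):
    # Try OCR on a crop; rotate 90 degrees and retry until a digit is found.
    # reader is an easyocr.Reader; returns the digit string or None.
    img = crop
    for _ in range(4):  # 0, 90, 180, 270 degrees
        result = reader.readtext(img)
        if result and np.char.isdigit(result[0][1]):
            return result[0][1]
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    return None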
Code
!pip install easyocr
import easyocr
reader = easyocr.Reader(['ch_sim','en']) # need to run only once to load model into memory
"""
Based on my answer of rotated card detection,
https://stackoverflow.com/questions/64860785/opencv-using-canny-and-shi-tomasi-to-detect-round-corners-of-a-playing-card/64862448#64862448
"""
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
img = cv2.imread('1.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,0)
#cv2_imshow(thresh)
#cv2.imshow('Thresholded original',thresh)
#cv2.waitKey(0)
## Get contours
contours,h = cv2.findContours(thresh,cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
## only draw contour that have big areas
imx = img.shape[0]
imy = img.shape[1]
lp_area = (imx * imy) / 10
#################################################################
# Four point perspective transform
# https://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
#################################################################
def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")
    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    # return the ordered coordinates
    return rect
def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect
    # compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))
    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view",
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")
    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    # return the warped image
    return warped
#################################################################
#print(len(contours))
tmp_img = img.copy()
for cnt in contours:
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    ## calculate number of vertices
    #print(len(approx))

    ## Get the largest contours only
    ## Side count cannot be used since contours are not all rectangular
    if cv2.contourArea(cnt) > lp_area:
    #if len(approx) == 4 and cv2.contourArea(cnt) > lp_area:
        # print("\n\n")
        # print("#################################################")
        # print("rectangle")
        # print("#################################################")
        # print("\n\n")

        #tmp_img = img.copy()
        #cv2.drawContours(tmp_img, [cnt], 0, (0, 255, 0), 6)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Contour Borders', tmp_img)
        #cv2.waitKey(0)

        # tmp_img = img.copy()
        # cv2.drawContours(tmp_img, [cnt], 0, (255, 0, 255), -1)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('Contour Filled', tmp_img)
        # #cv2.waitKey(0)

        # # Make a hull around the contour and draw it on the original image
        # tmp_img = img.copy()
        # mask = np.zeros((img.shape[:2]), np.uint8)
        # hull = cv2.convexHull(cnt)
        # cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)
        # cv2_imshow(mask)
        # #cv2.imshow('Convex Hull Mask', mask)
        # #cv2.waitKey(0)

        # # Draw minimum area rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Minimum Area Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # Draw box corners and minimum area rectangle
        #tmp_img = img.copy()
        rect = cv2.minAreaRect(cnt)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        #print(rect)
        #print(box)
        cv2.drawContours(tmp_img, [box], 0, (0, 50, 255), 3)
        cv2.circle(tmp_img, tuple(box[0]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[1]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[2]), 8, (0, 255, 0), -1)
        cv2.circle(tmp_img, tuple(box[3]), 8, (0, 255, 0), -1)
        #cv2_imshow(tmp_img)
        #cv2.imshow('Minimum Area Rectangle', tmp_img)
        #cv2.waitKey(0)

        ## Correct orientation and crop
        # Link: https://jdhao.github.io/2019/02/23/crop_rotated_rectangle_opencv/
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height - 1],
                            [0, 0],
                            [width - 1, 0],
                            [width - 1, height - 1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(img, M, (width, height))
        #cv2_imshow(warped)

        # Run OCR on cropped image.
        # If the predicted value is a digit print it, else rotate first.
        result = reader.readtext(warped)
        predicted_digit = result[0][1]
        print("Detected Text:")
        if np.char.isdigit(predicted_digit) == True:
            print(result)
            print(predicted_digit)
            cv2_imshow(warped)
        else:
            rot_img = warped.copy()
            for i in range(0, 3):
                rotated_image = cv2.rotate(rot_img, cv2.ROTATE_90_CLOCKWISE)
                result = reader.readtext(rotated_image)
                #if np.array(result).size == 0:
                #    continue
                if not result:
                    rot_img = rotated_image
                    continue
                #if len(result) == 0:
                #    continue
                predicted_digit = result[0][1]
                #print(result)
                #print(predicted_digit)
                #cv2_imshow(rotated_image)
                if np.char.isdigit(predicted_digit) == True:
                    print(result)
                    print(predicted_digit)
                    cv2_imshow(rotated_image)
                    break
                rot_img = rotated_image

        # # Draw bounding rectangle
        # #tmp_img = img.copy()
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # Bounding Rectangle and Minimum Area Rectangle
        # #tmp_img = img.copy()
        # rect = cv2.minAreaRect(cnt)
        # box = cv2.boxPoints(rect)
        # box = np.int0(box)
        # cv2.drawContours(tmp_img, [box], 0, (0, 0, 255), 2)
        # x, y, w, h = cv2.boundingRect(cnt)
        # cv2.rectangle(tmp_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # #cv2_imshow(tmp_img)
        # #cv2.imshow('Bounding Rectangle', tmp_img)
        # #cv2.waitKey(0)

        # # Determine the most extreme points along the contour
        # # https://www.pyimagesearch.com/2016/04/11/finding-extreme-points-in-contours-with-opencv/
        # tmp_img = img.copy()
        # extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
        # extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
        # extTop = tuple(cnt[cnt[:, :, 1].argmin()][0])
        # extBot = tuple(cnt[cnt[:, :, 1].argmax()][0])
        # cv2.drawContours(tmp_img, [cnt], -1, (0, 255, 255), 2)
        # cv2.circle(tmp_img, extLeft, 8, (0, 0, 255), -1)
        # cv2.circle(tmp_img, extRight, 8, (0, 255, 0), -1)
        # cv2.circle(tmp_img, extTop, 8, (255, 0, 0), -1)
        # cv2.circle(tmp_img, extBot, 8, (255, 255, 0), -1)
        # print("Corner Points: ", extLeft, extRight, extTop, extBot)
        # cv2_imshow(tmp_img)
        # #cv2.imshow('img contour drawn', tmp_img)
        # #cv2.waitKey(0)
        # #cv2.destroyAllWindows()

        # ## Perspective Transform
        # tmp_img = img.copy()
        # pts = np.array([extLeft, extRight, extTop, extBot])
        # warped = four_point_transform(tmp_img, pts)
        # cv2_imshow(tmp_img)
        # #cv2.imshow("Warped", warped)
        # #cv2.waitKey(0)

cv2_imshow(tmp_img)
#cv2.destroyAllWindows()
Output Prediction
Detected Text:
[([[85, 67], [131, 67], [131, 127], [85, 127]], '1', 0.9992043972015381)]
1
Detected Text:
[([[85, 65], [133, 65], [133, 125], [85, 125]], '2', 0.9991914629936218)]
2
Detected Text:
[([[96, 72], [144, 72], [144, 128], [96, 128]], '4', 0.9996564984321594)]
4
Detected Text:
[([[88, 76], [132, 76], [132, 132], [88, 132]], '3', 0.9973381161689758)]
3
White Region Detection With Corners
Alternate methods:
Try a pretrained digit classification model (trained on MNIST and others) on each large contour exceeding a certain area, as sketched below.
Use multitask object detection with rotation: one output of the network is the detections, another an angle regression to predict orientation.
Use a text detector like EAST and run OCR on each detected text region.
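As a hedged illustration of the first alternative, here is a sketch that classifies each cropped contour with a pretrained digit classifier exported to ONNX; the model file mnist.onnx is an assumption, not something provided with this answer:
import cv2
import numpy as np

# Hypothetical pretrained MNIST-style classifier exported to ONNX.
net = cv2.dnn.readNetFromONNX("mnist.onnx")

def classify_digit(crop_bgr):
    # Classify a single cropped digit image; returns (digit, score).
    gray = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (28, 28))
    gray = cv2.bitwise_not(gray)  # MNIST digits are white on black
    blob = cv2.dnn.blobFromImage(gray, scalefactor=1.0 / 255.0)
    net.setInput(blob)
    scores = net.forward().flatten()
    return int(np.argmax(scores)), float(np.max(scores))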
I'm new to OpenCV and computer vision. Right now I'm trying to crop a TIFF scan after detecting the corners, and then extract information from it based on exact x:y coordinates using Python, OpenCV, NumPy, and OCR with Tesseract.
What I've achieved right now is that I load an image (scan), binarize it, fix the rotation, and remove empty spaces. The result is already good, but not good enough. My image is still always rotated a little bit. Here is an image example: Example
Example (w/o arrows)
The question is: How to detect these corners and crop everything outside them?
Here is my current code:
import cv2
import numpy as np

for filenumber in range(2, 7):
    img = cv2.imread('img' + str(filenumber) + '.tif')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)
    img = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 11)
    kernel = np.ones((2, 2), np.uint8)
    img = cv2.erode(img, kernel, iterations=3)
    thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    coords = np.column_stack(np.where(thresh > 0))
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle

    # rotate the image to deskew it
    (h, w) = img.shape[:500]
    center = (w // 400, h // 400)
    M = cv2.getRotationMatrix2D(center, angle, 1)
    rotated = cv2.warpAffine(img, M, (w, h),
                             flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # draw the correction angle on the image so we can validate it
    cv2.putText(rotated, "Angle: {:.2f} degrees".format(angle),
                (100, 400), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
    img = rotated
    th, threshed = cv2.threshold(img, 240, 255, cv2.THRESH_BINARY_INV)

    ## (2) Morph-op to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    morphed = cv2.morphologyEx(threshed, cv2.MORPH_CLOSE, kernel)

    ## (3) Find the max-area contour
    cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    cnt = sorted(cnts, key=cv2.contourArea)[-1]

    ## (4) Crop and save it
    x, y, w, h = cv2.boundingRect(cnt)
    dst = img2[y:y+h, x:x+w]
    img = dst
    size_multiplier = szm = 1
    cv2.imwrite('img_' + str(filenumber) + '_Cropped' + '.jpg', img)

    # Configs for OCR segments
    for nnumb in range(2, 7):
        print('[INFO2]: File=' + str(filenumber) + '; nnumb=' + str(nnumb))
        if nnumb == 1:
            sub_image = img[130:130 + 90, 1220:1220 + 600]
            config = ('-l rus --oem 0 --psm 3 -c tessedit_char_whitelist="0123456789"')
        if nnumb == 2:
            sub_image = img[150:150 + 60, 1980:1980 + 460]
            config = ('-l rus --oem 1 --psm 3 -c tessedit_char_whitelist="0123456789"')
        if nnumb == 3:
            sub_image = img[230:230 + 70, 620:620 + 3000]
            config = ('-l rus --oem 0 --psm 3')
        if nnumb == 4:
            sub_image = img[410:410 + 70, 835:835 + 470]
            config = ('-l rus --oem 0 --psm 1 -c tessedit_char_whitelist="0123456789"')
        if nnumb == 5:
            sub_image = img[480:480 + 220, 610:610 + 1300]
            config = ('-l rus --oem 0 --psm 3')
        if nnumb == 6:
            sub_image = img[720:720 + 70, 110:110 + 500]
            config = ('-l rus --oem 0 --psm 3 -c tessedit_char_whitelist="0123456789"')
UPDATE: Final Code
def cornersandcrop(img):
    main_image = img
    main_imageF = main_image.copy()
    gray_image = main_image.copy()

    # Remove parts of the image except the corners
    gray_image[70:70 + 500, 70:70 + 500] = [255, 255, 255]
    gray_image[44:44 + 100, 1900:1900 + 550] = [255, 255, 255]
    gray_image[2270:2270 + 700, 45:45 + 200] = [255, 255, 255]
    gray_image[140:2880, 0:2500] = [255, 255, 255]
    gray_image[0:3000, 150:2350] = [255, 255, 255]

    gray_image = cv2.cvtColor(gray_image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.medianBlur(gray_image, 5)
    gray_image = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 20)
    kernel = np.ones((2, 2), np.uint8)
    gray_image = cv2.erode(gray_image, kernel, iterations=5)
    gray_image = cv2.dilate(gray_image, kernel, iterations=2)
    gray_image = cv2.morphologyEx(gray_image, cv2.MORPH_OPEN, np.ones((1, 1), np.uint8))

    template = cv2.imread('Templates\\Template_Corner_Top_Left.png', 0)
    template2 = cv2.imread('Templates\\Template_Corner_Top_Right.png', 0)
    template3 = cv2.imread('Templates\\Template_Corner_Bot_Right.png', 0)
    template4 = cv2.imread('Templates\\Template_Corner_Bot_Left.png', 0)
    width, height = template.shape[::-1]  # get the width and height
    width2, height2 = template2.shape[::-1]
    width3, height3 = template3.shape[::-1]
    width4, height4 = template4.shape[::-1]

    match = cv2.matchTemplate(gray_image, template, cv2.TM_CCOEFF_NORMED)
    match2 = cv2.matchTemplate(gray_image, template2, cv2.TM_CCOEFF_NORMED)
    match3 = cv2.matchTemplate(gray_image, template3, cv2.TM_CCOEFF_NORMED)
    match4 = cv2.matchTemplate(gray_image, template4, cv2.TM_CCOEFF_NORMED)

    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(match)
    top_Pos1 = max_loc
    Pos1 = (top_Pos1[0] + width - 115, top_Pos1[1] + height - 115)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(match2)
    top_Pos2 = max_loc
    Pos2 = (top_Pos2[0] + width2 - 5, top_Pos2[1] + height2 - 115)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(match3)
    top_Pos3 = max_loc
    Pos3 = (top_Pos3[0] + width3 - 5, top_Pos3[1] + height3 - 5)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(match4)
    top_Pos4 = max_loc
    Pos4 = (top_Pos4[0] + width4 - 115, top_Pos4[1] + height4 - 5)

    src_pts = np.array([Pos1, Pos2, Pos3, Pos4], dtype=np.float32)
    dst_pts = np.array([[0, 0], [3000, 0], [3000, 2500], [0, 2500]], dtype=np.float32)
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    warp = cv2.warpPerspective(main_imageF, M, (3000, 2500))
    warp = cv2.resize(warp, (int(2500), int(3000)), fx=1, fy=1, interpolation=cv2.INTER_CUBIC)
    return warp
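A minimal hedged usage example for the function above (assuming cv2 and numpy are imported and the corner template files exist on disk):
import cv2

for filenumber in range(2, 7):
    img = cv2.imread('img' + str(filenumber) + '.tif')
    warped = cornersandcrop(img)
    cv2.imwrite('img_' + str(filenumber) + '_Cropped.jpg', warped)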
This works for me in Python/OpenCV for locating one corner using template matching. Just make the template image larger than your corner so that there is some white around it.
Input:
Template:
import cv2
import numpy as np
# read image
img = cv2.imread('drawing.jpg')
# convert img to grayscale
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# read template as grayscale
tmplt = cv2.imread('corner_ul.png', cv2.IMREAD_GRAYSCALE)
hh, ww = tmplt.shape
# define corner intersection in template
offset_x = 23
offset_y = 28
# do template matching
corrimg = cv2.matchTemplate(img_gray,tmplt,cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(corrimg)
max_val_ncc = '{:.3f}'.format(max_val)
print("normalize_cross_correlation: " + max_val_ncc)
xx = max_loc[0]
yy = max_loc[1]
corner_x = xx + offset_x
corner_y = yy + offset_y
print('xmatchloc =',xx,'ymatch =',yy)
print('cornerlocx =',corner_x,'cornerlocy =',corner_y)
# draw template bounds and corner intersection in red onto img
result = img.copy()
cv2.rectangle(result, (xx, yy), (xx+ww, yy+hh), (0, 0, 255), 2)
cv2.circle(result, (corner_x,corner_y), 1, (0, 0, 255), 2)
cv2.imshow('image', img)
cv2.imshow('template', tmplt)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
# save results
cv2.imwrite('drawing_template_match_location.jpg', result)
Information:
normalize_cross_correlation: 1.000
xmatchloc = 0 ymatch = 318
cornerlocx = 23 cornerlocy = 346
Result:
You can also refine the result to get sub-pixel accuracy. See https://www.bbsmax.com/A/lk5aBbGod1/
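As a hedged illustration (a standard trick, not necessarily what that link does), you can fit a parabola through the correlation values around the integer peak:
import numpy as np

def subpixel_peak(corr, x, y):
    # Refine an integer template-match peak (x, y) by parabolic interpolation.
    dx = dy = 0.0
    if 0 < x < corr.shape[1] - 1:
        left, cen, right = corr[y, x - 1], corr[y, x], corr[y, x + 1]
        denom = left - 2 * cen + right
        if denom != 0:
            dx = 0.5 * (left - right) / denom
    if 0 < y < corr.shape[0] - 1:
        top, cen, bot = corr[y - 1, x], corr[y, x], corr[y + 1, x]
        denom = top - 2 * cen + bot
        if denom != 0:
            dy = 0.5 * (top - bot) / denom
    return x + dx, y + dy

# Usage with the match above: sx, sy = subpixel_peak(corrimg, max_loc[0], max_loc[1])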
Rotate the template by 90 degrees three times to form the other 3 templates, measure or compute the offsets from the top-left corner to the intersection of the corner lines, and do the template matching. Then, once you have all 4 matches, you can do your cropping using numpy slicing, as sketched below.
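A hedged sketch of that loop; the corner order and the offset bookkeeping are assumptions based on the (23, 28) offset used above:
import cv2

img_gray = cv2.cvtColor(cv2.imread('drawing.jpg'), cv2.COLOR_BGR2GRAY)
tmplt = cv2.imread('corner_ul.png', cv2.IMREAD_GRAYSCALE)
offset = (23, 28)  # corner intersection inside the upper-left template

def rotate_point_cw(pt, shape):
    # Map an (x, y) point through cv2.ROTATE_90_CLOCKWISE of an image of `shape`.
    h, w = shape[:2]
    x, y = pt
    return (h - 1 - y, x)

corners = []
for i in range(4):  # upper-left, upper-right, bottom-right, bottom-left
    corr = cv2.matchTemplate(img_gray, tmplt, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(corr)
    corners.append((max_loc[0] + offset[0], max_loc[1] + offset[1]))
    offset = rotate_point_cw(offset, tmplt.shape)  # rotate the offset with the template
    tmplt = cv2.rotate(tmplt, cv2.ROTATE_90_CLOCKWISE)

print("corner intersections:", corners)
# e.g. crop with numpy slicing:
# xs = [c[0] for c in corners]; ys = [c[1] for c in corners]
# cropped = img_gray[min(ys):max(ys), min(xs):max(xs)]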
I suggest using template matching by "adjusted filter":
Convert the image to a binary image (as you did), and use closing instead of erosion.
Convert the image to "-1"s and "1"s: replace 0 with -1 and 255 with 1.
Build an L-shaped kernel h (for finding the bottom-left corner):
Place -1 where the value in im needs to be -1, and 1 where the value needs to be 1.
Make sure the corner of the L shape in h is at the center (it's a bit of a waste - you may avoid it and fix the position later).
Example for kernel (small scale):
0 -1 1 0 0
0 -1 1 0 0
0 -1 1 1 1
0 -1 -1 -1 -1
0 0 0 0 0
Filter im with kernel h - the maximum value of the output is the position that best matches h.
Find the x, y coordinates of the maximum value of the filtered image.
Here is a code sample that finds the bottom left corner:
import numpy as np
import cv2
img = cv2.imread('img1.tif')
orig_img = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_not(gray)
img = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 11)
img = cv2.morphologyEx(img, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8)) # Opening - remove white dots around corners.
# cv2.imwrite('img.tif', img)
# Convert from 0 to -1 and 255 to 1 (im is "binary" image with "-1"s and "1"s).
im = img.astype(float) / 127.5 - 1
# Build L shape kernel h that matches the L shape we want to search.
# Place "-1" where value in im needs to be "-1", and "1" when value needs to be "1".
# Make sure the corner of the L shape in h is at the center (it's a bit of a waste).
# Example for kernel (small scale):
# 0 -1 1 0 0
# 0 -1 1 0 0
# 0 -1 1 1 1
# 0 -1 -1 -1 -1
# 0 0 0 0 0
h = np.zeros((75, 75)) # Kernel size is 75x75
h[0:37, 37:39] = 1 # Two columns of "1"s from top to center
h[36:38, 37:] = 1 # Two rows of "1"s from center to right side
h[0:39, 36] = -1 # One column of "-1"s
h[38, 36:] = -1 # One row of "-1"s
# Save h kernel as an image for testing
h2 = h.copy()
h2 = ((h2+1)*127.5).astype(np.uint8)
cv2.imwrite('h2.png', h2)
# Filter im with kernel h - the maximum value of the output is the position that best matches h
imf = cv2.filter2D(im, -1, h)
# Find index of maximum value from 2D numpy array
pos_y, pos_x = np.where(imf == np.amax(imf))
# Draw red circle around coordinate (pos_x, pos_y) for testing.
cv2.circle(orig_img, (int(pos_x), int(pos_y)), 8, (0, 0, 255), thickness=2)
cv2.imwrite('circled_im.png', orig_img) # Save image for testing
Result (bottom left corner):
Filter kernel (as an image):
Update:
In case there are other "L shaped" objects, you may need to use more "aggressive" kernel.
Example:
# More "aggressive" kernel
h = np.zeros((75, 75)) # Kernel size is 75x75
h[0:37, 37:41] = 1 # 4 columns of "1"s from top to center
h[34:38, 37:] = 1 # 4 rows of "1"s from center to right side
h[0:39, 36] = -1 # One column of "-1"s
h[38, 36:] = -1 # One row of "-1"s
h[0:34, 41] = -1 # 1 columns of "-1"s from top to center
h[33, 41:] = -1 # 1 rows of "-1"s from center to right side
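The kernel above only targets the bottom-left corner; a hedged extension is to mirror it for the other three corners and repeat the same filtering:
import numpy as np
import cv2

# Mirror the bottom-left kernel h (built above) to target the other corners.
kernels = {
    'bottom_left':  h,
    'bottom_right': np.fliplr(h),
    'top_left':     np.flipud(h),
    'top_right':    np.flipud(np.fliplr(h)),
}

corners = {}
for name, k in kernels.items():
    imf = cv2.filter2D(im, -1, k)
    pos_y, pos_x = np.unravel_index(np.argmax(imf), imf.shape)
    corners[name] = (int(pos_x), int(pos_y))

print(corners)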
I've written some code to crop an object (in this case the Data Matrix code) from an image:
import numpy as np
import cv2
image = cv2.imread("datamatrixc.png")
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
img_height, img_width = image.shape[:2]
WHITE = [255, 255, 255]
# Threshold filter
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY_INV)
# Get Contours
_, contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Get Last element of the contours object
max = len(contours) - 1
cnt = contours[max]
# Get coordinates for the bounding box
x, y, w, h = cv2.boundingRect(cnt)
image_region = image[ int(((img_height / 2) - h) / 2) : int(((img_height / 2) - h) / 2 + h), int(x): int(x + w) ]
dmc = cv2.copyMakeBorder(image_region, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value = WHITE)
cv2.imshow("Test", dmc)
cv2.waitKey(0)
cv2.destroyAllWindows()
The code works fine and I received this result:
However, the next image is a little more complicated.
I receive the same result as in the previous image, but I have no idea how to detect the two other objects.
Is there an easier way to show every object in its own window?
For this specific image, take the biggest contours you have and check if the object is a 4-sided shape. If the half-point between the bounding box's corners (see pairs below) is in the contour array, then voilà, problem solved.
Pairs: TopRight-TopLeft, TopRight-BottomRight, TopLeft-BottomLeft, BottomLeft-BottomRight
Or you could check if there are pixels that are not black/white inside the bounding box.
And for plotting them individually, just wrap what you already have in a for loop.
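A hedged sketch of the half-point test described above (the helper name and the use of cv2.pointPolygonTest are my assumptions, not the answerer's code):
import cv2

def looks_four_sided(cnt):
    # Check the midpoints of the bounding box's corner pairs against the contour.
    x, y, w, h = cv2.boundingRect(cnt)
    tl, tr = (x, y), (x + w, y)
    bl, br = (x, y + h), (x + w, y + h)
    pairs = [(tr, tl), (tr, br), (tl, bl), (bl, br)]
    for a, b in pairs:
        mid = ((a[0] + b[0]) / 2.0, (a[1] + b[1]) / 2.0)
        # pointPolygonTest >= 0 means the midpoint lies on or inside the contour
        if cv2.pointPolygonTest(cnt, mid, False) < 0:
            return False
    return True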
How about this?
import numpy as np
import cv2
image = cv2.imread("datamatrixc.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((3,3),np.uint8)
closing = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE, kernel, iterations=4)
n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_img)
size_thresh = 5000
for i in range(1, n_labels):
    if stats[i, cv2.CC_STAT_AREA] >= size_thresh:
        print(stats[i, cv2.CC_STAT_AREA])
        x = stats[i, cv2.CC_STAT_LEFT]
        y = stats[i, cv2.CC_STAT_TOP]
        w = stats[i, cv2.CC_STAT_WIDTH]
        h = stats[i, cv2.CC_STAT_HEIGHT]
        cv2.imshow('img', image[y:y+h, x:x+w])
        cv2.waitKey(0)
How can I crop a concave polygon from an image? My input image looks like this:
and the coordinates of the closed polygon are
[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]. I want the region bounded by the concave polygon to be cropped using OpenCV. I searched for other similar questions, but I was not able to find a correct answer; that's why I am asking. Can you help me?
Any help would be highly appreciated!
Steps
find region using the poly points
create mask using the poly points
do mask op to crop
add white bg if needed
The code:
# 2018.01.17 20:39:17 CST
# 2018.01.17 20:50:35 CST
import numpy as np
import cv2
img = cv2.imread("test.png")
pts = np.array([[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]])
## (1) Crop the bounding rect
rect = cv2.boundingRect(pts)
x,y,w,h = rect
croped = img[y:y+h, x:x+w].copy()
## (2) make mask
pts = pts - pts.min(axis=0)
mask = np.zeros(croped.shape[:2], np.uint8)
cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
## (3) do bit-op
dst = cv2.bitwise_and(croped, croped, mask=mask)
## (4) add the white background
bg = np.ones_like(croped, np.uint8)*255
cv2.bitwise_not(bg,bg, mask=mask)
dst2 = bg+ dst
cv2.imwrite("croped.png", croped)
cv2.imwrite("mask.png", mask)
cv2.imwrite("dst.png", dst)
cv2.imwrite("dst2.png", dst2)
Source image:
Result:
You can do it in 3 steps:
Create a mask out of the image
mask = np.zeros((height, width))
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
Apply mask to original image
res = cv2.bitwise_and(img,img,mask = mask)
Optionally, you can crop the image to get a smaller one
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
With this you should end up with the cropped image.
UPDATE
For the sake of completeness here is the complete code:
import numpy as np
import cv2
img = cv2.imread("test.png")
height = img.shape[0]
width = img.shape[1]
mask = np.zeros((height, width), dtype=np.uint8)
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
res = cv2.bitwise_and(img,img,mask = mask)
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
cropped = res[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
cv2.waitKey(0)
For the colored background version use the code like this:
import numpy as np
import cv2
img = cv2.imread("test.png")
height = img.shape[0]
width = img.shape[1]
mask = np.zeros((height, width), dtype=np.uint8)
points = np.array([[[10,150],[150,100],[300,150],[350,100],[310,20],[35,10]]])
cv2.fillPoly(mask, points, (255))
res = cv2.bitwise_and(img,img,mask = mask)
rect = cv2.boundingRect(points) # returns (x,y,w,h) of the rect
im2 = np.full((res.shape[0], res.shape[1], 3), (0, 255, 0), dtype=np.uint8 ) # you can also use other colors or simply load another image of the same size
maskInv = cv2.bitwise_not(mask)
colorCrop = cv2.bitwise_or(im2,im2,mask = maskInv)
finalIm = res + colorCrop
cropped = finalIm[rect[1]: rect[1] + rect[3], rect[0]: rect[0] + rect[2]]
cv2.imshow("cropped" , cropped )
cv2.imshow("same size" , res)
cv2.waitKey(0)
For the blurred image background version, use code like this:
img = cv2.imread(img_path)
h, w = img.shape[:2]
box = <box points>  # polygon points, e.g. np.array([[[10,150],[150,100],...]])
# -- background
blur_bg = cv2.blur(img, (h, w))
mask1 = np.zeros((h, w, 3), np.uint8)
mask2 = np.ones((h, w, 3), np.uint8) * 255
cv2.fillPoly(mask1, box, (255, 255, 255))
# -- indexing
img_idx = np.where(mask1 == mask2)
bg_idx = np.where(mask1 != mask2)
# -- fill box
res = np.zeros((h, w, 3), np.int64)
res[img_idx] = img[img_idx]
res[bg_idx] = blur_bg[bg_idx]
res = res[y1:y2, x1:x2, :]  # (x1, y1)-(x2, y2) is the crop rectangle
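A self-contained hedged version of this blurred-background variant, filling in the placeholders with the polygon from this question (the test.png filename, blur kernel size, and final crop are assumptions):
import numpy as np
import cv2

img = cv2.imread("test.png")
h, w = img.shape[:2]
box = np.array([[[10, 150], [150, 100], [300, 150],
                 [350, 100], [310, 20], [35, 10]]])

# Heavily blurred copy of the whole image, used as the background.
blur_bg = cv2.blur(img, (25, 25))

# White-filled polygon mask.
mask = np.zeros((h, w), np.uint8)
cv2.fillPoly(mask, box, 255)

# Keep sharp pixels inside the polygon, blurred pixels outside.
res = np.where(mask[..., None] == 255, img, blur_bg)

# Optionally crop to the polygon's bounding rectangle.
x, y, bw, bh = cv2.boundingRect(box)
res = res[y:y + bh, x:x + bw]
cv2.imwrite("blur_crop.png", res)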
I have a lot of images to use, but for this question I am going to just use the first and last image.
Image 1 of 83
Image 83 of 83
I ran Image 1 through this code
import numpy as np
import cv2

def process(filename, key):
    gwash = cv2.imread(filename)
    gwashBW = cv2.cvtColor(gwash, cv2.COLOR_BGR2GRAY)
    ret, thresh1 = cv2.threshold(gwashBW, 179, 255, cv2.THRESH_BINARY)
    kernel = np.ones((1, 1), np.uint8)
    erosion = cv2.erode(thresh1, kernel, iterations=1)
    opening = cv2.morphologyEx(erosion, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
    _, contours, _ = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    areas = [cv2.contourArea(c) for c in contours]
    idx = np.argmax(areas)
    cnt = contours[idx]
    mask = np.zeros_like(gwash)
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    out = np.zeros_like(gwash)
    out[mask == 255] = gwash[mask == 255]
    cv2.imwrite('img/out{}.jpg'.format(key), out)
    print(idx)
With this I have gotten both better and worse results.
Here is the contour code I used and the result:
import cv2
import numpy as np
import imutils
from matplotlib import pyplot as plt

def process(filename, key):
    image = cv2.imread(filename)
    resized = imutils.resize(image, width=600)
    ratio = image.shape[0] / float(resized.shape[0])
    blurred = cv2.GaussianBlur(resized, (5, 5), 0)
    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    lab = cv2.cvtColor(resized, cv2.COLOR_BGR2LAB)
    thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)[1]
    imagem = cv2.bitwise_not(thresh)
    th4 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    th3 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    gray_2 = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    gray_blur = cv2.GaussianBlur(gray, (15, 15), 0)
    thresh_2 = cv2.adaptiveThreshold(gray_blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 1)
    kernel = np.ones((1, 1), np.uint8)
    closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=500)
    cnts = cv2.findContours(closing.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts2 = cv2.findContours(closing.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if imutils.is_cv2() else cnts[1]
    cnts2 = cnts2[0] if imutils.is_cv2() else cnts2[1]

    # loop over the contours
    for c in cnts:
        # compute the center of the contour
        M = cv2.moments(c)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            cX, cY = 0, 0

        # multiply the contour (x, y)-coordinates by the resize ratio,
        # then draw the contours and the name of the shape and labeled
        # color on the image
        c = c.astype("float")
        c *= ratio
        c = c.astype("int")
        cX *= ratio
        cY *= ratio
        cv2.drawContours(image, [c], -1, (0, 255, 0), 5)
        cv2.circle(image, (int(cX), int(cY)), 5, 300, 3)

    #r = 100.0 / image.shape[1]
    #dim = (100, int(image.shape[0] * r))
    #imageresized = cv2.resize(image, (2048, 2048), dim, interpolation=cv2.INTER_AREA)
    cv2.imwrite('i/image_{}.jpg'.format(key), QR_final)  # NOTE: QR_final is undefined in the original; `image` was presumably intended
    print('image_{}.jpg'.format(key))
So my question is: what would be the best approach to accurately find the canvas shape in all the photos using Python?