Labyrinth recognition is slow and recognition is not always acurate - python

I'm making a Python script that:
gets the image from file
Looks for a papersheet (done by openCV)
Warps the image
crops the margins of papersheet
Looks for lowest amount of white pixels in a single row of pixels which makes up for width/height of a single field of a grid (goes pixel by pixel)
Recreates the labyrinth based on what are the average pixel RGB Values in each field (goes pixel by pixel) and saves it to file
It shows me the grid of 0 and 1s in the CLI
When I do it on original image (5.2MB, 4000x3000) , it takes up to 20 seconds (although I have pretty snappy 8 core ryzen cpu), actually I only need the output (0 or 1 grid). How can I speed up the process as it will run on raspberry pi and it needs to take only about few seconds to finish? I know that the code is quite long, however it's split into sections that are described so it should be fairly easy to read.
#LIBRARY IMPORT
import math
import turtle
import time
import sys
from collections import deque
import numpy as np
from skimage import exposure
import argparse
import imutils
import cv2
from PIL import Image, ImageDraw
#LOAD THE FIRST IMAGE
image = cv2.imread("./image3.jpg")
ratio = image.shape[0] / 600.0
orig = image.copy()
image = imutils.resize(image, height = 600)
#Range of the colors of paper
lower = [160, 160, 160]
upper = [255,255,255]
#create array from ranges
lower = np.array(lower, dtype="uint8")
upper = np.array(upper, dtype="uint8")
#finding contours
mask = cv2.inRange(image, lower, upper)
output = cv2.bitwise_and(image, image, mask=mask)
ret,thresh = cv2.threshold(mask, 40, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = max(contours, key = cv2.contourArea)
cnt = contours
rect = cv2.minAreaRect(cnt)
box = cv2.boxPoints(rect)
box = np.int0(box)
for c in [contours]:
#contour approximation
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.01 * peri, True)
#crop of contour with 4 points
if len(approx) == 4:
screenCnt = approx
break
img = cv2.drawContours(image,[screenCnt],0,(0,0,255),2)
print(screenCnt)
pts = screenCnt.reshape(4, 2)
rect = np.zeros((4, 2), dtype = "float32")
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
rect *= ratio
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
M = cv2.getPerspectiveTransform(rect, dst)
warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
warp = imutils.resize(warp)
cv2.imshow("image", image)
cv2.moveWindow("image",0,450)
cv2.imwrite("warp.jpg",warp)
#warping of found labyrinth
warp = cv2.imread("warp.jpg",0)
warp = cv2.medianBlur(warp,5)
ret,warp = cv2.threshold(warp,170,255,cv2.THRESH_BINARY)
cv2.imwrite("warp.jpg",warp)
image_file = "warp.jpg"
im = Image.open(image_file)
width, height = im.size
T = height*(5/210)
B = height-(height*(5/210))
L = width*(5/210)
R = width-(width*(5/210))
im_crop = im.crop((L, T, R, B))
im=im_crop
im_crop.save('warp.png', quality=100)
white=0
#finding field-size
im = Image.open("warp.png")
width, height = im.size
minimalGridWidth2 = width
minimalGridWidth = width
MaximumGridWidth = width
for y in range (0, height):
for x in range (0, width):
if im.getpixel((x,y)) > 200:
white = white+1
if white <= minimalGridWidth:
minimalGridWidth = white
#checks out how many X how many fields the labirynt has, checks what's the average value (more black or more white) is there on that field and recreates the new "ideal" labyrinth from this data
gridWidth = int(round(width/minimalGridWidth))
gridHeight = int(round(height/minimalGridWidth))
print(gridWidth)|
print(gridHeight)
newHeight = 0
newWidth = 0
newHeight=(minimalGridWidth*gridHeight)
newWidth=(minimalGridWidth*gridWidth)
print(minimalGridWidth)
print(newWidth)
print(newHeight)
im = im.resize((newWidth, newHeight), Image.ANTIALIAS)
i=0
x, y = gridWidth, gridHeight
pixelcount = [[0 for x in range(0,gridWidth)] for y in range(0,gridHeight)]
pixelavg = [[0 for x in range(0,gridWidth)] for y in range(0,gridHeight)]
print(pixelcount)
for y in range (0, gridHeight):
for x in range (0, gridWidth):
i=0
pixel=0
for v in range (0, minimalGridWidth):
for w in range (0, minimalGridWidth):
pixel=pixel+im.getpixel((((x*minimalGridWidth)+w),((y*minimalGridWidth)+v)))
i=i+1
if (pixel/i)<127:
pixelavg[y][x]=1
elif (pixel/i)>127:
pixelavg[y][x]=0
print(np.array(pixelavg))
y,x,v,w,i=0,0,0,0,0
im2 = Image.new('RGB',(newWidth,newHeight),'white')
for y in range (0, gridHeight):
for x in range (0, gridWidth):
for v in range (0, minimalGridWidth):
for w in range (0, minimalGridWidth):
pixelx=pixelavg[y][x]
if pixelx==0:
pixelDoc=(255,255,255)
if pixelx==1:
pixelDoc=(0,0,0)
Xw=((x*minimalGridWidth)+w)
Yh=((y*minimalGridWidth)+v)
im2.putpixel((Xw,Yh),pixelDoc)
im2.save('warp3.png',quality=100)
imx=cv2.imread('warp3.png',0)
cv2.imshow('finito',imx)
cv2.imwrite('koniec.png',imx)
cv2.moveWindow("finito",750,450)
warp=cv2.imread("warp.png",0)
cv2.imshow("warp",warp)
cv2.moveWindow("warp",450,450)

When you are concerned with performance or looking to optimize code, it can help to profile your program.
You could use Python's profiler or an IDE with a profiler like PyCharm.
When profiling your code on a 4000x2665 image, I found the following:
As you can see, the getpixel and putpixel functions take ~60% of the total execution time.
This makes sense, as they are called for every image pixel in a nested loop:
for y in range (0, height):
white = 0
for x in range (0, width):
if im.getpixel((x,y)) > 200:
white = white+1
if white <= minimalGridWidth:
minimalGridWidth = white
The above code can be fixed by replacing the nested loops by image-wide operations.
np_im = np.array(im)
white_per_row = np.sum(np_im > 200, axis=1)
minimalGridWidth = np.min(white_per_row)
Replacing this single operation cuts down the total execution time by 5644 ms or ~32%

Related

Error : Invalid number of channels in input image: 'VScn::contains(scn)' using opencv?

I am trying to unshear the image , just like cam scanner does, It is working for some images , IfI give any random image it is not working , The image named as new_image.jpeg is not working and image named as 1111.jpeg is working . Although the picture is totally same .
Code:
import numpy as np
import cv2
import re
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# ## **Use Gaussian Blurring combined with Adaptive Threshold**
def blur_and_threshold(gray):
gray = cv2.GaussianBlur(gray,(3,3),2)
threshold = cv2.adaptiveThreshold(gray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,11,2)
threshold = cv2.fastNlMeansDenoising(threshold, 11, 31, 9)
return threshold
# ## **Find the Biggest Contour**
# **Note: We made sure the minimum contour is bigger than 1/10 size of the whole picture. This helps in removing very small contours (noise) from our dataset**
def biggest_contour(contours,min_area):
biggest = None
max_area = 0
biggest_n=0
approx_contour=None
for n,i in enumerate(contours):
area = cv2.contourArea(i)
if area > min_area/10:
peri = cv2.arcLength(i,True)
approx = cv2.approxPolyDP(i,0.02*peri,True)
if area > max_area and len(approx)==4:
biggest = approx
max_area = area
biggest_n=n
approx_contour=approx
return biggest_n,approx_contour
def order_points(pts):
# initialzie a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
pts=pts.reshape(4,2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# return the ordered coordinates
return rect
### Find the exact (x,y) coordinates of the biggest contour and crop it out
def four_point_transform(image, pts):
# obtain a consistent order of the points and unpack them
# individually
rect = order_points(pts)
(tl, tr, br, bl) = rect
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordiates or the top-right and top-left x-coordinates
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
# now that we have the dimensions of the new image, construct
# the set of destination points to obtain a "birds eye view",
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# compute the perspective transform matrix and then apply it
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# return the warped image
return warped
# # Transformation the image
# **1. Convert the image to grayscale**
# **2. Remove noise and smoothen out the image by applying blurring and thresholding techniques**
# **3. Use Canny Edge Detection to find the edges**
# **4. Find the biggest contour and crop it out**
def transformation(image):
image=image.copy()
height, width, channels = image.shape
gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
image_size=gray.size
threshold=blur_and_threshold(gray)
# We need two threshold values, minVal and maxVal. Any edges with intensity gradient more than maxVal
# are sure to be edges and those below minVal are sure to be non-edges, so discarded.
# Those who lie between these two thresholds are classified edges or non-edges based on their connectivity.
# If they are connected to "sure-edge" pixels, they are considered to be part of edges.
# Otherwise, they are also discarded
edges = cv2.Canny(threshold,50,150,apertureSize = 7)
contours, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
simplified_contours = []
for cnt in contours:
hull = cv2.convexHull(cnt)
simplified_contours.append(cv2.approxPolyDP(hull,
0.001*cv2.arcLength(hull,True),True))
simplified_contours = np.array(simplified_contours)
biggest_n,approx_contour = biggest_contour(simplified_contours,image_size)
threshold = cv2.drawContours(image, simplified_contours ,biggest_n, (0,255,0), 1)
dst = 0
if approx_contour is not None and len(approx_contour)==4:
approx_contour=np.float32(approx_contour)
dst=four_point_transform(threshold,approx_contour)
croppedImage = dst
return croppedImage
# **Increase the brightness of the image by playing with the "V" value (from HSV)**
def increase_brightness(img, value=30):
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
lim = 255 - value
v[v > lim] = 255
v[v <= lim] += value
final_hsv = cv2.merge((h, s, v))
img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
return img
# **Sharpen the image using Kernel Sharpening Technique**
def final_image(rotated):
# Create our shapening kernel, it must equal to one eventually
kernel_sharpening = np.array([[0,-1,0],
[-1, 5,-1],
[0,-1,0]])
# applying the sharpening kernel to the input image & displaying it.
sharpened = cv2.filter2D(rotated, -1, kernel_sharpening)
sharpened=increase_brightness(sharpened,30)
return sharpened
# ## 1. Pass the image through the transformation function to crop out the biggest contour
# ## 2. Brighten & Sharpen the image to get a final cleaned image
path = "/home/hamza/Desktop/"
image = cv2.imread("path of image")
blurred_threshold = transformation(image)
cleaned_image = final_image(blurred_threshold)
cv2.imwrite(path + "Final_Image4.jpg", cleaned_image)
Pictures
first pic
second image
Edit 1:
Picture 1 test image 1
Picture 2 test image 2
Picture 3 test image 3
Picture 4 test image 4
Edit 2:
If I just pass only black and white image can it remove shearness , you may try it if it works please share with me .
Pic:
black and white image
Note: If image comes unsheared , then that particular image should displayed exactly same means code should not touched it . Hope so you got the point.
Nothing is wrong with your images, I tested your code and indeed the first image works with the code as you wrote it, for the second image, with some quick debugging you can figure out that no contour was found in your transformation function - you have the block :
if approx_contour is not None and len(approx_contour)==4:
approx_contour=np.float32(approx_contour)
dst=four_point_transform(threshold,approx_contour)
Just add:
else:
print("no contour found")
to see for yourself.
The problem is with your Canny filter. With apertureSize = 7 your first image works but not the second one, with apertureSize = 3 your second image work but not the first one.
So both of your image work, but not with the same parameters. If processing time is not an issue for your task, you could iterate several values of the parameters, or else steer away from the Canny method. On both your images the paper is a lot brighter than the background so a convexHull on the threshold image would work.
Since your initial image is in color, another approach is to use the H place to detect the 'white color' of the paper. I have made some adjustments in the code and it now works for both your images.
import numpy as np
import cv2
import imutils
def order_points(pts):
""" Return sorted list of corners, from top-left then clockwise """
pts = np.reshape(pts, (6,2))
rect = np.zeros((4, 2), dtype = "float32")
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
return rect
def four_point_transform(image, pts):
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
return warped
image1 = cv2.imread('im1.jpg')
mask = np.zeros(image1.shape, np.uint8)
gray = cv2.cvtColor(image1,cv2.COLOR_BGR2GRAY)
# Hue thresholding for the white paper
HSV = cv2.cvtColor(image1, cv2.COLOR_BGR2HSV)
lo_H = 100
hi_H = 200
thresh = cv2.inRange(HSV, (lo_H, 0, 0), (hi_H, 255, 255))
# Find the contour of the paper sheet - use convexhull
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnt = sorted(contours, key=cv2.contourArea, reverse=True)
hull = []
for i in range(len(cnt)):
hull.append(cv2.convexHull(cnt[i], False))
cv2.drawContours(mask, hull, 0, (255, 255, 255), thickness=cv2.FILLED)
mask = cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_and(gray, gray, mask=mask)
# Corner detection - keep 6 best corners
corners = cv2.goodFeaturesToTrack(mask, 6, 0.01, 50)
corners = np.int0(corners)
# Order the corners and keep 4
rect = order_points(corners)
wrap = four_point_transform(gray, rect)
# Display results
cv2.drawContours(image1, hull, 0, (0,255,125), 3)
cv2.imshow("image", image1)
#cv2.imshow('thresh', thresh)
#cv2.imshow("mask", mask)
cv2.imshow("gray", gray)
cv2.imshow("wrap", wrap)

Python OpenCV sorting contours in clockwise

I'm putting together an image processing tool to follow the deformation of a part using images. The part has rectangular markers that get detected with image segmentation and cv2.findContours function. Contour centers are then used to calculate distances and to bend radiuses. Everything seems to work fine, but I found out that the contours aren't sorted how I would like to sort them when reviewing results.
The part is repeatedly bent, and the contours are positioned in a circle.
I found this article that describes the sorting horizontally and vertically:
https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
Does anyone have any idea how to sort the contours in a clockwise direction?
The code is below.
import os
import exifread
import cv2
import numpy as np
import scipy
from matplotlib import pyplot as plt
import imutils
import pandas as pd
#---------- INPUT ----------
# Define the image filename
img_filename = 'frame397.jpg'
img_path = img_filename
# Define values for cropping
x = 0
y = 200
w = 1200
h = 800
# Define color values for segmentation
# the values can be probed with GIMP
h1 = 0
s1 = 70
v1 = 120
h2 = 255
s2 = 255
v2 = 255
red_lower = np.array([h1,s1,v1])
red_upper = np.array([h2,s2,v2])
# Define desired area size
# desired area size is pixel count - use GIMP for probe
s1 = 500
s2 = 10000
#---------- PROCESS IMAGES ----------
# Create an empty dataframe for storing results
# in shape of (image_name,time,angle,angle_smooth,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11)
# Define the results dataframe shape and column names
results_df = pd.DataFrame(columns=['image_name','alpha','r1','r2','r3','r4','r5','r6','r7','r8','r9','r10','r11',
'center_dist1', 'center_dist2','center_dist3','center_dist4',
'center_dist5','center_dist6','center_dist7','center_dist8',
'center_dist9','center_dist10','center_dist11'])
# Open image, make it black and white and find contours
img = cv2.imread(img_path)
crop = img[y:y+h, x:x+w]
blur = cv2.blur(crop,(2,2))
hsv = cv2.cvtColor(blur,cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, red_lower, red_upper)
mask_copy = mask.copy()
cnts = cv2.findContours(mask_copy,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
#print cnts
x = []
y = []
# Loop through contours, calculate the centers and prepare the
#contours and contour centers display
#define the font for the text on the image
font = cv2.FONT_HERSHEY_SIMPLEX
for cnt in cnts:
area = cv2.contourArea(cnt)
moment = cv2.moments(cnt)
if s1<area<s2:
print area
c_x = int(moment["m10"]/moment["m00"])
c_y = int(moment["m01"]/moment["m00"])
#draw contours
cv2.drawContours(crop, cnt, -1, (0,255,0),3)
#draw a circle in the center of every contour, -1 is for thickness, this means
#that the cirlce will get filled in
cv2.circle(crop, (c_x,c_y), 10, (0,255,0),-1)
#display center coordinates on the image
string = str(c_x) + ',' + str(c_y)
cv2.putText(crop,string,(c_x,c_y),font,0.5,(255,255,255),2)
x.append(float(c_x))
y.append(float(c_y))
print (c_x, c_y)
print x
print y
# Display image
cv2.namedWindow('Contours', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Contours', 1200,900)
cv2.imshow('Contours', crop)
# Wait for windows closing
cv2.waitKey() & 0xFF
cv2.destroyAllWindows
Image is here:
I used openCV's minEnclosingCircle to "fit" a circle to the points (it's not actually a fit, but it's good enough for finding a point inside the curvature of the markers). Marking each contour with the angle from its centroid to the circle's center gave me a set of angles that I could sort with.
import cv2
import numpy as np
import math
# 2d distance
def dist2D(one, two):
dx = one[0] - two[0];
dy = one[1] - two[1];
return math.sqrt(dx*dx + dy*dy);
# angle between three points (the last point is the middle)
def angle3P(p1, p2, p3):
# get distances
a = dist2D(p3, p1);
b = dist2D(p3, p2);
c = dist2D(p1, p2);
# calculate angle // assume a and b are nonzero
# (law of cosines)
numer = c**2 - a**2 - b**2;
denom = -2 * a * b;
if denom == 0:
denom = 0.000001;
rads = math.acos(numer / denom);
degs = math.degrees(rads);
# check if past 180 degrees
if p1[1] > p3[1]:
degs = 360 - degs;
return degs;
# load image
img = cv2.imread("slinky.jpg");
# rescale
scale = 0.5;
h, w = img.shape[:2];
h = int(h * scale);
w = int(w * scale);
img = cv2.resize(img, (w,h));
# change color space
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l,a,b = cv2.split(lab);
# threshold
thresh = cv2.inRange(a, 140, 255);
# get rid of little dots
kernel = np.ones((3,3),np.uint8)
thresh = cv2.erode(thresh,kernel,iterations = 1);
thresh = cv2.dilate(thresh,kernel, iterations = 1);
# contours
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);
# get centroids
centroids = [];
centers = [];
for con in contours:
m = cv2.moments(con);
cx = int(m['m10'] / m['m00']);
cy = int(m['m01'] / m['m00']);
centers.append([cx, cy]);
centroids.append([[cx, cy], con]);
img = cv2.circle(img, (cx, cy), 10, (0,0,255), -1);
# find circle around points
# NOTE: this doesn't "fit" a circle to the points
# I'm just using this to find a "good enough" center
# that's in the direction of the curve
numped = np.array(centers);
(x, y), radius = cv2.minEnclosingCircle(numped);
img = cv2.circle(img, (int(x), int(y)), int(radius), (255,0,0), 2);
middle = [x,y];
offshoot = [x + 100, y];
# get angles
angles = [];
for cen in centroids:
center, contour = cen;
angle = angle3P(center, offshoot, middle);
angles.append([angle, center, contour]);
# sort by angle
final = sorted(angles, key = lambda a: a[0], reverse = True);
# pull out just the contours
contours = [clump[2] for clump in final];
# draw contours in order
marked = img.copy();
counter = 0;
for con in contours:
cv2.drawContours(marked, [con], -1, (0, 255, 0), 2);
cv2.imshow("marked", marked);
cv2.imwrite("marking_seq/" + str(counter) + ".png", marked);
counter += 1;
cv2.waitKey(0);
# show
cv2.imshow("orig", img);
cv2.imshow("a", a);
cv2.imshow("thresh", thresh);
cv2.waitKey(0);

OpenCV Find a middle line of a contour [Python]

In my image processing project, I have already obtained a masked image (black-and-white image) and its contours using the cv.findContours function. My goal now is to create an algorithm that can draw a middle line for this contour. The masked image and its contour are shown in the following images.
Masked image:
Contour:
In my imagination, for that contour, I would like to create a middle line which is near horizontal. I have manually marked my ideal middle line in red. Please check the following image for the red middle line that I have mentioned.
Contour with the middle line:
It is noticeable that my ultimate goal is to find the tip point that I have marked in yellow. If you have other ideas that can directly find the yellow tip point, please also let me know. For finding the yellow tip point, I have tried two approaches cv.convexHull and cv.minAreaRect, but the issue is the robustness. I made these two approaches worked for some images but for some other images in my dataset, they are not working very well. Therefore, I think to find the middle line might be a good approach that I can try.
I believe you're trying to determine the contour's center of gravity and orientation. We can easily do this using Central Moments. More info on that here.
The code below generates this plot. Is this the result you wanted?
# Determine contour
img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
img_bin = (img>128).astype(np.uint8)
contours, _ = cv2.findContours(img_bin, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE)
# Determine center of gravity and orientation using Moments
M = cv2.moments(contours[0])
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
theta = 0.5*np.arctan2(2*M["mu11"],M["mu20"]-M["mu02"])
endx = 600 * np.cos(theta) + center[0] # linelength 600
endy = 600 * np.sin(theta) + center[1]
# Display results
plt.imshow(img_bin, cmap='gray')
plt.scatter(center[0], center[1], marker="X")
plt.plot([center[0], endx], [center[1], endy])
plt.show()
My goal right now is to create an algorithm that can draw a middle line for this contour.
If you detect the upper and lower bounds of your horizontal-lines, then you can calculate the middle-line coordinates.
For instance:
Middle-line will be:
If you change the size to the width of the image:
Code:
import cv2
img = cv2.imread("contour.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(h, w) = img.shape[:2]
x1_upper = h
x1_lower = 0
x2_upper = h
x2_lower = 0
y1_upper = h
y1_lower = 0
y2_upper = h
y2_lower = 0
lines = cv2.ximgproc.createFastLineDetector().detect(gray)
for cur in lines:
x1 = cur[0][0]
y1 = cur[0][1]
x2 = cur[0][2]
y2 = cur[0][3]
# upper-bound coords
if y1 < y1_upper and y2 < y2_upper:
y1_upper = y1
y2_upper = y2
x1_upper = x1
x2_upper = x2
elif y1 > y1_lower and y2 > y2_lower:
y1_lower = y1
y2_lower = y2
x1_lower = x1
x2_lower = x2
print("\n\n-lower-bound-\n")
print("({}, {}) - ({}, {})".format(x1_lower, y1_lower, x2_lower, y2_lower))
print("\n\n-upper-bound-\n")
print("({}, {}) - ({}, {})".format(x1_upper, y1_upper, x2_upper, y2_upper))
cv2.line(img, (x1_lower, y1_lower), (x2_lower, y2_lower), (0, 255, 0), 5)
cv2.line(img, (x1_upper, y1_upper), (x2_upper, y2_upper), (0, 0, 255), 5)
x1_avg = int((x1_lower + x1_upper) / 2)
y1_avg = int((y1_lower + y1_upper) / 2)
x2_avg = int((x2_lower + x2_upper) / 2)
y2_avg = int((y2_lower + y2_upper) / 2)
cv2.line(img, (0, y1_avg), (w, y2_avg), (255, 0, 0), 5)
cv2.imshow("result", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
I beleive skeleton is what you are looking for.
import cv2
import timeit
img = cv2.imread('Ggh8d - Copy.jpg',0)
s = timeit.default_timer()
thinned = cv2.ximgproc.thinning(img, thinningType = cv2.ximgproc.THINNING_ZHANGSUEN)
e = timeit.default_timer()
print(e-s)
cv2.imwrite("thinned1.png", thinned)
if smooth the edge a little bit
Actually the line will not torch the yellow point, since the algorithm have to check distance from edges, yellow point is located on the edge.
Here is another way to do that by computing the centerline of the rotated bounding box about your object in Python/OpenCV.
Input:
import cv2
import numpy as np
# load image
img = cv2.imread("blob_mask.jpg")
# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold the grayscale image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
# get coordinates of all non-zero pixels
# NOTE: must transpose since numpy coords are y,x and opencv uses x,y
coords = np.column_stack(np.where(thresh.transpose() > 0))
# get rotated rectangle from
rotrect = cv2.minAreaRect(coords)
box = cv2.boxPoints(rotrect)
box = np.int0(box)
print (box)
# get center line from box
# note points are clockwise from bottom right
x1 = (box[0][0] + box[3][0]) // 2
y1 = (box[0][1] + box[3][1]) // 2
x2 = (box[1][0] + box[2][0]) // 2
y2 = (box[1][1] + box[2][1]) // 2
# draw rotated rectangle on copy of img as result
result = img.copy()
cv2.drawContours(result, [box], 0, (0,0,255), 2)
cv2.line(result, (x1,y1), (x2,y2), (255,0,0), 2)
# write result to disk
cv2.imwrite("blob_mask_rotrect.png", result)
# display results
cv2.imshow("THRESH", thresh)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:

How to fix/optimize document detection with a lot of noise

I build a python script to detect and extract documents/citizen cards from a image, it was working as i wanted until i tried a spanish citizen card (DNI 3.0).
When i perform canny edge algorythm on this card, it has a lot of noise/lines/things that merge into the edges of the card and thats what i think its causing the issue.
Canny edge applied:
Code:
def attemptPerspectiveTransform(openCVImage, debug_mode):
image = openCVImage
ratio = image.shape[0] / 300.0
orig = image.copy()
image = imutils.resize(image, height = 300)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 0, 200)
if (debug_mode == True):
cv2.imshow("gray", gray)
cv2.imshow("edged", edged)
cv2.waitKey(0)
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
print('APPROX')
print(len(approx))
# if our approximated contour has four points, should be the document we are looking for
if len(approx) == 4:
screenCnt = approx
break
# now that we have our screen contour, we need to determine
# the top-left, top-right, bottom-right, and bottom-left
# points so that we can later warp the image -- we'll start
# by reshaping our contour to be our finals and initializing
# our output rectangle in top-left, top-right, bottom-right,
# and bottom-left order
pts = screenCnt.reshape(4, 2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point has the smallest sum whereas the
# bottom-right has the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# compute the difference between the points -- the top-right
# will have the minumum difference and the bottom-left will
# have the maximum difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# multiply the rectangle by the original ratio
rect *= ratio
# now that we have our rectangle of points, let's compute
# the width of our new image
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
# ...and now for the height of our new image
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
# take the maximum of the width and height values to reach
# our final dimensions
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
# construct our destination points which will be used to
# map the screen to a top-down, "birds eye" view
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# calculate the perspective transform matrix and warp
# the perspective to grab the screen
M = cv2.getPerspectiveTransform(rect, dst)
warp = cv2.warpPerspective(orig, M, (maxWidth, maxHeight))
# convert the warped image to grayscale and then adjust
# the intensity of the pixels to have minimum and maximum
# values of 0 and 255, respectively
warp = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp = exposure.rescale_intensity(warp, out_range = (0, 255))
height, width = warp.shape
if (height > width):
warp = rotate_image(warp, 90)
if (debug_mode == True):
# show images
cv2.imshow("image", image)
cv2.imshow("edge", edged)
cv2.imshow("warp", imutils.resize(warp, height = 300))
cv2.imshow("warp pure", imutils.resize(warp, height = 300))
cv2.imwrite('cropped_final.jpg', warp)
cv2.waitKey(0)
return warp
Results in error:
Traceback (most recent call last):
File "main.py", line 60, in <module>
cv_front_cropped_path = image_preprocessing.processImage(front_img, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/image_preprocessing.py", line 179, in processImage
image = perspective_transform.attemptPerspectiveTransform(image, debug_mode)
File "/Users/duarteandrade/Desktop/OCR-DEMO/PythonOCR/pyocr/perspective_transform.py", line 84, in attemptPerspectiveTransform
pts = screenCnt.reshape(4, 2)
AttributeError: 'NoneType' object has no attribute 'reshape'
It works good with the portuguese citizen card with worse images, i guess because it doesnt have those lines at the top of the card and all the noise that can be seen in the spanish card.
How can i fix this ?
**Original image : **

How to detect if text is rotated 180 degrees or flipped upside down

I am working on a text recognition project. There is a chance the text is rotated 180 degrees. I have tried tesseract-ocr on terminal, but no luck. Is there any way to detect it and correct it? An example of the text is shown below.
tesseract input.png output
tesseract input.png - --psm 0 -c min_characters_to_try=10
Warning. Invalid resolution 0 dpi. Using 70 instead.
Page number: 0
Orientation in degrees: 180
Rotate: 180
Orientation confidence: 0.74
Script: Latin
Script confidence: 1.67
One simple approach to detect if text is rotated 180 degrees is to use the observation that text tends to be skewed towards the bottom. Here's the strategy:
Convert image to grayscale
Gaussian blur
Threshold image
Find the top/bottom half ROIs of thresholded image
Count non-zero array elements for each half
Threshold image
Find ROIs of top and bottom half
Next we split the top/bottom sections
With each half we count non-zero array elements using cv2.countNonZero(). We get this
('top', 4035)
('bottom', 3389)
By comparing the values between the two halves, if the top half has more pixels than the bottom half, it is upside down by 180 degrees. If it has less, it is correctly oriented.
Now that we have detected if it is upside down, we can rotate it using this function
def rotate(image, angle):
# Obtain the dimensions of the image
(height, width) = image.shape[:2]
(cX, cY) = (width / 2, height / 2)
# Grab the rotation components of the matrix
matrix = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(matrix[0, 0])
sin = np.abs(matrix[0, 1])
# Find the new bounding dimensions of the image
new_width = int((height * sin) + (width * cos))
new_height = int((height * cos) + (width * sin))
# Adjust the rotation matrix to take into account translation
matrix[0, 2] += (new_width / 2) - cX
matrix[1, 2] += (new_height / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, matrix, (new_width, new_height))
Rotating the image
rotated = rotate(original_image, 180)
cv2.imshow("rotated", rotated)
which gives us the correct result
This is the pixel result if the image was correctly oriented
('top', 3209)
('bottom', 4206)
Full code
import numpy as np
import cv2
def rotate(image, angle):
# Obtain the dimensions of the image
(height, width) = image.shape[:2]
(cX, cY) = (width / 2, height / 2)
# Grab the rotation components of the matrix
matrix = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(matrix[0, 0])
sin = np.abs(matrix[0, 1])
# Find the new bounding dimensions of the image
new_width = int((height * sin) + (width * cos))
new_height = int((height * cos) + (width * sin))
# Adjust the rotation matrix to take into account translation
matrix[0, 2] += (new_width / 2) - cX
matrix[1, 2] += (new_height / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, matrix, (new_width, new_height))
image = cv2.imread("1.PNG")
original_image = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blurred, 110, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("thresh", thresh)
x, y, w, h = 0, 0, image.shape[1], image.shape[0]
top_half = ((x,y), (x+w, y+h/2))
bottom_half = ((x,y+h/2), (x+w, y+h))
top_x1,top_y1 = top_half[0]
top_x2,top_y2 = top_half[1]
bottom_x1,bottom_y1 = bottom_half[0]
bottom_x2,bottom_y2 = bottom_half[1]
# Split into top/bottom ROIs
top_image = thresh[top_y1:top_y2, top_x1:top_x2]
bottom_image = thresh[bottom_y1:bottom_y2, bottom_x1:bottom_x2]
cv2.imshow("top_image", top_image)
cv2.imshow("bottom_image", bottom_image)
# Count non-zero array elements
top_pixels = cv2.countNonZero(top_image)
bottom_pixels = cv2.countNonZero(bottom_image)
print('top', top_pixels)
print('bottom', bottom_pixels)
# Rotate if upside down
if top_pixels > bottom_pixels:
rotated = rotate(original_image, 180)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
I kind of liked the pytessaract solution.
import cv2
import pytesseract
from scipy.ndimage import rotate as Rotate
def float_convertor(x):
if x.isdigit():
out= float(x)
else:
out= x
return out
def tesseract_find_rotatation(img: str):
img = cv2.imread(img) if isinstance(img, str) else img
k = pytesseract.image_to_osd(img)
out = {i.split(":")[0]: float_convertor(i.split(":")[-1].strip()) for i in k.rstrip().split("\n")}
img_rotated = Rotate(img, 360-out["Rotate"])
return img_rotated, out
usage
img_loc = ""
img_rotated, out = tessaract_find_rotation(img_loc)

Categories