I have the following image containing a dartboard
After processing the image looks as follows:
In addition, I have a function that creates a theoretical dartboard:
import cv2
import numpy as np
def draw_dartboard():
    """Draw a simplified, theoretical dartboard on a 400x400 grayscale canvas.

    The board is rendered in black (0) on a white (255) background and
    consists of the double ring, the triple ring, every second 18-degree
    sector (drawn as a filled triangle reaching out to the board radius)
    and a filled single bull. The double bull is not drawn.

    Returns:
        numpy.ndarray: ``uint8`` image of shape ``(400, 400)``.
    """
    img = np.full((400, 400), 255, dtype=np.uint8)

    # OpenCV points are (x, y) = (column, row), so x comes from shape[1].
    center = (img.shape[1] // 2, img.shape[0] // 2)

    # Radii in pixels.
    r_board = 170
    r_sb = int(15.9)
    r_doubles = 162
    r_triples = 99
    width_rings = 8

    # Each ring is a filled black disc with a smaller white disc on top.
    for r_inner in (r_doubles, r_triples):
        cv2.circle(img, center, r_inner + width_rings, 0, -1)
        cv2.circle(img, center, r_inner, 255, -1)

    # Fill every second sector; sector k spans [18k - 9, 18k + 9] degrees.
    for k in range(0, 20, 2):
        theta_min = np.radians(18 * k - 9)
        theta_max = np.radians(18 * k + 9)
        x_min = int(center[0] + r_board * np.cos(theta_min))
        y_min = int(center[1] + r_board * np.sin(theta_min))
        x_max = int(center[0] + r_board * np.cos(theta_max))
        y_max = int(center[1] + r_board * np.sin(theta_max))
        # Polygon vertices are given explicitly as 32-bit integers.
        triangle = np.array(
            [[center, (x_min, y_min), (x_max, y_max)]], dtype=np.int32
        )
        cv2.fillPoly(img, triangle, 0)

    # Single bull, drawn last so it covers the sector tips at the centre.
    cv2.circle(img, center, r_sb, 0, -1)
    return img
The output of this image looks as follows:
How can I “fit” the theoretical dartboard in the real image? Clearly, there is a mismatch in orientation and scale. What's the best way to do this?
You can register your dartboard image (i.e. source image) to the one you processed (i.e. destination image) by using affine transformations.
Here is my approach, and the outcome.
import cv2
import matplotlib.pyplot as plt
import numpy as np
# Load both pictures as grayscale and crop away the matplotlib axes.
src = cv2.imread('source.png', cv2.IMREAD_GRAYSCALE)
src = src[20:-30, 40:-20]
dest = cv2.imread('dest.png', cv2.IMREAD_GRAYSCALE)
dest = dest[40:-40, 40:-40]

# Three manually picked correspondences, each given as (x, y).
dest_pts = np.array([[103, 29], [215, 13], [236, 125]], dtype=np.float32)
src_pts = np.array([[19, 175], [145, 158], [176, 284]], dtype=np.float32)

# Affine matrix that maps the source points onto the destination points.
warp_mat = cv2.getAffineTransform(src_pts, dest_pts)

# Warp the source into the destination frame; dsize is (width, height).
dest_h, dest_w = dest.shape[:2]
warp_dst = cv2.warpAffine(src, warp_mat, (dest_w, dest_h))

# Figure 1: source, destination (both with their control points) and result.
fig, ax = plt.subplots(1, 3)
for axis, picture, points, title in (
    (ax[0], src, src_pts, 'src'),
    (ax[1], dest, dest_pts, 'dest'),
):
    axis.imshow(picture, 'gray')
    axis.scatter(points[:, 0], points[:, 1], s=1, c='r')
    axis.set_title(title)
ax[2].imshow(warp_dst, 'gray')
ax[2].set_title('registered src')
plt.savefig('result.png')

# Figure 2: registered source blended over the destination.
fig, ax = plt.subplots(1)
ax.imshow(dest, 'gray')
ax.imshow(warp_dst, cmap='jet', alpha=0.5)
plt.savefig('overlayed_result.png')
# plt.show()
In order to calculate affine transformation matrix, you will need 3 matching points on both images. I highlighted the points I chose on both images. FYI, you can develop a way to automate finding matching points, let us know in your question if you need that.
As you have already done the image processing, I will take it from there. So just to be clear, this is the image I will be working with (I cropped out the matplotlib axes, as I'm sure they aren't present in your actual image):
The concept is really simple:
Find the bounding box of the contour of the target.
With the bounding box, we can find the radius of the target by selecting the greatest among the dimensions (width and height) of the bounding box, and dividing it by 2.
With the radius of the target and the top-left corner coordinates of the target (returned when finding the bounding box of the target), we can find the center of the target with the expressions x + r and y + h - r.
With the radius of the target, you can scale your theoretical target accordingly, and with the center of the target, you can draw your theoretical target at the right coordinates.
Here is how the code goes, where Image.png is the above image. Note that I only draw one circle onto the image; the rest of them can be drawn on using the same way, with just some added scaling:
import cv2
import numpy as np
# Read the processed picture and derive a single-channel copy for contours.
img = cv2.imread("Image.png")
img_processed = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
contours, _ = cv2.findContours(
    img_processed, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE
)

# Order the contours by area and take the second-largest one.
contours_by_area = sorted(contours, key=cv2.contourArea)
cnt = contours_by_area[-2]

# Radius is half of the larger bounding-box side; the centre is derived
# from the box's top-left corner and that radius.
x, y, w, h = cv2.boundingRect(cnt)
r = max(w, h) // 2
center_x, center_y = x + r, y + h - r

# Draw the fitted circle in green and display it until a key is pressed.
cv2.circle(img, (center_x, center_y), r, (0, 255, 0), 5)
cv2.imshow("Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Note that at this line:
cnt = sorted(contours, key=cv2.contourArea)[-2]
I am getting the contour with the second-greatest area, as the one with the greatest area would be the border of the image.
Related
Goal:
I'd like to estimate a 4 coordinates quadrilateral (not only rectangles) of a given masked object as shown in the image + without losing any pixel of the masked object.
Trials:
I tried using CV2 however couldn't end up with a solution.
cv2.boundingRect: returns the coordinates of the bounding rectangle (while the quadrilateral estimation is not always necessary to be a perfect rectangle)
cv2.findContours + cv2.approxPolyDP: isn't that accurate and returns an estimate extreme points of the object (Needs more work to estimate the quadrilateral 4 coordinates and there might be an easier and faster solution).
Code Snippets:
Trying cv2.boundingRect:
# mask  : grayscale image in which only one specific object is masked
# image : the original RGB image
left, top, box_w, box_h = cv2.boundingRect(mask)
image = np.array(im[0])
top_left = (left, top)
bottom_right = (left + box_w, top + box_h)
# Outline the axis-aligned bounding box in green.
cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
plt.imshow(image)
Trying cv2.findContours + cv2.approxPolyDP:
# mask  : grayscale image in which only one specific object is masked
# image : the original RGB image
contours, hierarchy = cv2.findContours(
    mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE
)
# Keep the contour with the largest area.
selected_contour = max(contours, key=cv2.contourArea)
# Approximation tolerance: 0.35 % of the closed contour's perimeter.
epsilon = 0.0035 * cv2.arcLength(selected_contour, True)
approx = cv2.approxPolyDP(selected_contour, epsilon, True)
cv2.drawContours(image, [approx], 0, (0, 0, 255), 5)
plt.imshow(image)
I am not sure if there is a better or built-in version; but i have a simple idea based on random numbers:
I only did this for the top, but you can do the same for other sides. The idea is to find the bounding-box of object first; and then divide the object into equal parts so that we can find the highest peaks.
In each range, You can find points randomly; But for best results, it is best to check all the top points of the shape to find the highest peaks correctly.
After finding the highest peaks, we have to calculate a line equation with respect to those 2 points so that we can draw a global line with respect to that line equation.
import sys
import cv2
import random
import numpy as np
from tqdm import tqdm
def rndPt(l, t, r, b):
    """Return a random integer point ``(x, y)`` inside the given ROI.

    ``l`` and ``r`` bound x, ``t`` and ``b`` bound y. Each bound is
    truncated with ``int`` and both ends of each range are inclusive.
    """
    x = random.randint(int(l), int(r))
    y = random.randint(int(t), int(b))
    return x, y
def intArr(arr):
    """Return a new list with every item of the 1D sequence cast to ``int``."""
    return list(map(int, arr))
# Load our image
pth = sys.path[0]
org = cv2.imread(pth+'/bound.png')
if org is None:
    raise FileNotFoundError(pth+'/bound.png')
im = org.copy()
H, W = im.shape[:2]

# Make mask and copy from that image
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
bw = cv2.threshold(im, 127, 255, cv2.THRESH_BINARY)[1]
im = bw.copy()

# Find the ROI of object. The contours are sorted into a NEW list because
# cv2.findContours may return an immutable tuple, which has no .sort().
cnts, _ = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnts = sorted(cnts, key=lambda contour: cv2.boundingRect(contour)[0])
ROI = None
for cnt in cnts:
    x, y, w, h = cv2.boundingRect(cnt)
    # Skip contours that span (almost) the whole image.
    if w < W-1 and h < H-1:
        cv2.rectangle(bw, (x, y), (x+w, y+h), 127, 2)
        ROI = {'x': x, 'y': y, 'w': w, 'h': h, 'h2': y+h}
if ROI is None:
    raise RuntimeError('no contour smaller than the image was found')

# We have to find the peaks; so we have to
# divide the bounding-box of shape into several
# ranges.
spaces = 5
sw = ROI['w']//spaces

# Each range can have a peak as a candidate point
# NOTE(review): the initial y is ROI['h']//2 (half the box height, not
# offset by ROI['y']) -- kept as in the original heuristic; confirm.
candidates = [(ROI['x']+(sw*k)+sw//2, ROI['h']//2) for k in range(0, spaces)]

# Divide the object and find the highest point in
# each range
for s in tqdm(range(0, spaces)):
    l = ROI['x']+(sw*s)
    cv2.line(im, pt1=(l, ROI['y']), pt2=(l, ROI['h2']),
             color=127, thickness=2)
    # 200 random probes per column of the range.
    # NOTE(review): the lower sampling bound is ROI['h2']//4; if that is
    # smaller than ROI['y'] random.randint raises ValueError -- confirm the
    # intended bound for objects that do not start near the top of the image.
    for _ in range(sw * 200):
        pt = rndPt(l, ROI['y'], l+sw, ROI['h2']//4)
        if pt[1] < candidates[s][1] and bw[pt[1], pt[0]] == 0:
            candidates[s] = pt
l = ROI['x']+(sw*spaces)
cv2.line(im, pt1=(l, ROI['y']), pt2=(l, ROI['h2']), color=127, thickness=2)
print(candidates)

# We remove duplicate points and also sort the points
# according to the peak
candidates = list(set(candidates))
candidates.sort(key=lambda p: p[1])
print(candidates)
c = candidates
if len(c) < 2:
    raise RuntimeError('need at least two distinct peak candidates')

# Now that we have found two of the highest points, we can
# write a line equation for these two points
xA, xB = ROI['x'], ROI['x']+ROI['w']
x1, y1 = c[0][0], c[0][1]
x2, y2 = c[1][0], c[1][1]
if x2 == x1:
    # A vertical line cannot be written as y = m*x + b.
    raise RuntimeError('the two highest peaks share the same x coordinate')
m = (y2-y1)/(x2-x1)
# y=mx+b -> y-mx=b
b = y1-m*x1
yA = m*xA+b
yB = m*xB+b

# Convert images to BGR
im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
bw = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)

# Make a copy of image to draw candidate points; the marker radius is
# derived from the ROI height rather than from whichever contour happened
# to be visited last in the loop above.
marker = im.copy()
for p in candidates:
    cv2.circle(marker, (p[0], p[1]),
               ROI['h']//25, color=(50, 100, 200), thickness=4)

# Draw lines
cv2.line(im, pt1=intArr((xA, yA)), pt2=intArr((xB, yB)),
         color=(255, 0, 100), thickness=4, lineType=cv2.LINE_AA)
cv2.line(bw, pt1=intArr(c[0]), pt2=intArr(c[1]),
         color=(100, 0, 255), thickness=4, lineType=cv2.LINE_AA)

# Save final output
top = np.hstack((org, marker))
btm = np.hstack((bw, im))
cv2.imwrite(pth+'/out.png', np.vstack((top, btm)))
I am trying to do image classification task and want to make sure my input data all have the same orientation.
The code below did not rotate all the images to the same orientation, and some were flipped incorrectly.
I will be thankful if anyone can help me with this matter, Thank you
original image 1
original image 2
import cv2
import numpy as np
import matplotlib.pyplot as plt
def getSubImage(rect, image):
    """Cut the rotated rectangle ``rect`` out of ``image`` as an upright patch.

    Args:
        rect: ``(center, size, theta)`` triple, e.g. the value returned by
            ``cv2.minAreaRect``.
        image: Image to cut the patch from.

    Returns:
        The ``size``-sized patch centred on ``center``, taken after rotating
        ``image`` by ``theta`` degrees about ``center``.
    """
    center, size, theta = rect
    center, size = tuple(map(int, center)), tuple(map(int, size))
    M = cv2.getRotationMatrix2D(center, theta, 1)
    # Use the image that was passed in (the original referenced an undefined
    # ``src``); warpAffine expects dsize as (width, height), while
    # ndarray.shape starts with (height, width).
    height, width = image.shape[:2]
    dst = cv2.warpAffine(image, M, (width, height))
    out = cv2.getRectSubPix(dst, size, center)
    return out
image = cv2.imread('orginal1.png')
if image is None:
    raise FileNotFoundError('orginal1.png')

# cv2.imread returns BGR data, so convert with BGR2GRAY (not RGB2GRAY).
im_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(im_bw, (5, 5), 0)
im_bw = cv2.Canny(blur, 10, 90)

contours, hierarchy = cv2.findContours(im_bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
    raise RuntimeError('no contours found in the edge image')

# NOTE(review): contours[0] is simply the first contour returned, not
# necessarily the object's outline -- confirm this is the contour wanted.
rect = cv2.minAreaRect(contours[0])
out = getSubImage(rect, image)
cv2.imwrite('rotedorginal1.jpg', out)

# matplotlib expects RGB channel order, so convert before displaying.
plt.imshow(cv2.cvtColor(out, cv2.COLOR_BGR2RGB))
plt.show()
You just need to build the matrix that rotates the image the right angle using cv2.getRotationMatrix2D and applying the operation with the matrix using cv2.warpAffine:
# Orientation of the minimum-area rectangle around the first contour.
(x, y), (w, h), angle = cv2.minAreaRect(contours[0])
# warpAffine needs dsize as (width, height); image.shape is
# (height, width[, channels]), so it cannot be passed directly.
img_h, img_w = image.shape[:2]
rotation = cv2.getRotationMatrix2D((img_w // 2, img_h // 2), angle - 90, 1)
result = cv2.warpAffine(image, rotation, (img_w, img_h))
The code above transforms this image:
into this one:
I'm putting together an image processing tool to follow the deformation of a part using images. The part has rectangular markers that get detected with image segmentation and cv2.findContours function. Contour centers are then used to calculate distances and to bend radiuses. Everything seems to work fine, but I found out that the contours aren't sorted how I would like to sort them when reviewing results.
The part is repeatedly bent, and the contours are positioned in a circle.
I found this article that describes the sorting horizontally and vertically:
https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
Does anyone have any idea how to sort the contours in a clockwise direction?
The code is below.
import os
import exifread
import cv2
import numpy as np
import scipy
from matplotlib import pyplot as plt
import imutils
import pandas as pd
#---------- INPUT ----------
# Define the image filename
img_filename = 'frame397.jpg'
img_path = img_filename

# Define values for cropping
x = 0
y = 200
w = 1200
h = 800

# Define color values for segmentation
# the values can be probed with GIMP
h1 = 0
s1 = 70
v1 = 120
h2 = 255
s2 = 255
v2 = 255
red_lower = np.array([h1, s1, v1])
red_upper = np.array([h2, s2, v2])

# Define desired area size
# desired area size is pixel count - use GIMP for probe
# (s1/s2 are reused here; the HSV bounds above are already built)
s1 = 500
s2 = 10000

#---------- PROCESS IMAGES ----------
# Create an empty dataframe for storing results
# in shape of (image_name,time,angle,angle_smooth,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11)
# Define the results dataframe shape and column names
results_df = pd.DataFrame(columns=['image_name','alpha','r1','r2','r3','r4','r5','r6','r7','r8','r9','r10','r11',
                                   'center_dist1', 'center_dist2','center_dist3','center_dist4',
                                   'center_dist5','center_dist6','center_dist7','center_dist8',
                                   'center_dist9','center_dist10','center_dist11'])

# Open image, make it black and white and find contours
img = cv2.imread(img_path)
crop = img[y:y+h, x:x+w]
blur = cv2.blur(crop, (2, 2))
hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, red_lower, red_upper)
mask_copy = mask.copy()
cnts = cv2.findContours(mask_copy, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# From here on x and y collect the contour-centre coordinates.
x = []
y = []

# Loop through contours, calculate the centers and prepare the
# contours and contour centers display
# define the font for the text on the image
font = cv2.FONT_HERSHEY_SIMPLEX
for cnt in cnts:
    area = cv2.contourArea(cnt)
    if not s1 < area < s2:
        continue
    print(area)
    moment = cv2.moments(cnt)
    c_x = int(moment["m10"]/moment["m00"])
    c_y = int(moment["m01"]/moment["m00"])
    # draw the contour; drawContours expects a LIST of contours, so the
    # single contour is wrapped (a bare contour is treated as a list of
    # one-point contours and only its vertices get drawn)
    cv2.drawContours(crop, [cnt], -1, (0, 255, 0), 3)
    # draw a circle in the center of every contour, -1 is for thickness, this means
    # that the circle will get filled in
    cv2.circle(crop, (c_x, c_y), 10, (0, 255, 0), -1)
    # display center coordinates on the image
    string = str(c_x) + ',' + str(c_y)
    cv2.putText(crop, string, (c_x, c_y), font, 0.5, (255, 255, 255), 2)
    x.append(float(c_x))
    y.append(float(c_y))
    print((c_x, c_y))
print(x)
print(y)

# Display image
cv2.namedWindow('Contours', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Contours', 1200, 900)
cv2.imshow('Contours', crop)

# Wait for a key press, then actually close the windows (the original
# referenced cv2.destroyAllWindows without calling it)
cv2.waitKey() & 0xFF
cv2.destroyAllWindows()
Image is here:
I used openCV's minEnclosingCircle to "fit" a circle to the points (it's not actually a fit, but it's good enough for finding a point inside the curvature of the markers). Marking each contour with the angle from its centroid to the circle's center gave me a set of angles that I could sort with.
import cv2
import numpy as np
import math
# 2d distance
def dist2D(one, two):
    """Return the Euclidean distance between two 2D points.

    Only indices 0 (x) and 1 (y) of each point are read. ``math.hypot`` is
    used so that very large coordinate differences do not overflow while
    being squared.
    """
    return math.hypot(one[0] - two[0], one[1] - two[1])
# angle between three points (the last point is the middle)
def angle3P(p1, p2, p3):
    """Return the angle p1-p3-p2 in degrees, with ``p3`` as the vertex.

    The unsigned angle comes from the law of cosines. It is then mapped to
    ``360 - angle`` whenever ``p1`` has a larger y than ``p3``, so the
    result covers the full circle only when ``p2`` lies on the horizontal
    ray starting at ``p3`` (which is how this file calls it).

    Args:
        p1: Point whose direction is measured.
        p2: Reference point.
        p3: Vertex of the angle.

    Returns:
        float: angle in degrees.
    """
    # side lengths of the triangle
    a = dist2D(p3, p1)
    b = dist2D(p3, p2)
    c = dist2D(p1, p2)

    # law of cosines: cos(angle) = (c^2 - a^2 - b^2) / (-2ab)
    numer = c**2 - a**2 - b**2
    denom = -2 * a * b
    if denom == 0:
        # p1 or p2 coincides with the vertex; avoid dividing by zero
        denom = 0.000001

    # Rounding can push the ratio marginally outside [-1, 1] (e.g. for
    # collinear points), which would make math.acos raise ValueError.
    cosine = max(-1.0, min(1.0, numer / denom))
    degs = math.degrees(math.acos(cosine))

    # check if past 180 degrees
    if p1[1] > p3[1]:
        degs = 360 - degs
    return degs
# load image
img = cv2.imread("slinky.jpg")

# rescale
scale = 0.5
h, w = img.shape[:2]
h = int(h * scale)
w = int(w * scale)
img = cv2.resize(img, (w, h))

# change color space
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)

# threshold
thresh = cv2.inRange(a, 140, 255)

# get rid of little dots
kernel = np.ones((3, 3), np.uint8)
thresh = cv2.erode(thresh, kernel, iterations=1)
thresh = cv2.dilate(thresh, kernel, iterations=1)

# contours: OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4
# returns (contours, hierarchy); the contours are second to last in both.
contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# get centroids
centroids = []
centers = []
for con in contours:
    m = cv2.moments(con)
    if m['m00'] == 0:
        # degenerate contour without area: it has no centroid
        continue
    cx = int(m['m10'] / m['m00'])
    cy = int(m['m01'] / m['m00'])
    centers.append([cx, cy])
    centroids.append([[cx, cy], con])
    img = cv2.circle(img, (cx, cy), 10, (0, 0, 255), -1)
if not centers:
    raise RuntimeError("no marker contours found")

# find circle around points
# NOTE: this doesn't "fit" a circle to the points
# I'm just using this to find a "good enough" center
# that's in the direction of the curve
numped = np.array(centers, dtype=np.int32)
(x, y), radius = cv2.minEnclosingCircle(numped)
img = cv2.circle(img, (int(x), int(y)), int(radius), (255, 0, 0), 2)
middle = [x, y]
# reference point on the horizontal ray from the circle's center
offshoot = [x + 100, y]

# get angles
angles = []
for center, contour in centroids:
    angle = angle3P(center, offshoot, middle)
    angles.append([angle, center, contour])

# sort by angle
final = sorted(angles, key=lambda entry: entry[0], reverse=True)

# pull out just the contours
contours = [clump[2] for clump in final]

# draw contours in order, one more per saved frame
# NOTE(review): presumably the "marking_seq" directory must already exist
# for the frames to be written -- confirm before relying on the output.
marked = img.copy()
for counter, con in enumerate(contours):
    cv2.drawContours(marked, [con], -1, (0, 255, 0), 2)
    cv2.imshow("marked", marked)
    cv2.imwrite("marking_seq/" + str(counter) + ".png", marked)
    cv2.waitKey(0)

# show
cv2.imshow("orig", img)
cv2.imshow("a", a)
cv2.imshow("thresh", thresh)
cv2.waitKey(0)
In my image processing project, I have already obtained a masked image (black-and-white image) and its contours using the cv.findContours function. My goal now is to create an algorithm that can draw a middle line for this contour. The masked image and its contour are shown in the following images.
Masked image:
Contour:
In my imagination, for that contour, I would like to create a middle line which is near horizontal. I have manually marked my ideal middle line in red. Please check the following image for the red middle line that I have mentioned.
Contour with the middle line:
Note that my ultimate goal is to find the tip point that I have marked in yellow. If you have other ideas that can directly find the yellow tip point, please let me know as well. To find the yellow tip point, I have tried two approaches, cv.convexHull and cv.minAreaRect, but the issue is robustness: I got both approaches to work for some images, but for other images in my dataset they do not work very well. Therefore, I think finding the middle line might be a good approach to try.
I believe you're trying to determine the contour's center of gravity and orientation. We can easily do this using Central Moments. More info on that here.
The code below generates this plot. Is this the result you wanted?
# Binarise the image (pixels above 128 become 1) and extract the outer
# contours without any point compression.
img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
img_bin = (img > 128).astype(np.uint8)
contours, _ = cv2.findContours(
    img_bin, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
)

# Centre of gravity from the raw moments, orientation from the central
# moments of the first contour.
M = cv2.moments(contours[0])
center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
theta = 0.5 * np.arctan2(2 * M["mu11"], M["mu20"] - M["mu02"])

# End point of a 600-pixel line starting at the centre along theta.
endx = 600 * np.cos(theta) + center[0]
endy = 600 * np.sin(theta) + center[1]

# Show the mask, the centre and the orientation line.
plt.imshow(img_bin, cmap='gray')
plt.scatter(center[0], center[1], marker="X")
plt.plot([center[0], endx], [center[1], endy])
plt.show()
My goal right now is to create an algorithm that can draw a middle line for this contour.
If you detect the upper and lower bounds of your horizontal-lines, then you can calculate the middle-line coordinates.
For instance:
Middle-line will be:
If you change the size to the width of the image:
Code:
import cv2
img = cv2.imread("contour.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(h, w) = img.shape[:2]

# The "upper" segment is the one with the smallest y values and the "lower"
# one the largest, so the search starts from the opposite extremes.
x1_upper = h
x1_lower = 0
x2_upper = h
x2_lower = 0
y1_upper = h
y1_lower = 0
y2_upper = h
y2_lower = 0

lines = cv2.ximgproc.createFastLineDetector().detect(gray)
if lines is None:
    raise RuntimeError("no line segments detected")

for cur in lines:
    # The detector yields float coordinates, but the drawing calls below
    # need integer pixel positions.
    x1, y1, x2, y2 = (int(round(float(v))) for v in cur[0])

    # upper-bound coords
    if y1 < y1_upper and y2 < y2_upper:
        y1_upper = y1
        y2_upper = y2
        x1_upper = x1
        x2_upper = x2

    # lower-bound coords; tested independently so that a segment which
    # first became the upper bound can still be the lower bound
    if y1 > y1_lower and y2 > y2_lower:
        y1_lower = y1
        y2_lower = y2
        x1_lower = x1
        x2_lower = x2

print("\n\n-lower-bound-\n")
print("({}, {}) - ({}, {})".format(x1_lower, y1_lower, x2_lower, y2_lower))
print("\n\n-upper-bound-\n")
print("({}, {}) - ({}, {})".format(x1_upper, y1_upper, x2_upper, y2_upper))

cv2.line(img, (x1_lower, y1_lower), (x2_lower, y2_lower), (0, 255, 0), 5)
cv2.line(img, (x1_upper, y1_upper), (x2_upper, y2_upper), (0, 0, 255), 5)

# End points of the middle line: average of the two bounding segments.
x1_avg = int((x1_lower + x1_upper) / 2)
y1_avg = int((y1_lower + y1_upper) / 2)
x2_avg = int((x2_lower + x2_upper) / 2)
y2_avg = int((y2_lower + y2_upper) / 2)

# Draw the middle line across the full image width; use
# (x1_avg, y1_avg) - (x2_avg, y2_avg) instead to keep the segment length.
cv2.line(img, (0, y1_avg), (w, y2_avg), (255, 0, 0), 5)

cv2.imshow("result", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
I believe a skeleton is what you are looking for.
import cv2
import timeit
# Read the mask as a single-channel image.
img = cv2.imread('Ggh8d - Copy.jpg', cv2.IMREAD_GRAYSCALE)

# Time the Zhang-Suen thinning and print the elapsed seconds.
start = timeit.default_timer()
thinned = cv2.ximgproc.thinning(
    img, thinningType=cv2.ximgproc.THINNING_ZHANGSUEN
)
stop = timeit.default_timer()
print(stop - start)

cv2.imwrite("thinned1.png", thinned)
If you smooth the edge a little bit:
Actually, the line will not touch the yellow point: the algorithm has to check the distance from the edges, and the yellow point is located on the edge.
Here is another way to do that by computing the centerline of the rotated bounding box about your object in Python/OpenCV.
Input:
import cv2
import numpy as np
# load image
img = cv2.imread("blob_mask.jpg")

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold the grayscale image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]

# get coordinates of all non-zero pixels
# NOTE: must transpose since numpy coords are y,x and opencv uses x,y
coords = np.column_stack(np.where(thresh.transpose() > 0))

# get rotated rectangle from the pixel coordinates
rotrect = cv2.minAreaRect(coords)
box = cv2.boxPoints(rotrect)
# np.int0 was an alias of np.intp and no longer exists in NumPy 2.0
box = box.astype(np.intp)
print(box)

# get center line from box: it joins the midpoints of the sides
# box[0]-box[3] and box[1]-box[2]
# NOTE(review): the original comment assumes the points are clockwise from
# bottom right; presumably this ordering depends on the OpenCV version --
# confirm that these two sides are the short ones for your data.
x1 = int((box[0][0] + box[3][0]) // 2)
y1 = int((box[0][1] + box[3][1]) // 2)
x2 = int((box[1][0] + box[2][0]) // 2)
y2 = int((box[1][1] + box[2][1]) // 2)

# draw rotated rectangle on copy of img as result
result = img.copy()
cv2.drawContours(result, [box], 0, (0, 0, 255), 2)
cv2.line(result, (x1, y1), (x2, y2), (255, 0, 0), 2)

# write result to disk
cv2.imwrite("blob_mask_rotrect.png", result)

# display results
cv2.imshow("THRESH", thresh)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
I am working on a text recognition project. There is a chance the text is rotated 180 degrees. I have tried tesseract-ocr on terminal, but no luck. Is there any way to detect it and correct it? An example of the text is shown below.
tesseract input.png output
tesseract input.png - --psm 0 -c min_characters_to_try=10
Warning. Invalid resolution 0 dpi. Using 70 instead.
Page number: 0
Orientation in degrees: 180
Rotate: 180
Orientation confidence: 0.74
Script: Latin
Script confidence: 1.67
One simple approach to detect if text is rotated 180 degrees is to use the observation that text tends to be skewed towards the bottom. Here's the strategy:
Convert image to grayscale
Gaussian blur
Threshold image
Find the top/bottom half ROIs of thresholded image
Count non-zero array elements for each half
Threshold image
Find ROIs of top and bottom half
Next we split the top/bottom sections
With each half we count non-zero array elements using cv2.countNonZero(). We get this
('top', 4035)
('bottom', 3389)
By comparing the values between the two halves, if the top half has more pixels than the bottom half, it is upside down by 180 degrees. If it has less, it is correctly oriented.
Now that we have detected if it is upside down, we can rotate it using this function
def rotate(image, angle):
    """Rotate ``image`` about its centre onto a canvas that fits the result.

    ``angle`` (degrees) is negated before being handed to
    ``cv2.getRotationMatrix2D``. The output size is the bounding box of the
    rotated image, and the matrix translation is shifted so that the old
    centre lands on the centre of the new canvas.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2

    # Rotation about the image centre, no scaling.
    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the axis-aligned box that encloses the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Re-centre the rotated content on the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, matrix, (new_width, new_height))
Rotating the image
rotated = rotate(original_image, 180)
cv2.imshow("rotated", rotated)
which gives us the correct result
This is the pixel result if the image was correctly oriented
('top', 3209)
('bottom', 4206)
Full code
import numpy as np
import cv2
def rotate(image, angle):
    """Rotate ``image`` about its centre onto a canvas that fits the result.

    ``angle`` (degrees) is negated before being handed to
    ``cv2.getRotationMatrix2D``. The output size is the bounding box of the
    rotated image, and the matrix translation is shifted so that the old
    centre lands on the centre of the new canvas.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2

    # Rotation about the image centre, no scaling.
    matrix = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the axis-aligned box that encloses the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Re-centre the rotated content on the enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, matrix, (new_width, new_height))
image = cv2.imread("1.PNG")
original_image = image.copy()

# Inverse threshold so that pixels darker than 110 become non-zero.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blurred, 110, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("thresh", thresh)

# Corner coordinates of the top and bottom halves. Integer division keeps
# them usable as slice indices (h/2 is a float in Python 3).
x, y, w, h = 0, 0, image.shape[1], image.shape[0]
top_half = ((x, y), (x+w, y+h//2))
bottom_half = ((x, y+h//2), (x+w, y+h))

top_x1, top_y1 = top_half[0]
top_x2, top_y2 = top_half[1]
bottom_x1, bottom_y1 = bottom_half[0]
bottom_x2, bottom_y2 = bottom_half[1]

# Split into top/bottom ROIs
top_image = thresh[top_y1:top_y2, top_x1:top_x2]
bottom_image = thresh[bottom_y1:bottom_y2, bottom_x1:bottom_x2]
cv2.imshow("top_image", top_image)
cv2.imshow("bottom_image", bottom_image)

# Count non-zero array elements
top_pixels = cv2.countNonZero(top_image)
bottom_pixels = cv2.countNonZero(bottom_image)
print('top', top_pixels)
print('bottom', bottom_pixels)

# Rotate if upside down: more non-zero pixels in the top half than in the
# bottom half is taken as the sign of a 180-degree rotation.
if top_pixels > bottom_pixels:
    rotated = rotate(original_image, 180)
    cv2.imshow("rotated", rotated)
cv2.waitKey(0)
I kind of liked the pytesseract solution.
import cv2
import pytesseract
from scipy.ndimage import rotate as Rotate
def float_convertor(x):
    """Return ``x`` as a float when it is numeric text, otherwise unchanged.

    ``float`` itself decides what counts as numeric, so decimal values such
    as ``"0.74"`` are converted too, and text that cannot be parsed is
    returned as-is instead of raising.

    Args:
        x: Text to convert.

    Returns:
        float | str: the parsed number, or ``x`` itself.
    """
    try:
        return float(x)
    except ValueError:
        return x
def tesseract_find_rotatation(img):
    """Rotate an image according to Tesseract's orientation detection.

    Args:
        img: Path of an image file (``str``), or an already loaded image
            that is passed to pytesseract unchanged.

    Returns:
        tuple: ``(img_rotated, out)`` where ``out`` maps each field name of
        the ``pytesseract.image_to_osd`` report to its value and
        ``img_rotated`` is the image rotated by ``360 - out["Rotate"]``
        degrees.

    Raises:
        FileNotFoundError: If ``img`` is a path that cannot be read.
    """
    if isinstance(img, str):
        path = img
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None, not by raising.
        if img is None:
            raise FileNotFoundError(path)

    report = pytesseract.image_to_osd(img)

    # One "name: value" pair per line of the report.
    out = {}
    for line in report.rstrip().split("\n"):
        parts = line.split(":")
        out[parts[0]] = float_convertor(parts[-1].strip())

    img_rotated = Rotate(img, 360-out["Rotate"])
    return img_rotated, out
usage
# Path of the image to correct (fill in before running).
img_loc = ""
# Call the function under the exact name it is defined with above.
img_rotated, out = tesseract_find_rotatation(img_loc)