Find new (X,Y) after resizing and cropping image - python

I have a image that has to be cropped around a bounding box and resized to 256x256. In my original image I have an number of Points (x,y) that are in the bounding box.
This is my original image with my original coordinates marked:
Heres the cropped result, where the red points are the right x,y and the blue ones are my current result:
Heres how I'm doing it:
import numpy as np
import cv2
def scaleBB(bb, scale):
centerX = (bb[0][0] + bb[1][0]) / 2
centerY = (bb[0][1] + bb[2][1]) / 2
center = (centerX, centerY)
scl_center = (centerX * scale[0], centerY * scale[1])
p1 = scale * (bb[0] - center) + scl_center
p2 = scale * (bb[1] - center) + scl_center
p3 = scale * (bb[2] - center) + scl_center
p4 = scale * (bb[3] - center) + scl_center
return np.array([p1, p2, p3, p4])
def expandBB(scaledBB, size):
bbw = np.abs(scaledBB[0][0] - scaledBB[1][0])
bbh = np.abs(scaledBB[0][1] - scaledBB[2][1])
expandX = (size[0] - bbw) / 2
expandY = (size[1] - bbh) / 2
p1 = scaledBB[0] + (-expandX, -expandY)
p2 = scaledBB[1] + (+expandX, -expandY)
p3 = scaledBB[2] + (+expandX, +expandY)
p4 = scaledBB[3] + (+expandX, +expandY)
return np.array([p1, p2, p3, p4])
def recalculate_joints_points(oldX, oldY, newX, newY, joints):
R_x = newX / oldX
R_y = newY / oldY
new_joints = []
for index, joint in enumerate(joints):
x = joint[0]
y = joint[1]
n_x = round(R_x * x)
n_y = round(R_y * y)
print(R_x, R_y, x, y, n_x, n_y)
new_joints.append([n_x, n_y])
return np.array(new_joints)
def cropAndResizeImage(label, bb):
img_path = "original.jpg"
# downscale
image = cv2.imread(img_path)
# orgSize = image.shape[:2]
label = label
bb = bb
print(bb)
dim = int(256 / 2)
# define the target height of the bounding box
targetHeight = 200.0
w = np.abs(bb[0][0] - bb[1][0])
h = np.abs(bb[0][1] - bb[2][1])
targetScale = targetHeight / h
print(targetScale)
scaledImage = cv2.resize(image, (0, 0), fx=targetScale, fy=targetScale)
scaledBB = scaleBB(bb, (targetScale, targetScale))
cropRegion = expandBB(scaledBB, (256, 256))
print(scaledBB)
print(cropRegion)
startX = int(cropRegion[0][0] + dim)
startY = int(cropRegion[0][1] + dim)
endX = startX + 256 # cropRegion[2][0] + dim
endY = startY + 256 #cropRegion[2][1] + dim
print(startX, startY, endX, endY)
padded_image = np.pad(scaledImage, ((dim, dim), (dim, dim), (0, 0)), mode='constant')
croppedImage = padded_image[startY:endY, startX:endX]
# new label
print(image.shape, croppedImage.shape)
oldWidth = image.shape[1]
oldHeight = image.shape[0]
newWidth = 256 + dim
newHeight = 256 + dim
out_label = recalculate_joints_points(oldWidth, oldHeight, newWidth, newHeight, label)
return [croppedImage, out_label]
def main():
labels = np.array([[1214, 598],
[1169, 424],
[1238, 273],
[1267, 285],
[1212, 453],
[1229, 622],
[1253, 279],
[1173, 114],
[1171, 113],
[1050, 60],
[1106, 143],
[1140, 100],
[1169, 80],
[1176, 148],
[1152, 280],
[1087, 391]])
bb = np.array([[1050, 60],
[1267, 60],
[1267, 622],
[1050, 622]])
img, label = cropAndResizeImage(labels, bb)
for point in label:
print(point)
x,y = point
cv2.circle(img,(int(x),int(y)),5,(255,0,0),-11)
cv2.imshow("cropped", img)
cv2.waitKey()
if __name__ == '__main__':
main()
As far as I understood is to get the new (x,y) you have to calculate the ratio (difference of size in a scale factor) but it still seems off. Any help is appreciated.
EDIT 1:
Using as newHeight/Width just 256 produces this image:
*EDIT 2:
Using solution of #ChrisH its quite perfect but still a little bit off:

Here is a function that will translate directly from the original coordinates into the cropped and scaled coordinates. You can skip all the other functions and transform points directly with this
def getNewCoords(x,y):
bbUpperLeftX = bb[0][0]
bbUpperLeftY = bb[0][1]
bbLowerRightX = bb[2][0]
bbLowerRightY = bb[2][1]
sizeX = bbLowerRightX - bbUpperLeftX
sizeY = bbLowerRightY - bbUpperLeftY
sizeMax = max(sizeX, sizeY)
centerX = (bbLowerRightX + bbUpperLeftX)/2
centerY = (bbLowerRightY + bbUpperLeftY)/2
offsetX = (centerX-sizeMax/2)*256/sizeMax
offsetY = (centerY-sizeMax/2)*256/sizeMax
x = x * 256/sizeMax - offsetX
y = y * 256/sizeMax - offsetY
return (x,y)

Since you define
endX = startX + 256
endY = startY + 256
And make the output image as
croppedImage = padded_image[startY:endY, startX:endX]
Shouldn’t the new width and height be 256? instead you define them as
newWidth = 256 + dim
newHeight = 256 + dim
I think dim is unnecessary here

You can use augmentit to do the task.
pip install augmentit
Documentation
link : https://github.com/sandesha-hegde/augmentit

Related

Draw a bounding box of second class on main image which was cropped to get detection of second class

I have a problem.
I have an object detection model that detects two classes, what I want to do is:
Detect class 1 (say c1) on source image (640x640) Draw bounding box and crop bounding box -> (c1 image) and then resize it to (640x640) (DONE)
Detect class 2 (say c2) on c1 image (640x640) (DONE)
Now I want to draw bounding box of c2 on source image
I have tried to explain it here by visualizing it
how can I do it? please help.
Code:
frame = self.REC.ImgResize(frame)
frame, score1, self.FLAG1, x, y, w, h = self.Detect(frame, "c1")
if self.FLAG1 and x > 0 and y > 0:
x1, y1 = w,h
cv2.rectangle(frame, (x, y), (w, h), self.COLOR1, 1)
c1Img = frame[y:h, x:w]
c1Img = self.REC.ImgResize(c1Img)
ratio = c2Img.shape[1] / float(frame.shape[1])
if ratio > 0.35:
c2Img, score2, self.FLAG2, xN, yN, wN, hN = self.Detect(c1Img, "c2")
if self.FLAG2 and xN > 0 and yN > 0:
# What should be the values for these => (__, __),(__,__)
cv2.rectangle(frame, (__, __), (__, __), self.COLOR2, 1)
I had tried a way which could only solve (x,y) coordinates but width and height was a mess
what I tried was
first found the rate of width and height at which the cropped c1 image increased after resize.
for example
x1 = 329
y1 = 102
h1 = 637
w1 = 630
r_w = 630 / 640 # 0.9843
r_h = 637 / 640 # 0.9953
x2 = 158
y2 = 393
h2 = 499
w2 = 588
new_x2 = 158 * 0.9843 # 156
new_y2 = 389 * 0.9953 # 389
new_x2 = x1 + new_x2
new_y2 = y1 + new_y2
this work to find (x,y)
but I am still trying to find a way to get (w,h) of the bounding box.
EDIT
The complete code is:
import cv2
import random
import numpy as np
import onnxruntime as ort
cuda = False
w = "models/model.onnx"
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = ort.InferenceSession(w, providers=providers)
names = ['face', 'glasses']
colors = {name:[random.randint(0, 255) for _ in range(3)] for name in names}
img = cv2.imread("test.jpg")
def ImgResize(image, width = 640, height = 640, inter = cv2.INTER_CUBIC):
if image is not None:
resized = cv2.resize(image, (width,height), interpolation = inter)
return resized
def Detect(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
flag = False
w, h = 0, 0
x, y = 0, 0
score = 0
try:
if im is None:
raise Exception(IOError())
shape = im.shape[:2]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
ratio = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup:
ratio = min(ratio, 1.0)
new_unpad = int(round(shape[1] * ratio)), int(round(shape[0] * ratio))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
if auto:
dw, dh = np.mod(dw, stride), np.mod(dh, stride)
dw /= 2
dh /= 2
if shape[::-1] != new_unpad:
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
image_ = im.transpose((2, 0, 1))
image_ = np.expand_dims(image_, 0)
image_ = np.ascontiguousarray(image_)
im = image_.astype(np.float32)
im /= 255
outname = [i.name for i in session.get_outputs()]
inname = [i.name for i in session.get_inputs()]
inp = {inname[0]:im}
outputs = session.run(outname, inp)[0]
return im, outputs, ratio, (dw, dh)
except IOError:
print("Invalid Image File")
def Detection(img, c_name):
score = 0
name = ""
a, b, c, d = 0, 0, 0, 0
image_, outputs, ratio, dwdh = Detect(img)
ori_images = [img.copy()]
for batch_id, x0, y0, x1, y1, cls_id, score in outputs:
img = ori_images[int(batch_id)]
box = np.array([x0, y0, x1, y1])
box -= np.array(dwdh * 2)
box /= ratio
box = box.round().astype(np.int32).tolist()
cls_id = int(cls_id)
score = round(float(score), 3)
if score > 0.55:
name = names[cls_id]
if name != c_name:
return img, 0, False, 0, 0, 0, 0, "Could Not Detect"
flag = True
a, b, c, d = tuple(box)
score = round(score * 100, 0)
return img, score, flag, a, b, c, d, name
COLORF = (212, 15, 24)
COLORG = (25, 240, 255)
nameW = "Det"
flagF, flagN = False, False
img = ImgResize(img)
c1_img, score, flagF, x1,y1,w1,h1,name = Detection(img,"face")
print(score, flagF, x1,y1,w1,h1,name)
if flagF:
cv2.rectangle(img, (x1,y1), (w1,h1), COLORF, 1)
cv2.putText(img, name, (x1,y1),cv2.FONT_HERSHEY_PLAIN, 2,COLORF,2)
cv2.imshow("face", img)
c1_img = c1_img[y1:h1,x1:w1]
c1_img_orig = c1_img.copy()
c1_img = ImgResize(c1_img)
c2_img, score, flagG, x2,y2,w2,h2,name = Detection(c1_img,"glasses")
if flagG:
c2_img = c2_img[y2:h2,x2:w2]
cv2.rectangle(c1_img_orig, (x2,y2), (w2,h2), COLORG, 1)
cv2.putText(c1_img_orig, name, (x1,y1),cv2.FONT_HERSHEY_PLAIN, 2,COLORG,2)
cv2.imshow("glasses", c2_img)
x3 = x1 + int(x2 * w1 / 640)
y3 = y1 + int(y2 * h1 / 640)
w3 = int(w2 * w1 / 640)
h3 = int(h2 * h1 / 640)
cv2.rectangle(img, (x3,y3), (w3,h3), COLORG, 1)
cv2.imshow(nameW, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
what this code does is for some images it draws the bounding box as required:
but for other images and in video stream this is what happens:
Here is a complete programming example. Please keep in mind that for cv2.rectangle you need to pass top-left corner and bottom-right corner of the rectangle. As you didn't share ImgResize and Detect I made some assumptions:
import cv2
import numpy as np
COLOR1 = (0, 255, 0)
COLOR2 = (0, 0, 255)
DETECT_c1 = (40, 20, 120, 160)
DETECT_c2 = (20, 120, 160, 40)
RESIZE_x, RESIZE_y = 200, 200
frame = np.zeros((RESIZE_y, RESIZE_x, 3), np.uint8)
x1, y1, w1, h1 = DETECT_c1
c1Img = frame[y1:h1, x1:w1]
cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), COLOR1, 1)
c1Img = cv2.resize(c1Img, (RESIZE_x, RESIZE_y))
x2, y2, w2, h2 = DETECT_c2
x3 = x1 + int(x2 * w1 / RESIZE_x)
y3 = y1 + int(y2 * h1 / RESIZE_y)
w3 = int(w2 * w1 / RESIZE_x)
h3 = int(h2 * h1 / RESIZE_y)
cv2.rectangle(frame, (x3, y3), (x3 + w3, y3 + h3), COLOR2, 1)
cv2.imwrite('out.png', frame)
Output:
I suggest that you treat your bounding box coordinates relatively.
If I understand correctly, your problem is that you have different referential. One way to bypass that is to normalize at each step your bbox coordinates.
c1_box is relative to your image, so :
c1_x = x/640
c1_y = y/640
When you crop, you can record the ratio values between main image and your cropped object.
image_vs_c1_x = c1_x / img_x
image_vs_c1_y = c1_y / img_y
Then you need to multiply your c2 bounding box coordinates by those ratios.
this is how I was able to solve it.
rwf = round((w1-x1)/640, 2)
rhf = round((h1-y1)/640, 2)
x3 = int(x2*rwf )
y3 = int(y2*rhf)
w3 = int(w2*rwf)
h3 = int(h2*rhf)
# these are the top right and bottom left cooridinates
x4 = x1 + x3
y4 = y1 + y3
w4 = x1 + w3
h4 = y1 + h3

How do i calculate FOV?

So I have a pan-tilt system with an airbrush on top, the pressure is quite strong so that I can place the robot at a distance of at least 1.5 meters. I currently have normalized coordinates XY that I can visualize on my camera like this
Now I want to translate those coordinates to a real canvas and allow the pan-tilt to point towards them and eventually spray. The two servos have 0 to 180 degrees but the airbrush is positioned on top of the tilt at 90. So if we consider that the pan and tilt it's at 90 the airbrush points perpendicularly to the real canvas. I am following along with this answer https://stackoverflow.com/a/44269392/13475623
lx = (2 * canvasXpoint / canvasW - 1) * np.tan(fovX / 2)
ly = (-2 * canvasYpoint / canvasH + 1) * np.tan(fovY / 2)
lz = 100
tx = np.cos(pan) * np.cos(tilt) * lx - np.cos(tilt) * np.sin(pan) * ly - np.sin(tilt) * lz
ty = np.sin(pan) * lx + np.cos(pan) * ly
tz = np.cos(pan) * np.sin(tilt) * lx - np.sin(pan) * np.sin(tilt) * ly + np.cos(tilt) * lz
tilt = abs(np.arctan2(tz, tx) )*180 /np.pi
pan = abs(np.arcsin(ty / np.sqrt(tx*tx + ty*ty + tz*tz))) *180 /np.pi
he specifically ask to use fovx and fovy, but i have no idea how to place the, is fovx and fovy the same as the centre of the canvas plus z? which gives the robot position?
this is the entire code:
import numpy as np
import random
import cv2
rect = (0,0,0,0)
startPoint = False
endPoint = False
def on_mouse(event,x,y,flags,params):
global rect,startPoint,endPoint
# get mouse click
if event == cv2.EVENT_LBUTTONDOWN:
if startPoint == True and endPoint == True:
startPoint = False
endPoint = False
rect = (0, 0, 0, 0)
if startPoint == False:
rect = (x, y, 0, 0)
startPoint = True
elif endPoint == False:
rect = (rect[0], rect[1], x, y)
endPoint = True
cap = cv2.VideoCapture(0)
waitTime = 50
#Reading the first frame
(grabbed, frame) = cap.read()
# create a numpy array with coordinates, dtype= np.uint32
points = np.array([
[0.3791454386035252, 0.5089704263689607], [0.4983802415059109, 0.4865878212776629], [0.4191061040406586, 0.4890729258496474], [0.48898375092596835, 0.6904554156787046], [0.41117320428962, 0.6855686449973655], [0.48969027909831686, 0.8806483247709954], [0.4096722346480175, 0.8725103831012889], [0.45146556567120294, 0.216198952126905], [0.6304876750748412, 0.1994776546413951], [0.6406976694235704, 0.1861724655606558], [0.6199918357274865, 0.18561325370105788], [0.6525936779272056, 0.201758477474465], [0.6013198509477334, 0.20041966221830415], [0.6683290543094758, 0.29699362669473495], [0.5645238852104717, 0.3113999818240313], [0.6545654774178274, 0.49620430200480303], [0.5898070573107588, 0.49659117464889346], [0.6592482998457356, 0.6834740545963035], [0.5840631897032319, 0.6828527784533074], [0.6408640096147972, 0.8299668209407426], [0.5829181988101784, 0.8173392725052692], [0.6197806290284397, 0.30050890733295843], [0.8252923243905792, 0.23409826375167195], [0.835683753646597, 0.2185883280832016], [0.8131540844750428, 0.21904862499113367], [0.8506741192799976, 0.2279991219170517], [0.7959142481709739, 0.22725381616179272], [0.8733570624656342, 0.3256920048853457], [0.7652207837892534, 0.3239122878098148], [0.893097550288673, 0.44273291363944955], [0.7346131146711571, 0.4430594635999311], [0.902709244982588, 0.5343829401117663], [0.8520378940615836, 0.543215423861057], [0.7842126810888624, 0.5430821914771806], [0.8496391467917583, 0.7170072127563635], [0.7934480818135997, 0.7157067918591926], [0.8415470663986131, 0.8790693270711738], [0.7969306654944098, 0.8786970205344115], [0.8191112469834433, 0.32444646417244244], [0.4544294400182521, 0.10802826838116084], [0.4652589441860643, 0.09470838455219986], [0.44184697991125976, 0.09401847354478254], [0.4784184639521475, 0.1113126386155105], [0.42270482157448985, 0.10977393520172159], [0.5101597581790689, 0.21719483055184013], [0.39370939342390643, 0.21645334444157344], [0.3703281257159549, 0.34746637604116004]], np.float64)
while(cap.isOpened()):
(grabbed, frame) = cap.read()
cv2.namedWindow('frame')
cv2.setMouseCallback('frame', on_mouse)
panarr=[]
tiltarr=[]
#drawing rectangle
if startPoint == True:
cv2.circle(frame, (rect[0], rect[1]), 2,(255, 0, 255), 2)
if startPoint == True and endPoint == True:
cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (255, 0, 255), 2)
w = rect[2] - rect[0]
h = rect[3] - rect[1]
canvasW = 120
canvasH = 90
distanceZ = 100
#position machine
screenXCenter = (rect[0] + rect[2]) / 2
screenYCenter = (rect[1] + rect[3]) / 2
pan = tilt = 90
servoXcentrepoint = canvasW / 2
servoYcentrepoint = canvasH / 2
# fov
fovX = np.arctan((canvasW * canvasH )/distanceZ)
for x, y in points:
screenX = (x * w) + rect[0] #- differencesqrx
screenY = (y * h) + rect[1] #- differencesqry
cv2.circle(frame,(int(screenXCenter),int(screenYCenter)),2,(255, 255, 0),2)
cv2.circle(frame,(int(screenX),int(screenY)),2,(255, 45, 250),2)
canvasXpoint = (x * canvasW)
canvasYpoint = (y * canvasH)
# dx = canvasXpoint - servoXcentrepoint
# dy = canvasYpoint - servoYcentrepoint
# pan = abs(np.arctan((distanceZ/dx))) * 180/np.pi
# tilt = abs(np.arctan(distanceZ/dy)) * 180/np.pi
lx = (2 * canvasXpoint / canvasW - 1) * np.tan(servoXcentrepoint / 2)
ly = (-2 * canvasYpoint / canvasH + 1) * np.tan(servoYcentrepoint / 2)
lz = 10
tx = np.cos(pan) * np.cos(tilt) * lx - np.cos(tilt) * np.sin(pan) * ly - np.sin(tilt) * lz
ty = np.sin(pan) * lx + np.cos(pan) * ly
tz = np.cos(pan) * np.sin(tilt) * lx - np.sin(pan) * np.sin(tilt) * ly + np.cos(tilt) * lz
tilt = abs(np.arctan2(tz, tx) )*180 /np.pi
pan = abs(np.arcsin(ty / np.sqrt(tx*tx + ty*ty + tz*tz))) *180 /np.pi
tiltarr.append(int(tilt))
panarr.append(int(pan))
# a = [x,y]
cv2.imshow('frame',frame)
if cv2.waitKey(1)==ord('q'):
break
print(tiltarr)
print(panarr)
cap.release()
cv2.destroyAllWindows()
The ultimate goal is to determine the angle for the pan and tilt based on each point

How to detect the number plate with the most black pixels. If there is another method pls comment

Im trying to detect the ROI with the most black pixels for the license plate
Below are the code on the number plate. It is based of the question of How to recognize vehicle license / number plate (ANPR) from an image?.
I modified it a bit b
import cv2
import numpy as np
import imutils
import sys
import glob
import math
import time
import os
def validate_contour(contour, img, aspect_ratio_range, area_range):
rect = cv2.minAreaRect(contour)
img_width = img.shape[1]
img_height = img.shape[0]
box = cv2.boxPoints(rect)
box = np.int0(box)
X = rect[0][0]
Y = rect[0][1]
angle = rect[2]
width = rect[1][0]
height = rect[1][1]
angle = (angle + 180) if width < height else (angle + 90)
output = False
if (width > 0 and height > 0) and ((width < img_width / 2.0) and (height < img_width / 2.0)):
aspect_ratio = float(width) / height if width > height else float(height) / width
if (aspect_ratio >= aspect_ratio_range[0] and aspect_ratio <= aspect_ratio_range[1]):
if ((height * width > area_range[0]) and (height * width < area_range[1])):
box_copy = list(box)
point = box_copy[0]
del (box_copy[0])
dists = [((p[0] - point[0]) ** 2 + (p[1] - point[1]) ** 2) for p in box_copy]
sorted_dists = sorted(dists)
opposite_point = box_copy[dists.index(sorted_dists[1])]
tmp_angle = 90
if abs(point[0] - opposite_point[0]) > 0:
tmp_angle = abs(float(point[1] - opposite_point[1])) / abs(point[0] - opposite_point[0])
tmp_angle = rad_to_deg(math.atan(tmp_angle))
if tmp_angle <= 45:
output = True
return output
def deg_to_rad(angle):
return angle * np.pi / 180.0
def rad_to_deg(angle):
return angle * 180 / np.pi
def enhance(img):
kernel = np.array([[-1, 0, 1], [-2, 0, 2], [1, 0, 1]])
return cv2.filter2D(img, -1, kernel)
img = cv2.imread('13.jpg')
input_image = imutils.resize(img, width=500)
raw_image = np.copy(input_image)
img_original = input_image.copy()
img_mask = input_image.copy()
lic_plate = input_image.copy()
contoured = input_image.copy()
gray = cv2.cvtColor(img_original, cv2.COLOR_BGR2GRAY)
gray = enhance(gray)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
gray = cv2.Sobel(gray, -1, 1, 0)
h, sobel = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
se = cv2.getStructuringElement(cv2.MORPH_RECT, (16, 4))
binary = cv2.morphologyEx(sobel, cv2.MORPH_CLOSE, se)
ed_img = np.copy(binary)
contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for contour in contours:
aspect_ratio_range = (2.2, 12) # minimum 2.2 , max 12
area_range = (500, 18000)
rectangles = cv2.minAreaRect(contour) # rect = ((center_x,center_y),(width,height),angle)
boxes = cv2.boxPoints(rectangles) # Find four vertices of rectangle from above rect
boxes = np.int0(boxes) # Round the values and make it integers
# print(box)
all_area = cv2.drawContours(contoured, [boxes], 0, (127, 0, 255), 2)
if validate_contour(contour, binary, aspect_ratio_range, area_range):
rect = cv2.minAreaRect(contour)
box = cv2.boxPoints(rect)
box = np.int0(box)
Xs = [i[0] for i in box]
Ys = [i[1] for i in box]
x1 = min(Xs)
x2 = max(Xs)
y1 = min(Ys)
y2 = max(Ys)
angle = rect[2]
if angle < -45:
angle += 90
W = rect[1][0]
H = rect[1][1]
aspect_ratio = float(W) / H if W > H else float(H) / W
center = ((x1 + x2) / 2, (y1 + y2) / 2)
size = (x2 - x1, y2 - y1)
M = cv2.getRotationMatrix2D((size[0] / 2, size[1] / 2), angle, 1.0);
tmp = cv2.getRectSubPix(ed_img, size, center)
tmp = cv2.warpAffine(tmp, M, size)
TmpW = H if H > W else W
TmpH = H if H < W else W
tmp = cv2.getRectSubPix(tmp, (int(TmpW), int(TmpH)), (size[0] / 2, size[1] / 2))
__, tmp = cv2.threshold(tmp, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
sortedplate = cv2.drawContours(img_mask, [box], 0, (127, 0, 255), 2)
white_pixels = 0
for x in range(tmp.shape[0]):
for y in range(tmp.shape[1]):
if tmp[x][y] == 255:
white_pixels += 1
edge_density = float(white_pixels) / (tmp.shape[0] * tmp.shape[1])
tmp = cv2.getRectSubPix(raw_image, size, center)
tmp = cv2.warpAffine(tmp, M, size)
TmpW = H if H > W else W
TmpH = H if H < W else W
tmp = cv2.getRectSubPix(tmp, (int(TmpW), int(TmpH)), (size[0] / 2, size[1] / 2))
# getRectSubPix( = Retrieves a pixel rectangle from an image with sub-pixel accuracy.
if edge_density > 0.5:
cv2.drawContours(input_image, [box], 0, (127, 0, 255), 2)
cv2.imshow('original', img_original)
cv2.imshow('sobel', sobel)
cv2.imshow('binary', binary)
cv2.imshow("all contours", all_area)
cv2.imshow("sorted", sortedplate)
cv2.imshow("detected", lic_plate)
cv2.waitKey(0)
The image The number plate that needed to detect
Example of images lp1 lp2 lp3 lp4

How to make a single bounding box

I am doing text-detection using OCR and in my program, if a photo has multiple text, it makes multiple bounding boxes. I was wondering if there was a way to combine all the boxes and make a new cropped output image of the text. PS: I'm using the EAST deep learning text. The problem is it detects the texts in the image but if the texts are a little further apart, it creates 2 or 3 images based on that original image and I am trying to look for a way for it to combine these 2-3 crops into 1.
(newW, newH) = (args["width"], args["height"])
rW = W / float(newW)
rH = H / float(newH)
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True,
crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()
print("[INFO] text detection took {:.6f} seconds".format(end - start))
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []
for y in range(0, numRows):
scoresData = scores[0, 0, y]
xData0 = geometry[0, 0, y]
xData1 = geometry[0, 1, y]
xData2 = geometry[0, 2, y]
xData3 = geometry[0, 3, y]
anglesData = geometry[0, 4, y]
for x in range(0, numCols):
if scoresData[x] < args["min_confidence"]:
continue
(offsetX, offsetY) = (x * 4.0, y * 4.0)
angle = anglesData[x]
cos = np.cos(angle)
sin = np.sin(angle)
h = xData0[x] + xData2[x]
w = xData1[x] + xData3[x]
endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
startX = int(endX - w)
startY = int(endY - h)
rects.append((startX, startY, endX, endY))
confidences.append(scoresData[x])
boxes = non_max_suppression(np.array(rects), probs=confidences)
for number, (startX, startY, endX, endY) in enumerate(boxes):
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
Final = orig[startY:endY, startX:endX]
cv2.imshow("Text Detection", Final)
cv2.waitKey(0)
cv2.imwrite("crop{}.jpg".format(number), Final)

image tiling in loops using Python OpenCV

Python noob needs some help guys! Can someone show me how to rewrite my code using loops? Tried some different syntaxes but did not seem to work!
img = cv2.imread("C://Users//user//Desktop//research//images//Underwater_Caustics//set1//set1_color_0001.png")
tile11=img[1:640, 1:360]
cv2.imwrite('tile11_underwater_caustic_set1_0001.png', tile11)
tile12=img[641:1280, 1:360]
cv2.imwrite('tile12_underwater_caustic_set1_0001.png', tile12)
tile13=img[1281:1920, 1:360]
cv2.imwrite('tile13_underwater_caustic_set1_0001.png', tile13)
tile21=img[1:640, 361:720]
cv2.imwrite('tile21_underwater_caustic_set1_0001.png', tile21)
tile22=img[641:1280, 361:720]
cv2.imwrite('tile22_underwater_caustic_set1_0001.png', tile22)
tile23=img[1281:1920, 361:720]
cv2.imwrite('tile23_underwater_caustic_set1_0001.png', tile23)
tile31=img[1:640, 721:1080]
cv2.imwrite('tile31_underwater_caustic_set1_0001.png', tile31)
tile32=img[641:1280, 721:1080]
cv2.imwrite('tile32_underwater_caustic_set1_0001.png', tile32)
tile33=img[1281:1920, 721:1080]
cv2.imwrite('tile33_underwater_caustic_set1_0001.png', tile33)
As you can see, the image will be cut into 9 equal-size pieces, how to write it using loops?
This won't produce the same result like your code, but will give you some ideas:
img = cv2.imread('sample.jpg')
numrows, numcols = 4, 4
height = int(img.shape[0] / numrows)
width = int(img.shape[1] / numcols)
for row in range(numrows):
for col in range(numcols):
y0 = row * height
y1 = y0 + height
x0 = col * width
x1 = x0 + width
cv2.imwrite('tile_%d%d.jpg' % (row, col), img[y0:y1, x0:x1])
I needed image tiling where last parts or edge tiles are required to be full tile images.
Here is the code I use:
import cv2
import math
import os
Path = "FullImage.tif";
filename, file_extension = os.path.splitext(Path)
image = cv2.imread(Path, 0)
tileSizeX = 256;
tileSizeY = 256;
numTilesX = math.ceil(image.shape[1]/tileSizeX)
numTilesY = math.ceil(image.shape[0]/tileSizeY)
makeLastPartFull = True; # in case you need even siez
for nTileX in range(numTilesX):
for nTileY in range(numTilesY):
startX = nTileX*tileSizeX
endX = startX + tileSizeX
startY = nTileY*tileSizeY
endY = startY + tileSizeY;
if(endY > image.shape[0]):
endY = image.shape[0]
if(endX > image.shape[1]):
endX = image.shape[1]
if( makeLastPartFull == True and (nTileX == numTilesX-1 or nTileY == numTilesY-1) ):
startX = endX - tileSizeX
startY = endY - tileSizeY
currentTile = image[startY:endY, startX:endX]
cv2.imwrite(filename + '_%d_%d' % (nTileY, nTileX) + file_extension, currentTile)
This is for massive image reconstruction using part of flowfree his code. By using a folder of sliced images in the same area the script is, you can rebuild the image. I hope this helps.
import cv2
import glob
import os
dir = "."
pathname = os.path.join(dir, "*" + ".png")
images = [cv2.imread(img) for img in glob.glob(pathname)]
img = images[0]
numrows, numcols = 1,1
height = int(img.shape[0] / numrows)
width = int(img.shape[1] / numcols)
for row in range(numrows):
for col in range(numcols):
y0 = row * height
y1 = y0 + height
x0 = col * width
x1 = x0 + width
cv2.imwrite('merged_img_%d%d.jpg' % (row, col), img[y0:y1, x0:x1])

Categories