I'm pasting a randomly generated barcode on a background image.
This barcode has been randomly rotated, skewed, and scaled.
Then, this barcode is randomly placed onto the background image.
I'm trying to find out the coordinates of the actual barcode, ignoring the expanded black mask.
I'm a beginner in matrices and image manipulation so any help, especially in the math, would be appreciated.
This is where I generate the barcode, using the pdf417gen library, along with the coordinates of the barcode.
import numpy as np
import os
import random
import sys
from pdf417gen import encode, render_image
from PIL import Image, ImageDraw
def generate_barcode(self):
    barcode = encode("random text data", columns=5, security_level=5)
    scale = 5
    ratio = 3
    padding = 5
    barcode_image = render_image(barcode, scale=scale, ratio=ratio, padding=padding)
    barcode_coords = np.array([
        [(barcode_image.width - padding) / float(barcode_image.width), (barcode_image.height - padding) / float(barcode_image.height)],
        [padding / float(barcode_image.width), (barcode_image.height - padding) / float(barcode_image.height)],
        [padding / float(barcode_image.width), padding / float(barcode_image.height)],
        [(barcode_image.width - padding) / float(barcode_image.width), padding / float(barcode_image.height)]
    ])
    return (barcode_coords, barcode_image)
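To make the coordinate convention explicit, the returned corners are normalized to the [0, 1] range; converting them back to pixels is just a multiplication (a quick sketch using the two values returned above):

# Quick check (a sketch): the corners are normalized, so scale back to pixels.
pixel_corners = barcode_coords * [barcode_image.width, barcode_image.height]
print(pixel_corners)  # four (x, y) corners of the barcode inside its own (padded) image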
Once I have the barcode's image and coordinates, I do the following:
1. transform the barcode's image,
2. attempt to match the coordinates to the image's transformation,
3. paste the image onto a background image,
4. then draw a red outline using the coordinates.
The red outline should outline the barcode's image.
Here's where I transform the barcode image and paste it to the background image.
def composite_images(self, background_image, barcode_coords, barcode_image):
    coords = barcode_coords
    barcode = barcode_image
    # instantiating the transformation variables
    scale = random.randrange(4, 50) / 100.0
    size = int(min(background_image.size) * scale)  # background_image.size returns (width, height)
    barcode = barcode.resize((int(size * 2.625), size))  # width:height ratio is 2.625:1
    rotation = random.randrange(0, 360)
    xstretch = random.randrange(0, 100) / 100.0
    ystretch = random.randrange(0, 100) / 100.0
    xshear = random.randrange(0, 100) / 100.0
    yshear = random.randrange(0, 100) / 100.0
    # set affine transform on the barcode coordinates
    mat = self.get_affine_transform(rotation, xstretch, ystretch, xshear, yshear)
    coords = self.transform_coords(coords, mat, True)
    expand_mask = self.transform_coords(np.array([  # shifts expand mask based on transformation
        [0.0, 0.0],
        [float(size * 2.625), 0.0],
        [float(size * 2.625), float(size)],
        [0.0, float(size)]
    ]), mat, False)
    minx = min(expand_mask[:, 0])
    maxx = max(expand_mask[:, 0])
    miny = min(expand_mask[:, 1])
    maxy = max(expand_mask[:, 1])
    mat_inv = np.linalg.inv(np.array([  # the inverse matrix
        [mat[0, 0], mat[0, 1], -minx],
        [mat[1, 0], mat[1, 1], -miny],
        [0, 0, 1.0]
    ]))
    image_matrix = (mat_inv[0, 0], mat_inv[0, 1], mat_inv[0, 2],
                    mat_inv[1, 0], mat_inv[1, 1], mat_inv[1, 2])
    new_size = (int(maxx - minx), int(maxy - miny))
    # set affine transform on the barcode image using data from the coordinate affine transformation
    barcode = barcode.transform(new_size, method=Image.AFFINE, data=image_matrix)
    # paste the barcode image onto a random position on the background image
    region_x = random.randrange(0, background_image.width - size)
    region_y = random.randrange(0, background_image.height - size)
    background_image.paste(barcode, (region_x, region_y))
    coords *= scale
    coords += [region_x / float(background_image.width), region_y / float(background_image.height)]
    return (coords, background_image)
def get_affine_transform(self, rotation, xstretch, ystretch, xshear, yshear):
    theta = -(rotation / 180.0) * np.pi
    return np.array([
        [np.cos(theta) * xstretch, -np.sin(theta) * xshear],
        [np.sin(theta) * ystretch, np.cos(theta) * yshear]
    ])

def transform_coords(self, coords, affine_transform, center):
    if center:
        coords -= (.5, .5)  # center on origin
    coords = np.dot(coords, affine_transform.T)
    if center:
        coords += (.5, .5)  # reset centering
    return coords
Now I draw the red outline using the coords and image (with pasted barcode) returned from composite_images().
def draw_red_outline(self, box_coords, image):
    outline = box_coords * [image.width, image.height]
    outline = outline.astype(int)
    outline = tuple(map(tuple, outline))
    draw = ImageDraw.Draw(image)
    draw.polygon(outline, outline=(255, 0, 0))
    del draw
    image.show()
I'm unsure as to where my math is going wrong.
To get the coordinates of the transformed points you can do the following. After getting the transformation matrix m, you apply it to the image:
transformed_img = cv2.warpPerspective(source_img, m, image_shape)
and the transformed image contains the result with the coordinates you want to calculate, plus some black region.
So, the solution for each of the 4 points' coordinates (if there are no 0 coordinates) is the following:
point = np.array([w, h])  # coordinates of the source point (before the transform)
homg_point = [point[0], point[1], 1]  # homogeneous coords
transf_homg_point = m.dot(homg_point)  # transform
transf_homg_point /= transf_homg_point[2]  # scale
transf_point = transf_homg_point[:2]  # back to Cartesian coords
print(transf_point)  # check the result
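The same idea works for all four corners at once. Here is a vectorized sketch, assuming m is the 3x3 perspective matrix and w, h are the source image width and height:

import numpy as np

# Sketch: transform all four source corners in one matrix product.
corners = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=float)
homog = np.hstack([corners, np.ones((4, 1))])   # homogeneous coordinates, shape (4, 3)
warped = (m @ homog.T).T                        # apply the 3x3 matrix
warped = warped[:, :2] / warped[:, 2:3]         # divide by the scale term
print(warped)                                   # the four transformed corners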
Related
I'm trying to apply a LUT to MR images using the following code. The original DICOM pixel intensities range from 0 to 4334, and the pixel intensities after applying the LUT function range from 0 to 254. I need to preserve the metadata info for the resulting image. When I plot the resulting image it displays correctly, but when I save the image it is all black pixels. I know I need to change some metadata tags, but I'm new to DICOM image processing and couldn't figure out what is causing the problem.
import numpy as np
import pydicom

def make_lut(dcm_data, width, center, p_i):
    """
    LUT: look-up tables
    VOI: volume of interest
    """
    slope = 1.0
    intercept = 0.0
    min_px = int(np.amin(dcm_data))
    max_px = int(np.amax(dcm_data))
    lut = [0] * (max_px + 1)
    invert = False
    if p_i == "MONOCHROME1":
        invert = True
    else:
        center = (max_px - min_px) - center
    for px_value in range(min_px, max_px):
        lut_value = px_value * slope + intercept
        voi_value = (((lut_value - center) / width + 0.5) * 255.0)
        clamped_value = min(max(voi_value, 0), 255)
        if invert:
            lut[px_value] = round(255 - clamped_value)
        else:
            lut[px_value] = round(clamped_value)
    return lut
def apply_lut(pixels_in, lut):
    pixels = pixels_in.flatten()
    pixels_out = [0] * len(pixels)
    for i in range(0, len(pixels)):
        pixel = pixels[i]
        if pixel > 0:
            pixels_out[i] = int(lut[pixel])
    return np.reshape(pixels_out, (pixels_in.shape[0], pixels_in.shape[1]))
# apply the function
idx = 30
ds = pydicom.dcmread(dcm_files[idx])
raw_pixels = ds.pixel_array
if ds.WindowWidth != '' and ds.WindowCenter != '':
    window_width = ds.WindowWidth
    window_center = ds.WindowCenter
    lut = make_lut(raw_pixels, window_width, window_center, ds.PhotometricInterpretation)
    dcm_default_windowing = apply_lut(raw_pixels, lut)
# save the result
ds.PixelData = dcm_default_windowing.tobytes()
ds.save_as("test_luted.dcm")
I have an image with many cars; every car has the coordinates of a polygon and keypoints. I use this code to crop an object by its polygon and get the new keypoints.
x,y,w,h = cv2.boundingRect(points_poly_int)
cropped_img = img[y:y+h,x:x+w]
head_coords_after_crop = np.asarray([head_coords_old[0] - x, head_coords_old[1] -y])
center_coords_after_crop = np.asarray([center_coords_old[0] - x, center_coords_old[1] -y])
Here is an example of a cropped image and keypoints:
What I need is to rotate the whole image by an arbitrary angle and remap the coordinates of the polygons and keypoints for every object.
Here is the method which returns the rotated image and the transformation matrix:
def rotate_image(mat, angle):
    """
    Rotates an image (angle in degrees) and expands image to avoid cropping
    """
    height, width = mat.shape[:2]  # image shape has 3 dimensions
    image_center = (width/2, height/2)  # getRotationMatrix2D needs coordinates in reverse order (width, height) compared to shape
    rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.)
    # rotation calculates the cos and sin, taking absolutes of those.
    abs_cos = abs(rotation_mat[0, 0])
    abs_sin = abs(rotation_mat[0, 1])
    # find the new width and height bounds
    bound_w = int(height * abs_sin + width * abs_cos)
    bound_h = int(height * abs_cos + width * abs_sin)
    # subtract old image center (bringing image back to origin) and add the new image center coordinates
    rotation_mat[0, 2] += bound_w/2 - image_center[0]
    rotation_mat[1, 2] += bound_h/2 - image_center[1]
    # rotate image with the new bounds and translated rotation matrix
    rotated_mat = cv2.warpAffine(mat, rotation_mat, (bound_w, bound_h))
    return rotated_mat, rotation_mat
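As a quick sanity check of the returned matrix (my own sketch; "cars.jpg" is a placeholder file name), the old image corners should land inside the new bounds:

import cv2
import numpy as np

img = cv2.imread("cars.jpg")                      # placeholder input image
rotated, C = rotate_image(img, 30)
h, w = img.shape[:2]
corners = np.array([[0, 0, 1], [w, 0, 1], [w, h, 1], [0, h, 1]], dtype=float)
mapped = (C @ corners.T).T                        # where the old corners land after rotation
print(mapped.min(axis=0), mapped.max(axis=0), rotated.shape[:2])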
What I do next is multiply the old coordinates by the transformation matrix. Here is the code:
img_rotated, C = rotate_image(img, 180)

# Remap polygon coordinates
ones = np.ones((points_poly.shape[0], 1))
new_poly = np.hstack((points_poly, ones))
new_poly = (C @ new_poly.T).T
new_poly = new_poly.astype(np.int32)

# Crop by new polygons
x, y, w, h = cv2.boundingRect(new_poly)
cropped_img = img_rotated[y:y+h, x:x+w]

# Remap keypoint coordinates
head_coords_new = np.asarray([756.600, 1687.900, 1])
center_coords_new = np.asarray([762.300, 1708.400, 1])
head_coords_new = (C @ head_coords_new.T).T
center_coords_new = (C @ center_coords_new.T).T
head_coords_new = np.asarray([head_coords_old[0] - x, head_coords_old[1] - y])
center_coords_new = np.asarray([center_coords_old[0] - x, center_coords_old[1] - y])
head_coords_new = head_coords_new.astype(np.int32)
center_coords_new = center_coords_new.astype(np.int32)
But the result is different from the first picture. Here is the new picture:
Somehow the keypoints shift, and it happens with every angle. I don't know how to fix it.
Here is the source image: https://drive.google.com/file/d/14K_MQHMwtWlw-QCQbaB5ecrREbWwyKhO/view?usp=sharing
And the polygons with keypoints:
{'keypoints': [{'id': 'head', 'pos': '756.600;1687.900'},
{'id': 'roof_center', 'pos': '762.300;1708.400'}],
'polygon': '{(759.700;1717.300);(770.000;1714.200);(762.000;1687.400);(756.600;1687.900);(751.200;1690.700);(759.700;1717.300)}'}
If you wish to reproduce the issue.
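To help with that, here is a small parsing sketch (the helper name is mine, and it assumes the exact string format pasted above):

import numpy as np

def parse_polygon(poly_str):
    # Turn '{(x;y);(x;y);...}' into an (N, 2) float array.
    tokens = poly_str.strip('{}').split(';')
    values = [float(tok.strip('()')) for tok in tokens]
    return np.array(values, dtype=float).reshape(-1, 2)

points_poly = parse_polygon('{(759.700;1717.300);(770.000;1714.200);(762.000;1687.400);'
                            '(756.600;1687.900);(751.200;1690.700);(759.700;1717.300)}')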
Thanks in advance.
Here is the difference. The right picture is the first image rotated in a picture viewer; the left is the transformed picture.
I want to divide a picture into equally sized squares, measure the average grayscale level in each square, and replace it with a blob, a.k.a. halftoning. This code gives me a picture but it doesn't look right. Any ideas what could be wrong?
import numpy as np
import scipy.misc
from matplotlib import pyplot as plt

im = scipy.misc.imread("uggla.tif")

def halftoning(im):
    im = im.astype('float64')
    width, height = im.shape
    halftone_pic = np.zeros((width, height))
    for x in range(width):
        for y in range(height):
            floating_matrix = im[x:x + 1, y:y + 1]
            sum = np.sum(floating_matrix)
            mean = np.mean(sum)
            round = (mean > 128) * 255
            halftone_pic[x, y] = round
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(im, cmap="gray")
    ax[1].imshow(halftone_pic, cmap="gray")
    plt.show()
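For reference, the per-square averaging I am aiming for would look roughly like this (just a sketch of the intent, assuming a block size of 8, not my working code):

import numpy as np

def block_halftone(im, block=8):
    # Sketch: threshold each block x block square by its mean grey level.
    im = im.astype('float64')
    out = np.zeros_like(im)
    for x in range(0, im.shape[0], block):
        for y in range(0, im.shape[1], block):
            square = im[x:x + block, y:y + block]
            out[x:x + block, y:y + block] = 255 if square.mean() > 128 else 0
    return out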
Here's something that does what you want. It's essentially a simplification of the code in the accepted answer to the related question How to create CMYK halftone Images from a color image?:
from PIL import Image, ImageDraw, ImageStat

# Adaption of answer https://stackoverflow.com/a/10575940/355230
def halftone(img, sample, scale, angle=45):
    ''' Returns a halftone image created from the given input image `img`.
    `sample` (in pixels), determines the sample box size from the original
    image. The maximum output dot diameter is given by `sample` * `scale`
    (which is also the number of possible dot sizes). So `sample` == 1 will
    preserve the original image resolution, but `scale` must be > 1 to allow
    variations in dot size.
    '''
    img_grey = img.convert('L')  # Convert to greyscale.
    channel = img_grey.split()[0]  # Get grey pixels.
    channel = channel.rotate(angle, expand=1)
    size = channel.size[0]*scale, channel.size[1]*scale
    bitmap = Image.new('1', size)
    draw = ImageDraw.Draw(bitmap)
    for x in range(0, channel.size[0], sample):
        for y in range(0, channel.size[1], sample):
            box = channel.crop((x, y, x+sample, y+sample))
            mean = ImageStat.Stat(box).mean[0]
            diameter = (mean/255) ** 0.5
            edge = 0.5 * (1-diameter)
            x_pos, y_pos = (x+edge) * scale, (y+edge) * scale
            box_edge = sample * diameter * scale
            draw.ellipse((x_pos, y_pos, x_pos+box_edge, y_pos+box_edge),
                         fill=255)
    bitmap = bitmap.rotate(-angle, expand=1)
    width_half, height_half = bitmap.size
    xx = (width_half - img.size[0]*scale) / 2
    yy = (height_half - img.size[1]*scale) / 2
    bitmap = bitmap.crop((xx, yy, xx + img.size[0]*scale,
                          yy + img.size[1]*scale))
    return Image.merge('1', [bitmap])
# Sample usage
img = Image.open('uggla.tif')
img_ht = halftone(img, 8, 1)
img_ht.show()
Here are the results from using this as the input image:
Halftoned result produced:
For my neural network I want to augment my training data by adding small random rotations and zooms to my images. The issue I am having is that scipy changes the size of my images when it applies the rotations and zooms. I need it to just clip the edges if part of the image goes out of bounds. All of my images must be the same size.
def loadImageData(img, distort=False):
    c, fn = img
    img = scipy.ndimage.imread(fn, True)
    if distort:
        img = scipy.ndimage.zoom(img, 1 + 0.05 * rnd(), mode='constant')
        img = scipy.ndimage.rotate(img, 10 * rnd(), mode='constant')
        print(img.shape)
    img = img - np.min(img)
    img = img / np.max(img)
    img = np.reshape(img, (1, *img.shape))
    y = np.zeros(ncats)
    y[c] = 1
    return (img, y)
scipy.ndimage.rotate accepts a reshape= parameter:
reshape : bool, optional
If reshape is true, the output shape is adapted so that the input
array is contained completely in the output. Default is True.
So to "clip" the edges you can simply call scipy.ndimage.rotate(img, ..., reshape=False).
from scipy.ndimage import rotate
from scipy.misc import face
from matplotlib import pyplot as plt
img = face()
rot = rotate(img, 30, reshape=False)
fig, ax = plt.subplots(1, 2)
ax[0].imshow(img)
ax[1].imshow(rot)
Things are more complicated for scipy.ndimage.zoom.
A naive method would be to zoom the entire input array, then use slice indexing and/or zero-padding to make the output the same size as your input. However, in cases where you're increasing the size of the image it's wasteful to interpolate pixels that are only going to get clipped off at the edges anyway.
Instead you could index only the part of the input that will fall within the bounds of the output array before you apply zoom:
import numpy as np
from scipy.ndimage import zoom

def clipped_zoom(img, zoom_factor, **kwargs):
    h, w = img.shape[:2]
    # For multichannel images we don't want to apply the zoom factor to the RGB
    # dimension, so instead we create a tuple of zoom factors, one per array
    # dimension, with 1's for any trailing dimensions after the width and height.
    zoom_tuple = (zoom_factor,) * 2 + (1,) * (img.ndim - 2)
    # Zooming out
    if zoom_factor < 1:
        # Bounding box of the zoomed-out image within the output array
        zh = int(np.round(h * zoom_factor))
        zw = int(np.round(w * zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2
        # Zero-padding
        out = np.zeros_like(img)
        out[top:top+zh, left:left+zw] = zoom(img, zoom_tuple, **kwargs)
    # Zooming in
    elif zoom_factor > 1:
        # Bounding box of the zoomed-in region within the input array
        zh = int(np.round(h / zoom_factor))
        zw = int(np.round(w / zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2
        out = zoom(img[top:top+zh, left:left+zw], zoom_tuple, **kwargs)
        # `out` might still be slightly larger than `img` due to rounding, so
        # trim off any extra pixels at the edges
        trim_top = ((out.shape[0] - h) // 2)
        trim_left = ((out.shape[1] - w) // 2)
        out = out[trim_top:trim_top+h, trim_left:trim_left+w]
    # If zoom_factor == 1, just return the input array
    else:
        out = img
    return out
For example:
zm1 = clipped_zoom(img, 0.5)
zm2 = clipped_zoom(img, 1.5)
fig, ax = plt.subplots(1, 3)
ax[0].imshow(img)
ax[1].imshow(zm1)
ax[2].imshow(zm2)
I recommend using cv2.resize because it is way faster than scipy.ndimage.zoom, probably due to support for simpler interpolation methods.
For a 480x640 image:
cv2.resize takes ~2 ms
scipy.ndimage.zoom takes ~500 ms
scipy.ndimage.zoom(..., order=0) takes ~175 ms
If you are doing the data augmentation on the fly, this amount of speedup is invaluable because it means more experiments in less time.
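If you want to check the numbers on your own machine, here is a rough timing sketch (mine, using a random 480x640 array):

import timeit
import numpy as np
import cv2
from scipy.ndimage import zoom

img = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
t_cv = timeit.timeit(lambda: cv2.resize(img, None, fx=1.5, fy=1.5), number=20) / 20
t_nd = timeit.timeit(lambda: zoom(img, 1.5), number=20) / 20
print("cv2.resize: %.1f ms, scipy.ndimage.zoom: %.1f ms" % (t_cv * 1000, t_nd * 1000))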
Here is a version of clipped_zoom using cv2.resize
import cv2
import numpy as np

def cv2_clipped_zoom(img, zoom_factor=0):
    """
    Center zoom in/out of the given image, returning an enlarged/shrunk view of
    the image without changing dimensions
    ------
    Args:
        img : ndarray
            Image array
        zoom_factor : float
            amount of zoom as a ratio [0 to Inf). Default 0.
    ------
    Returns:
        result : ndarray
            numpy ndarray of the same shape as the input img, zoomed by the specified factor.
    """
    if zoom_factor == 0:
        return img
    height, width = img.shape[:2]  # it's also the final desired shape
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)
    ### Crop only the part that will remain in the result (more efficient)
    # Centered bbox of the final desired size in resized (larger/smaller) image coordinates
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1, x1, y2, x2])
    # Map back to original image coordinates
    bbox = (bbox / zoom_factor).astype(int)
    y1, x1, y2, x2 = bbox
    cropped_img = img[y1:y2, x1:x2]
    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) // 2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0, 0)] * (img.ndim - 2)
    result = cv2.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='constant')
    assert result.shape[0] == height and result.shape[1] == width
    return result
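For example (a usage sketch, with img being any HxW or HxWxC array):

zoomed_in = cv2_clipped_zoom(img, 1.5)    # zoom in by 50%, output keeps the input shape
zoomed_out = cv2_clipped_zoom(img, 0.7)   # zoom out, padded with zeros at the borders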
The following picture will tell you what I want.
I have the information of the rectangles in the image (width, height, center point and rotation degree). Now, I want to write a script to cut them out and save them as an image, but straighten them as well. As in, I want to go from the rectangle shown inside the image to the rectangle that is shown outside.
I am using OpenCV Python. Please tell me a way to accomplish this.
Kindly show some code, as examples of OpenCV Python are hard to find.
You can use the warpAffine function to rotate the image around a defined center point. The suitable rotation matrix can be generated using getRotationMatrix2D (where theta is in degrees).
You can then use NumPy slicing to cut the image.
import cv2
import numpy as np

def subimage(image, center, theta, width, height):
    '''
    Rotates OpenCV image around center with angle theta (in deg)
    then crops the image according to width and height.
    '''
    # Uncomment for theta in radians
    # theta *= 180/np.pi
    shape = (image.shape[1], image.shape[0])  # cv2.warpAffine expects shape in (width, height)
    matrix = cv2.getRotationMatrix2D(center=center, angle=theta, scale=1)
    image = cv2.warpAffine(src=image, M=matrix, dsize=shape)
    x = int(center[0] - width/2)
    y = int(center[1] - height/2)
    image = image[y:y+height, x:x+width]
    return image
Keep in mind that dsize is the shape of the output image. If the patch/angle is sufficiently large, edges get cut off (compare the image above) when the original shape is used, as is done above for simplicity. In this case, you could introduce a scaling factor to shape (to enlarge the output image) and to the reference point for slicing (here center).
The above function can be used as follows:
image = cv2.imread('owl.jpg')
image = subimage(image, center=(110, 125), theta=30, width=100, height=200)
cv2.imwrite('patch.jpg', image)
I had problems with wrong offsets while using the solutions here and in similar questions.
So I did the math and came up with the following solution that works:
import cv2
import numpy as np
from math import cos, sin

def subimage(image, center, theta, width, height):
    theta *= 3.14159 / 180  # convert to rad
    v_x = (cos(theta), sin(theta))
    v_y = (-sin(theta), cos(theta))
    s_x = center[0] - v_x[0] * ((width-1) / 2) - v_y[0] * ((height-1) / 2)
    s_y = center[1] - v_x[1] * ((width-1) / 2) - v_y[1] * ((height-1) / 2)
    mapping = np.array([[v_x[0], v_y[0], s_x],
                        [v_x[1], v_y[1], s_y]])
    return cv2.warpAffine(image, mapping, (width, height), flags=cv2.WARP_INVERSE_MAP, borderMode=cv2.BORDER_REPLICATE)
For reference here is an image that explains the math behind it:
Note that
w_dst = width - 1
h_dst = height - 1
This is because the last coordinate has the value width-1, not width (and likewise for height).
The other methods will work only if the content of the rectangle is still inside the rotated image after rotation, and they will fail badly in other situations. What if part of it is lost? See an example below:
If you crop the rotated rectangle text area using the above method,
import cv2
import numpy as np

def main():
    img = cv2.imread("big_vertical_text.jpg")
    cnt = np.array([
        [[64, 49]],
        [[122, 11]],
        [[391, 326]],
        [[308, 373]]
    ])
    print("shape of cnt: {}".format(cnt.shape))
    rect = cv2.minAreaRect(cnt)
    print("rect: {}".format(rect))
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    print("bounding box: {}".format(box))
    cv2.drawContours(img, [box], 0, (0, 0, 255), 2)
    img_crop, img_rot = crop_rect(img, rect)
    print("size of original img: {}".format(img.shape))
    print("size of rotated img: {}".format(img_rot.shape))
    print("size of cropped img: {}".format(img_crop.shape))
    new_size = (int(img_rot.shape[1]/2), int(img_rot.shape[0]/2))
    img_rot_resized = cv2.resize(img_rot, new_size)
    new_size = (int(img.shape[1]/2), int(img.shape[0]/2))
    img_resized = cv2.resize(img, new_size)
    cv2.imshow("original contour", img_resized)
    cv2.imshow("rotated image", img_rot_resized)
    cv2.imshow("cropped_box", img_crop)
    # cv2.imwrite("crop_img1.jpg", img_crop)
    cv2.waitKey(0)

def crop_rect(img, rect):
    # get the parameters of the small rectangle
    center = rect[0]
    size = rect[1]
    angle = rect[2]
    center, size = tuple(map(int, center)), tuple(map(int, size))
    # get row and col num in img
    height, width = img.shape[0], img.shape[1]
    print("width: {}, height: {}".format(width, height))
    M = cv2.getRotationMatrix2D(center, angle, 1)
    img_rot = cv2.warpAffine(img, M, (width, height))
    img_crop = cv2.getRectSubPix(img_rot, size, center)
    return img_crop, img_rot

if __name__ == "__main__":
    main()
This is what you will get:
Apparently, some of the parts are cut out! Why not directly warp the rotated rectangle, since we can get its four corner points with the cv2.boxPoints() method?
import cv2
import numpy as np

def main():
    img = cv2.imread("big_vertical_text.jpg")
    cnt = np.array([
        [[64, 49]],
        [[122, 11]],
        [[391, 326]],
        [[308, 373]]
    ])
    print("shape of cnt: {}".format(cnt.shape))
    rect = cv2.minAreaRect(cnt)
    print("rect: {}".format(rect))
    box = cv2.boxPoints(rect)
    box = np.int0(box)
    width = int(rect[1][0])
    height = int(rect[1][1])
    src_pts = box.astype("float32")
    dst_pts = np.array([[0, height-1],
                        [0, 0],
                        [width-1, 0],
                        [width-1, height-1]], dtype="float32")
    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    warped = cv2.warpPerspective(img, M, (width, height))
Now the cropped image becomes
Much better, isn't it? If you check carefully, you will notice that there are some black areas in the cropped image. That is because a small part of the detected rectangle is out of the bounds of the image. To remedy this, you may pad the image a little bit and do the crop after that. There is an example illustrated in this answer, and a minimal sketch of the idea follows below.
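A rough version of that padding step (my own illustration, reusing the names from the snippet above; the padding size is an arbitrary choice):

# Pad the image with a black border, shift the box corners accordingly, then warp.
pad = 100  # hypothetical padding size; pick something larger than the overhang
img_padded = cv2.copyMakeBorder(img, pad, pad, pad, pad,
                                borderType=cv2.BORDER_CONSTANT, value=0)
src_pts_padded = src_pts + pad  # the corner coordinates move by the padding offset
M = cv2.getPerspectiveTransform(src_pts_padded, dst_pts)
warped = cv2.warpPerspective(img_padded, M, (width, height))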
Now, we compare the two methods to crop the rotated rectangle from the image.
This method does not require rotating the image and can deal with this problem more elegantly with less code.
A similar recipe for OpenCV version 3.4.0:
import cv2
import numpy as np

def getSubImage(rect, src):
    # Get center, size, and angle from rect
    center, size, theta = rect
    # Convert to int
    center, size = tuple(map(int, center)), tuple(map(int, size))
    # Get rotation matrix for rectangle
    M = cv2.getRotationMatrix2D(center, theta, 1)
    # Perform rotation on src image
    dst = cv2.warpAffine(src, M, (src.shape[1], src.shape[0]))
    out = cv2.getRectSubPix(dst, size, center)
    return out

img = cv2.imread('img.jpg')
# Find some contours
thresh2, contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Get rotated bounding box
rect = cv2.minAreaRect(contours[0])
# Extract subregion
out = getSubImage(rect, img)
# Save image
cv2.imwrite('out.jpg', out)
This is my C++ version that performs the same task. I have noticed it is a bit slow. If anyone sees anything that would improve the performance of this function, then please let me know. :)
bool extractPatchFromOpenCVImage(cv::Mat& src, cv::Mat& dest, int x, int y, double angle, int width, int height) {
    // obtain the bounding box of the desired patch
    cv::RotatedRect patchROI(cv::Point2f(x, y), cv::Size2i(width, height), angle);
    cv::Rect boundingRect = patchROI.boundingRect();
    // check if the bounding box fits inside the image
    if (boundingRect.x >= 0 && boundingRect.y >= 0 &&
        (boundingRect.x + boundingRect.width) < src.cols &&
        (boundingRect.y + boundingRect.height) < src.rows) {
        // crop out the bounding rectangle from the source image
        cv::Mat preCropImg = src(boundingRect);
        // the rotational center relative to the pre-cropped image
        int cropMidX, cropMidY;
        cropMidX = boundingRect.width / 2;
        cropMidY = boundingRect.height / 2;
        // obtain the affine transform that maps the patch ROI in the image to the
        // dest patch image. The dest image will be an upright version.
        cv::Mat map_mat = cv::getRotationMatrix2D(cv::Point2f(cropMidX, cropMidY), angle, 1.0f);
        map_mat.at<double>(0, 2) += static_cast<double>(width / 2 - cropMidX);
        map_mat.at<double>(1, 2) += static_cast<double>(height / 2 - cropMidY);
        // rotate the pre-cropped image. The destination image will be
        // allocated by warpAffine()
        cv::warpAffine(preCropImg, dest, map_mat, cv::Size2i(width, height));
        return true;
    } // if
    else {
        return false;
    } // else
} // extractPatch
This was a very frustrating endeavor, but finally I solved it based on rroowwllaanndd's answer. I just had to add the angle correction when the width < height. Without this I got very strange results for images which fulfilled this condition.
import cv2 as cv

def crop_image(rect, image):
    shape = (image.shape[1], image.shape[0])  # cv.warpAffine expects shape in (width, height)
    center, size, theta = rect
    width, height = tuple(map(int, size))
    center = tuple(map(int, center))
    if width < height:
        theta -= 90
        width, height = height, width
    matrix = cv.getRotationMatrix2D(center=center, angle=theta, scale=1.0)
    image = cv.warpAffine(src=image, M=matrix, dsize=shape)
    x = int(center[0] - width // 2)
    y = int(center[1] - height // 2)
    image = image[y : y + height, x : x + width]
    return image
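Usage is the same as in the other answers (a sketch, assuming cnt is a contour and image is the source image):

rect = cv.minAreaRect(cnt)        # ((cx, cy), (w, h), angle)
patch = crop_image(rect, image)
cv.imwrite('patch.jpg', patch)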