For my neural network I want to augment my training data by adding small random rotations and zooms to my images. The issue I am having is that scipy is changing the size of my images when it applies the rotations and zooms. I need it to just clip the edges if part of the image goes out of bounds. All of my images must be the same size.
def loadImageData(img, distort=False):
    c, fn = img
    img = scipy.ndimage.imread(fn, True)
    if distort:
        img = scipy.ndimage.zoom(img, 1 + 0.05 * rnd(), mode='constant')
        img = scipy.ndimage.rotate(img, 10 * rnd(), mode='constant')
        print(img.shape)
    img = img - np.min(img)
    img = img / np.max(img)
    img = np.reshape(img, (1, *img.shape))
    y = np.zeros(ncats)
    y[c] = 1
    return (img, y)
scipy.ndimage.rotate accepts a reshape= parameter:
reshape : bool, optional
If reshape is true, the output shape is adapted so that the input
array is contained completely in the output. Default is True.
So to "clip" the edges you can simply call scipy.ndimage.rotate(img, ..., reshape=False).
from scipy.ndimage import rotate
from scipy.misc import face
from matplotlib import pyplot as plt
img = face()
rot = rotate(img, 30, reshape=False)
fig, ax = plt.subplots(1, 2)
ax[0].imshow(img)
ax[1].imshow(rot)
Things are more complicated for scipy.ndimage.zoom.
A naive method would be to zoom the entire input array, then use slice indexing and/or zero-padding to make the output the same size as your input. However, in cases where you're increasing the size of the image it's wasteful to interpolate pixels that are only going to get clipped off at the edges anyway.
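For comparison, a minimal sketch of that naive approach (the helper name is just illustrative, and it assumes a 2D greyscale array like the one produced by the loader in the question):

import numpy as np
from scipy.ndimage import zoom

def naive_clipped_zoom(img, zoom_factor):
    # Zoom the whole array first (wasteful when zooming in) ...
    zoomed = zoom(img, zoom_factor)
    h, w = img.shape
    zh, zw = zoomed.shape
    if zoom_factor < 1:
        # ... then zero-pad the smaller result back up to the input size
        out = np.zeros_like(img)
        top, left = (h - zh) // 2, (w - zw) // 2
        out[top:top+zh, left:left+zw] = zoomed
        return out
    # ... or crop the larger result down to the input size
    top, left = (zh - h) // 2, (zw - w) // 2
    return zoomed[top:top+h, left:left+w]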
Instead you could index only the part of the input that will fall within the bounds of the output array before you apply zoom:
import numpy as np
from scipy.ndimage import zoom
def clipped_zoom(img, zoom_factor, **kwargs):
    h, w = img.shape[:2]

    # For multichannel images we don't want to apply the zoom factor to the RGB
    # dimension, so instead we create a tuple of zoom factors, one per array
    # dimension, with 1's for any trailing dimensions after the width and height.
    zoom_tuple = (zoom_factor,) * 2 + (1,) * (img.ndim - 2)

    # Zooming out
    if zoom_factor < 1:
        # Bounding box of the zoomed-out image within the output array
        zh = int(np.round(h * zoom_factor))
        zw = int(np.round(w * zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2

        # Zero-padding
        out = np.zeros_like(img)
        out[top:top+zh, left:left+zw] = zoom(img, zoom_tuple, **kwargs)

    # Zooming in
    elif zoom_factor > 1:
        # Bounding box of the zoomed-in region within the input array
        zh = int(np.round(h / zoom_factor))
        zw = int(np.round(w / zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2

        out = zoom(img[top:top+zh, left:left+zw], zoom_tuple, **kwargs)

        # `out` might still be slightly larger than `img` due to rounding, so
        # trim off any extra pixels at the edges
        trim_top = ((out.shape[0] - h) // 2)
        trim_left = ((out.shape[1] - w) // 2)
        out = out[trim_top:trim_top+h, trim_left:trim_left+w]

    # If zoom_factor == 1, just return the input array
    else:
        out = img
    return out
For example:
zm1 = clipped_zoom(img, 0.5)
zm2 = clipped_zoom(img, 1.5)
fig, ax = plt.subplots(1, 3)
ax[0].imshow(img)
ax[1].imshow(zm1)
ax[2].imshow(zm2)
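Tying both pieces back to loadImageData from the question, the distortion branch could then become something like this (a sketch; rnd() is the question's own random helper):

if distort:
    img = clipped_zoom(img, 1 + 0.05 * rnd(), mode='constant')
    img = scipy.ndimage.rotate(img, 10 * rnd(), mode='constant', reshape=False)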
I recommend using cv2.resize because it is way faster than scipy.ndimage.zoom, probably due to support for simpler interpolation methods.
For a 480x640 image:
cv2.resize takes ~2 ms
scipy.ndimage.zoom takes ~500 ms
scipy.ndimage.zoom(..., order=0) takes ~175 ms
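Those timings will vary with hardware and interpolation order, but a rough benchmark along these lines (a sketch using a random 480x640 RGB array) reproduces the gap:

import timeit

import cv2
import numpy as np
from scipy.ndimage import zoom

img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

t_cv2 = timeit.timeit(lambda: cv2.resize(img, (1280, 960)), number=5) / 5
t_ndi = timeit.timeit(lambda: zoom(img, (2, 2, 1)), number=5) / 5
print('cv2.resize:         %.1f ms' % (t_cv2 * 1000))
print('scipy.ndimage.zoom: %.1f ms' % (t_ndi * 1000))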
If you are doing the data augmentation on the fly, this amount of speedup is invaluable because it means more experiments in less time.
Here is a version of clipped_zoom using cv2.resize
import cv2
import numpy as np

def cv2_clipped_zoom(img, zoom_factor=0):
    """
    Center zoom in/out of the given image, returning an enlarged/shrunk view of
    the image without changing dimensions
    ------
    Args:
        img : ndarray
            Image array
        zoom_factor : float
            amount of zoom as a ratio [0 to Inf). Default 0.
    ------
    Returns:
        result: ndarray
            numpy ndarray of the same shape as the input img, zoomed by the specified factor.
    """
    if zoom_factor == 0:
        return img

    height, width = img.shape[:2]  # it's also the final desired shape
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)

    ### Crop only the part that will remain in the result (more efficient)
    # Centered bbox of the final desired size in resized (larger/smaller) image coordinates
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1, x1, y2, x2])

    # Map back to original image coordinates
    bbox = (bbox / zoom_factor).astype(int)
    y1, x1, y2, x2 = bbox
    cropped_img = img[y1:y2, x1:x2]

    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) // 2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0, 0)] * (img.ndim - 2)

    result = cv2.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='constant')
    assert result.shape[0] == height and result.shape[1] == width
    return result
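For example, a quick sanity check on a random image:

img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
zoomed_in = cv2_clipped_zoom(img, 1.5)   # content enlarged, shape preserved
zoomed_out = cv2_clipped_zoom(img, 0.5)  # content shrunk, black padding added
assert zoomed_in.shape == img.shape and zoomed_out.shape == img.shape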
For an assignment I want to resize a .jpg image with Python code, but without using the pil.image.resize() function or another similar function. I want to write the code myself but I can't figure out how. The image is RGB. I have found this can be solved by nearest neighbor interpolation (as well as other methods, but this one is fine for my specific assignment). The height and the width should both be able to be made bigger or smaller. So far I only have this:
import numpy as np
import scipy as sc
import matplotlib.pyplot as plt
import math
import PIL
from PIL import Image
img = np.array(Image.open("foto1.jpg"))
height = img.shape[0]
width = img.shape[1]
dim = img.shape[2]
new_h = int(input("New height: "))
new_w = int(input("New width: "))
imgR = img[:,:,0] #red pixels
imgG = img[:,:,1] #green pixels
imgB = img[:,:,2] #blue pixels
newR = np.empty([new_h, new_w])
newG = np.empty([new_h, new_w])
newB = np.empty([new_h, new_w])
So now all three colours have a new array of the right dimensions. Unfortunately on the web I can only find people who use resize() functions... Does anyone know?
Thanks in advance!
The key to doing any image transformation like resizing is to have a mapping from output coordinates to input coordinates. Then you can simply iterate over the entire output and grab a pixel from the input. Nearest neighbor makes this particularly easy, because there's never a need to interpolate a pixel that doesn't lie exactly on integer coordinates - you simply round the coordinates to the nearest integer.
for new_y in range(new_h):
    # Map the output row back to an input row (output -> input scaling)
    old_y = int(round(new_y * (height - 1) / (new_h - 1)))
    if old_y < 0: old_y = 0
    if old_y >= height: old_y = height - 1
    for new_x in range(new_w):
        # Same mapping for columns
        old_x = int(round(new_x * (width - 1) / (new_w - 1)))
        if old_x < 0: old_x = 0
        if old_x >= width: old_x = width - 1
        newR[new_y, new_x] = imgR[old_y, old_x]
        newG[new_y, new_x] = imgG[old_y, old_x]
        newB[new_y, new_x] = imgB[old_y, old_x]
The following code could do the trick.
def resize_img(image, resize_width, resize_height):
    """
    :params
        image: shape -> (width, height, channels)
        resize_width: The resize width dimension.
        resize_height: The resize height dimension.
    :returns
        array of shape -> (resized_width, resized_height, channels)
    """
    original_width, original_height, channel = image.shape
    red_channel = image[:, :, 0]
    green_channel = image[:, :, 1]
    blue_channel = image[:, :, 2]

    resized_image = np.zeros((resize_width, resize_height, channel), dtype=np.uint8)

    x_scale = original_width / resize_width
    y_scale = original_height / resize_height

    # Nearest-neighbour source index for each output row/column
    resize_index_x = np.ceil(np.arange(0, original_width, x_scale)).astype(int)
    resize_index_y = np.ceil(np.arange(0, original_height, y_scale)).astype(int)
    resize_index_x[np.where(resize_index_x == original_width)] -= 1
    resize_index_y[np.where(resize_index_y == original_height)] -= 1

    resized_image[:, :, 0] = red_channel[resize_index_x, :][:, resize_index_y]
    resized_image[:, :, 1] = green_channel[resize_index_x, :][:, resize_index_y]
    resized_image[:, :, 2] = blue_channel[resize_index_x, :][:, resize_index_y]
    return resized_image
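A quick usage sketch (reusing foto1.jpg from the question):

from PIL import Image
import numpy as np

img = np.array(Image.open("foto1.jpg"))
resized = resize_img(img, 300, 200)  # new spatial dims, channels preserved
Image.fromarray(resized).show()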
How do I get ImageOps.fit(source28x32, (128, 128)) to fit without cropping off the top/bottom/sides? Do I really have to find the aspect, resize accordingly so the enlarged version does not exceed 128x128, and then add border pixels (or center the image in a 128x128 canvas)? Mind you that the source can be of any ratio, the 28x32 is just an example.
source image (28x32)
fitted image (128x128)
This is my attempt so far, not particularly elegant
def fit(im):
    size = 128
    x, y = im.size
    ratio = float(x) / float(y)
    if x > y:
        x = size
        y = size * 1 / ratio
    else:
        y = size
        x = size * ratio
    x, y = int(x), int(y)
    im = im.resize((x, y))
    new_im = Image.new('L', (size, size), 0)
    new_im.paste(im, ((size - x) // 2, (size - y) // 2))
    return new_im
New fitted image
Here is the function implemented in both PIL and cv2. The input can be of any size; the function finds the scale needed to fit the largest edge to the desired width, and then puts it onto a black square image of the desired width.
In PIL
def resize_PIL(im, output_edge):
    scale = output_edge / max(im.size)
    new = Image.new(im.mode, (output_edge, output_edge), (0, 0, 0))
    paste = im.resize((int(im.width * scale), int(im.height * scale)), resample=Image.NEAREST)
    new.paste(paste, (0, 0))
    return new
In cv2
def resize_cv2(im, output_edge):
    scale = output_edge / max(im.shape[:2])
    new = np.zeros((output_edge, output_edge, 3), np.uint8)
    paste = cv2.resize(im, None, fx=scale, fy=scale, interpolation=cv2.INTER_NEAREST)
    new[:paste.shape[0], :paste.shape[1], :] = paste
    return new
With a desired width of 128, the before/after example images are omitted here.
Not shown: these functions work on images larger than the desired size.
This works pretty well to fit the image to the size you want while filling in the rest with black space:
from PIL import Image, ImageOps
def fit(im, width):
    border = int((max(im.width, im.height) - min(im.width, im.height)) / 2)
    im = ImageOps.expand(im, border)
    im = ImageOps.fit(im, (width, width))
    return im
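Usage is then simply (the file name is hypothetical):

im = fit(Image.open("source28x32.png"), 128)
im.show()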
I want to divide a picture into equally big squares, measure the average grayscale level in each, and replace it with a blob, aka halftoning. This code gives me a picture, but it doesn't look right. Any ideas what could be wrong?
im = scipy.misc.imread("uggla.tif")
def halftoning(im):
    im = im.astype('float64')
    width, height = im.shape
    halftone_pic = np.zeros((width, height))
    for x in range(width):
        for y in range(height):
            floating_matrix = im[x:x + 1, y:y + 1]
            sum = np.sum(floating_matrix)
            mean = np.mean(sum)
            round = (mean > 128) * 255
            halftone_pic[x, y] = round
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(im, cmap="gray")
    ax[1].imshow(halftone_pic, cmap="gray")
    plt.show()
Here's something that does what you want. It's essentially a simplification of the code in the accepted answer to the related question How to create CMYK halftone Images from a color image?:
from PIL import Image, ImageDraw, ImageStat

# Adaption of answer https://stackoverflow.com/a/10575940/355230
def halftone(img, sample, scale, angle=45):
    ''' Returns a halftone image created from the given input image `img`.
    `sample` (in pixels), determines the sample box size from the original
    image. The maximum output dot diameter is given by `sample` * `scale`
    (which is also the number of possible dot sizes). So `sample` == 1 will
    preserve the original image resolution, but `scale` must be > 1 to allow
    variations in dot size.
    '''
    img_grey = img.convert('L')  # Convert to greyscale.
    channel = img_grey.split()[0]  # Get grey pixels.
    channel = channel.rotate(angle, expand=1)

    size = channel.size[0]*scale, channel.size[1]*scale
    bitmap = Image.new('1', size)
    draw = ImageDraw.Draw(bitmap)

    for x in range(0, channel.size[0], sample):
        for y in range(0, channel.size[1], sample):
            box = channel.crop((x, y, x+sample, y+sample))
            mean = ImageStat.Stat(box).mean[0]
            diameter = (mean/255) ** 0.5
            edge = 0.5 * (1-diameter)
            x_pos, y_pos = (x+edge) * scale, (y+edge) * scale
            box_edge = sample * diameter * scale
            draw.ellipse((x_pos, y_pos, x_pos+box_edge, y_pos+box_edge),
                         fill=255)

    bitmap = bitmap.rotate(-angle, expand=1)
    width_half, height_half = bitmap.size
    xx = (width_half - img.size[0]*scale) / 2
    yy = (height_half - img.size[1]*scale) / 2
    bitmap = bitmap.crop((xx, yy, xx + img.size[0]*scale,
                          yy + img.size[1]*scale))
    return Image.merge('1', [bitmap])

# Sample usage
img = Image.open('uggla.tif')
img_ht = halftone(img, 8, 1)
img_ht.show()
Here are the results from using this as the input image:
Halftoned result produced:
I'm pasting a randomly generated barcode on a background image.
This barcode has been randomly rotated, skewed, and scaled.
Then, this barcode is randomly placed onto the background image.
I'm trying to find out the coordinates of the actual barcode, ignoring the expanded black mask.
I'm a beginner in matrices and image manipulation so any help, especially in the math, would be appreciated.
This is where I generate the barcode, using pdf417gen library, along with the coordinates of the barcode.
import numpy as np
import os
import random
import sys

from pdf417gen import encode, render_image
from PIL import Image

def generate_barcode(self):
    barcode = encode("random text data", columns=5, security_level=5)
    scale = 5
    ratio = 3
    padding = 5
    barcode_image = render_image(barcode, scale=scale, ratio=ratio, padding=padding)
    barcode_coords = np.array([
        [(barcode_image.width - padding) / float(barcode_image.width), (barcode_image.height - padding) / float(barcode_image.height)],
        [padding / float(barcode_image.width), (barcode_image.height - padding) / float(barcode_image.height)],
        [padding / float(barcode_image.width), padding / float(barcode_image.height)],
        [(barcode_image.width - padding) / float(barcode_image.width), padding / float(barcode_image.height)]
    ])
    return (barcode_coords, barcode_image)
Once I have the barcode's image and coordinates, I do the following:
transform the barcode's image
attempt to match the coordinates with the image's transformation
paste the image onto a background image
then draw a red outline using the coordinates
The red outline should outline the barcode's image.
Here's where I transform the barcode image and paste it to the background image.
def composite_images(self, background_image, barcode_coords, barcode_image):
    coords = barcode_coords
    barcode = barcode_image

    # instantiating the transformation variables
    scale = random.randrange(4, 50) / 100.0
    size = int(min(background_image.size) * scale)  # background_image.size returns (width, height)
    barcode = barcode.resize((int(size * 2.625), size))  # width:height ratio is 2.625:1
    rotation = random.randrange(0, 360)
    xstretch = random.randrange(0, 100) / 100.0
    ystretch = random.randrange(0, 100) / 100.0
    xshear = random.randrange(0, 100) / 100.0
    yshear = random.randrange(0, 100) / 100.0

    # set affine transform on the barcode coordinates
    affine_transform = self.get_affine_transform(rotation, xstretch, ystretch, xshear, yshear)
    coords = self.transform_coords(coords, affine_transform, True)
    expand_mask = self.transform_coords(np.array([  # shifts expand mask based on transformation
        [0.0, 0.0],
        [float(size * 2.625), 0.0],
        [float(size * 2.625), float(size)],
        [0.0, float(size)]
    ]), affine_transform, False)

    minx = min(expand_mask[:, 0])
    maxx = max(expand_mask[:, 0])
    miny = min(expand_mask[:, 1])
    maxy = max(expand_mask[:, 1])

    mat_inv = np.linalg.inv(np.array([  # the inverse matrix
        [affine_transform[0, 0], affine_transform[0, 1], -minx],
        [affine_transform[1, 0], affine_transform[1, 1], -miny],
        [0, 0, 1.0]
    ]))
    image_matrix = (mat_inv[0, 0], mat_inv[0, 1], mat_inv[0, 2],
                    mat_inv[1, 0], mat_inv[1, 1], mat_inv[1, 2])
    new_size = (int(maxx - minx), int(maxy - miny))

    # set affine transform on the barcode image using data from coordinates affine transformation
    barcode = barcode.transform(new_size, method=Image.AFFINE, data=image_matrix)

    # paste the barcode image onto a random position on background image
    region_x = random.randrange(0, background_image.width - size)
    region_y = random.randrange(0, background_image.height - size)
    background_image.paste(barcode, (region_x, region_y))

    coords *= scale
    coords += [region_x / float(background_image.width), region_y / float(background_image.height)]
    return (coords, background_image)

def get_affine_transform(self, rotation, xstretch, ystretch, xshear, yshear):
    theta = -(rotation / 180.0) * np.pi
    return np.array([
        [np.cos(theta) * xstretch, -np.sin(theta) * xshear],
        [np.sin(theta) * ystretch, np.cos(theta) * yshear]
    ])

def transform_coords(self, coords, affine_transform, center):
    if center:
        coords -= (.5, .5)  # center on origin
    coords = np.dot(coords, affine_transform.T)
    if center:
        coords += (.5, .5)  # reset centering
    return coords
Now I draw the red outline using the coords and image (with pasted barcode) returned from composite_images().
def draw_red_outline(self, box_coords, image):
    outline = box_coords * [image.width, image.height]
    outline = outline.astype(int)
    outline = tuple(map(tuple, outline))

    draw = ImageDraw.Draw(image)
    draw.polygon(outline, outline=(255, 0, 0))
    del draw

    image.show()
I'm unsure as to where my math is going wrong.
To get the coordinates of the transformed points you can do the following:
After getting the transformation matrix m, you apply it to the image:
transformed_img = cv2.warpPerspective(source_img, m, image_shape)
and the transformed image contains the result, with the coordinates you want to calculate plus some black region.
So, the solution for each of the 4 points' coordinates (if there are no 0 coordinates) is the following:
point = np.array([w, h])  # width and height of the source point (before transform)
homg_point = [point[0], point[1], 1]  # homogeneous coords
transf_homg_point = m.dot(homg_point)  # transform
transf_homg_point /= transf_homg_point[2]  # scale
transf_point = transf_homg_point[:2]  # back to Cartesian coords
print(transf_point)  # check the result
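To push all four corners through at once, a vectorized sketch (assuming m is the 3x3 perspective matrix and w, h are the source width and height):

import numpy as np

corners = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=float)
homg = np.hstack([corners, np.ones((4, 1))])  # homogeneous coords, shape (4, 3)
transf = homg @ m.T                           # apply the transform to every corner
transf = transf[:, :2] / transf[:, 2:3]       # divide out the projective scale
print(transf)                                 # the four transformed corners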
In tensorflow, I would like to rotate an image by a random angle, for data augmentation. But I don't find this transformation in the tf.image module.
This can be done in tensorflow now:
tf.contrib.image.rotate(images, degrees * math.pi / 180, interpolation='BILINEAR')
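Since the question asks for a random angle, one way (a sketch against the same TF1 contrib API; images is whatever batch the line above operates on) is to draw the angle as a tensor so it changes on every run:

import math
import tensorflow as tf

angle_deg = tf.random_uniform([], minval=-10, maxval=10)  # new random angle each run
rotated = tf.contrib.image.rotate(images, angle_deg * math.pi / 180, interpolation='BILINEAR')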
Because I wanted to be able to rotate tensors I came up with the following piece of code, which rotates a [height, width, depth] tensor by a given angle:
def rotate_image_tensor(image, angle, mode='black'):
    """
    Rotates a 3D tensor (HWD), which represents an image by given radian angle.
    New image has the same size as the input image.
    mode controls what happens to border pixels.
    mode = 'black' results in black bars (value 0 in unknown areas)
    mode = 'white' results in value 255 in unknown areas
    mode = 'ones' results in value 1 in unknown areas
    mode = 'repeat' keeps repeating the closest pixel known
    """
    s = image.get_shape().as_list()
    assert len(s) == 3, "Input needs to be 3D."
    assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
    image_center = [np.floor(x/2) for x in s]

    # Coordinates of new image
    coord1 = tf.range(s[0])
    coord2 = tf.range(s[1])

    # Create vectors of those coordinates in order to vectorize the image
    coord1_vec = tf.tile(coord1, [s[1]])

    coord2_vec_unordered = tf.tile(coord2, [s[0]])
    coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
    coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])

    # center coordinates since rotation center is supposed to be in the image center
    coord1_vec_centered = coord1_vec - image_center[0]
    coord2_vec_centered = coord2_vec - image_center[1]

    coord_new_centered = tf.cast(tf.pack([coord1_vec_centered, coord2_vec_centered]), tf.float32)

    # Perform backward transformation of the image coordinates
    rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
    rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
    coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)

    # Find nearest neighbor in old image
    coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
    coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)

    # Clip values to stay inside image coordinates
    if mode == 'repeat':
        coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
        coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
    else:
        outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
        outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
        outside_ind = tf.logical_or(outside_ind1, outside_ind2)

        coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
        coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))

        coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
        coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))

    coord_old_clipped = tf.cast(tf.transpose(tf.pack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)

    # Coordinates of the new image
    coord_new = tf.transpose(tf.cast(tf.pack([coord1_vec, coord2_vec]), tf.int32), [1, 0])

    image_channel_list = tf.split(2, s[2], image)

    image_rotated_channel_list = list()
    for image_channel in image_channel_list:
        image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)

        if (mode == 'black') or (mode == 'repeat'):
            background_color = 0
        elif mode == 'ones':
            background_color = 1
        elif mode == 'white':
            background_color = 255

        image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
                                                             background_color, validate_indices=False))

    image_rotated = tf.transpose(tf.pack(image_rotated_channel_list), [1, 2, 0])

    return image_rotated
For TensorFlow 2.0:
import tensorflow_addons as tfa
tfa.image.transform_ops.rotate(image, radian)
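For example (a minimal sketch with a dummy image tensor; the angle is in radians):

import math
import tensorflow as tf
import tensorflow_addons as tfa

image = tf.zeros([128, 128, 3])  # dummy HWC image
rotated = tfa.image.transform_ops.rotate(image, math.radians(15))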
Rotation and cropping in TensorFlow
I personally needed image rotation and cropping out of black borders in TensorFlow, and I could implement it as below.
def _rotate_and_crop(image, output_height, output_width, rotation_degree, do_crop):
    """Rotate the given image with the given rotation degree and crop for the black edges if necessary
    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        rotation_degree: The degree of rotation on the image.
        do_crop: Do cropping if it is True.
    Returns:
        A rotated image.
    """
    # Rotate the given image with the given rotation degree
    if rotation_degree != 0:
        image = tf.contrib.image.rotate(image, math.radians(rotation_degree), interpolation='BILINEAR')

        # Center crop to omit black noise on the edges
        if do_crop == True:
            lrr_width, lrr_height = _largest_rotated_rect(output_height, output_width, math.radians(rotation_degree))
            resized_image = tf.image.central_crop(image, float(lrr_height)/output_height)
            image = tf.image.resize_images(resized_image, [output_height, output_width],
                                           method=tf.image.ResizeMethod.BILINEAR, align_corners=False)

    return image
def _largest_rotated_rect(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle within the rotated rectangle.
    Original JS code by 'Andri' and Magnus Hoff from Stack Overflow
    Converted to Python by Aaron Snoswell
    Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """
    quadrant = int(math.floor(angle / (math.pi / 2))) & 3
    sign_alpha = angle if ((quadrant & 1) == 0) else math.pi - angle
    alpha = (sign_alpha % math.pi + math.pi) % math.pi

    bb_w = w * math.cos(alpha) + h * math.sin(alpha)
    bb_h = w * math.sin(alpha) + h * math.cos(alpha)

    gamma = math.atan2(bb_w, bb_w) if (w < h) else math.atan2(bb_w, bb_w)
    delta = math.pi - alpha - gamma

    length = h if (w < h) else w
    d = length * math.cos(alpha)
    a = d * math.sin(alpha) / math.sin(delta)

    y = a * math.cos(gamma)
    x = y * math.tan(gamma)

    return (
        bb_w - 2 * x,
        bb_h - 2 * y
    )
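A usage sketch against the TF1 API (the file name is hypothetical):

import math
import tensorflow as tf

raw = tf.read_file('example.jpg')  # hypothetical input file
image = tf.image.decode_jpeg(raw, channels=3)
augmented = _rotate_and_crop(image, 224, 224, 15, do_crop=True)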
If you need a further example implementation and visualization in TensorFlow, you can use this repository.
I hope this is helpful to other people.
Update: see #astromme's answer below. Tensorflow now supports rotating images natively.
What you can do while there is no native method in tensorflow is something like this:
import numpy as np
import tensorflow as tf
from PIL import Image

sess = tf.InteractiveSession()
# Pass image tensor object to a PIL image
image = Image.fromarray(image.eval())
# Use PIL or other library of the sort to rotate
rotated = Image.Image.rotate(image, degrees)
# Convert rotated image back to tensor
rotated_tensor = tf.convert_to_tensor(np.array(rotated))
tf.contrib is not available in tensorflow 2.
For tensorflow >= 2.* the following can be used:
tf.keras.preprocessing.image.random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0., interpolation_order=1)

You can find the documentation here:
https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/random_rotation
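For example (a sketch; note this helper operates on a single image as a NumPy array, and the axis arguments above describe a channels-first layout):

import numpy as np
from tensorflow.keras.preprocessing.image import random_rotation

x = np.random.rand(3, 64, 64)  # channels-first image matching the axis arguments
rotated = random_rotation(x, 20, row_axis=1, col_axis=2, channel_axis=0,
                          fill_mode='nearest')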
Here's the #zimmermc answer updated to Tensorflow v0.12
Changes:
pack() is now stack()
order of split parameters reversed
def rotate_image_tensor(image, angle, mode='white'):
    """
    Rotates a 3D tensor (HWD), which represents an image by given radian angle.
    New image has the same size as the input image.
    mode controls what happens to border pixels.
    mode = 'black' results in black bars (value 0 in unknown areas)
    mode = 'white' results in value 255 in unknown areas
    mode = 'ones' results in value 1 in unknown areas
    mode = 'repeat' keeps repeating the closest pixel known
    """
    s = image.get_shape().as_list()
    assert len(s) == 3, "Input needs to be 3D."
    assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
    image_center = [np.floor(x/2) for x in s]

    # Coordinates of new image
    coord1 = tf.range(s[0])
    coord2 = tf.range(s[1])

    # Create vectors of those coordinates in order to vectorize the image
    coord1_vec = tf.tile(coord1, [s[1]])

    coord2_vec_unordered = tf.tile(coord2, [s[0]])
    coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
    coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])

    # center coordinates since rotation center is supposed to be in the image center
    coord1_vec_centered = coord1_vec - image_center[0]
    coord2_vec_centered = coord2_vec - image_center[1]

    coord_new_centered = tf.cast(tf.stack([coord1_vec_centered, coord2_vec_centered]), tf.float32)

    # Perform backward transformation of the image coordinates
    rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
    rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
    coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)

    # Find nearest neighbor in old image
    coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
    coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)

    # Clip values to stay inside image coordinates
    if mode == 'repeat':
        coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
        coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
    else:
        outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
        outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
        outside_ind = tf.logical_or(outside_ind1, outside_ind2)

        coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
        coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))

        coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
        coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))

    coord_old_clipped = tf.cast(tf.transpose(tf.stack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)

    # Coordinates of the new image
    coord_new = tf.transpose(tf.cast(tf.stack([coord1_vec, coord2_vec]), tf.int32), [1, 0])

    image_channel_list = tf.split(image, s[2], 2)

    image_rotated_channel_list = list()
    for image_channel in image_channel_list:
        image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)

        if (mode == 'black') or (mode == 'repeat'):
            background_color = 0
        elif mode == 'ones':
            background_color = 1
        elif mode == 'white':
            background_color = 255

        image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
                                                             background_color, validate_indices=False))

    image_rotated = tf.transpose(tf.stack(image_rotated_channel_list), [1, 2, 0])

    return image_rotated
For rotating an image or a batch of images counter-clockwise by multiples of 90 degrees, you can use tf.image.rot90(image,k=1,name=None).
k denotes the number of 90 degrees rotations you want to make.
In the case of a single image, image is a 3-D Tensor of shape [height, width, channels], and in the case of a batch of images, image is a 4-D Tensor of shape [batch, height, width, channels].
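For example, a minimal sketch rotating a single dummy image by 270 degrees counter-clockwise:

import tensorflow as tf

image = tf.zeros([64, 64, 3])         # [height, width, channels]
rotated = tf.image.rot90(image, k=3)  # three 90-degree rotations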