Hi I am creating a program that replaces a face in a image with someone else's face. However, I am stuck on trying to insert the new face into the original, larger image. I have researched ROI and addWeight(needs the images to be the same size) but I haven't found a way to do this in python. Any advise is great. I am new to opencv.
I am using the following test images:
smaller_image:
larger_image:
Here is my Code so far... a mixer of other samples:
import cv2
import cv2.cv as cv
import sys
import numpy
def detect(img, cascade):
rects = cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=3, minSize=(10, 10), flags = cv.CV_HAAR_SCALE_IMAGE)
if len(rects) == 0:
return []
rects[:,2:] += rects[:,:2]
return rects
def draw_rects(img, rects, color):
for x1, y1, x2, y2 in rects:
cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
if __name__ == '__main__':
if len(sys.argv) != 2: ## Check for error in usage syntax
print "Usage : python faces.py <image_file>"
else:
img = cv2.imread(sys.argv[1],cv2.CV_LOAD_IMAGE_COLOR) ## Read image file
if (img == None):
print "Could not open or find the image"
else:
cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
gray = cv2.equalizeHist(gray)
rects = detect(gray, cascade)
## Extract face coordinates
x1 = rects[0][3]
y1 = rects[0][0]
x2 = rects[0][4]
y2 = rects[0][5]
y=y2-y1
x=x2-x1
## Extract face ROI
faceROI = gray[x1:x2, y1:y2]
## Show face ROI
cv2.imshow('Display face ROI', faceROI)
small = cv2.imread("average_face.png",cv2.CV_LOAD_IMAGE_COLOR)
print "here"
small=cv2.resize(small, (x, y))
cv2.namedWindow('Display image') ## create window for display
cv2.imshow('Display image', small) ## Show image in the window
print "size of image: ", img.shape ## print size of image
cv2.waitKey(1000)
A simple way to achieve what you want:
import cv2
s_img = cv2.imread("smaller_image.png")
l_img = cv2.imread("larger_image.jpg")
x_offset=y_offset=50
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
Update
I suppose you want to take care of the alpha channel too. Here is a quick and dirty way of doing so:
s_img = cv2.imread("smaller_image.png", -1)
y1, y2 = y_offset, y_offset + s_img.shape[0]
x1, x2 = x_offset, x_offset + s_img.shape[1]
alpha_s = s_img[:, :, 3] / 255.0
alpha_l = 1.0 - alpha_s
for c in range(0, 3):
l_img[y1:y2, x1:x2, c] = (alpha_s * s_img[:, :, c] +
alpha_l * l_img[y1:y2, x1:x2, c])
Using #fireant's idea, I wrote up a function to handle overlays. This works well for any position argument (including negative positions).
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask):
"""Overlay `img_overlay` onto `img` at (x, y) and blend using `alpha_mask`.
`alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
"""
# Image ranges
y1, y2 = max(0, y), min(img.shape[0], y + img_overlay.shape[0])
x1, x2 = max(0, x), min(img.shape[1], x + img_overlay.shape[1])
# Overlay ranges
y1o, y2o = max(0, -y), min(img_overlay.shape[0], img.shape[0] - y)
x1o, x2o = max(0, -x), min(img_overlay.shape[1], img.shape[1] - x)
# Exit if nothing to do
if y1 >= y2 or x1 >= x2 or y1o >= y2o or x1o >= x2o:
return
# Blend overlay within the determined ranges
img_crop = img[y1:y2, x1:x2]
img_overlay_crop = img_overlay[y1o:y2o, x1o:x2o]
alpha = alpha_mask[y1o:y2o, x1o:x2o, np.newaxis]
alpha_inv = 1.0 - alpha
img_crop[:] = alpha * img_overlay_crop + alpha_inv * img_crop
Example usage:
import numpy as np
from PIL import Image
# Prepare inputs
x, y = 50, 0
img = np.array(Image.open("img_large.jpg"))
img_overlay_rgba = np.array(Image.open("img_small.png"))
# Perform blending
alpha_mask = img_overlay_rgba[:, :, 3] / 255.0
img_result = img[:, :, :3].copy()
img_overlay = img_overlay_rgba[:, :, :3]
overlay_image_alpha(img_result, img_overlay, x, y, alpha_mask)
# Save result
Image.fromarray(img_result).save("img_result.jpg")
Result:
If you encounter errors or unusual outputs, please ensure:
img should not contain an alpha channel. (e.g. If it is RGBA, convert to RGB first.)
img_overlay has the same number of channels as img.
Based on fireant's excellent answer above, here is the alpha blending but a bit more human legible. You may need to swap 1.0-alpha and alpha depending on which direction you're merging (mine is swapped from fireant's answer).
o* == s_img.*
b* == b_img.*
for c in range(0,3):
alpha = s_img[oy:oy+height, ox:ox+width, 3] / 255.0
color = s_img[oy:oy+height, ox:ox+width, c] * (1.0-alpha)
beta = l_img[by:by+height, bx:bx+width, c] * (alpha)
l_img[by:by+height, bx:bx+width, c] = color + beta
Here it is:
def put4ChannelImageOn4ChannelImage(back, fore, x, y):
rows, cols, channels = fore.shape
trans_indices = fore[...,3] != 0 # Where not transparent
overlay_copy = back[y:y+rows, x:x+cols]
overlay_copy[trans_indices] = fore[trans_indices]
back[y:y+rows, x:x+cols] = overlay_copy
#test
background = np.zeros((1000, 1000, 4), np.uint8)
background[:] = (127, 127, 127, 1)
overlay = cv2.imread('imagee.png', cv2.IMREAD_UNCHANGED)
put4ChannelImageOn4ChannelImage(background, overlay, 5, 5)
A simple function that blits an image front onto an image back and returns the result. It works with both 3 and 4-channel images and deals with the alpha channel. Overlaps are handled as well.
The output image has the same size as back, but always 4 channels.
The output alpha channel is given by (u+v)/(1+uv) where u,v are the alpha channels of the front and back image and -1 <= u,v <= 1. Where there is no overlap with front, the alpha value from back is taken.
import cv2
def merge_image(back, front, x,y):
# convert to rgba
if back.shape[2] == 3:
back = cv2.cvtColor(back, cv2.COLOR_BGR2BGRA)
if front.shape[2] == 3:
front = cv2.cvtColor(front, cv2.COLOR_BGR2BGRA)
# crop the overlay from both images
bh,bw = back.shape[:2]
fh,fw = front.shape[:2]
x1, x2 = max(x, 0), min(x+fw, bw)
y1, y2 = max(y, 0), min(y+fh, bh)
front_cropped = front[y1-y:y2-y, x1-x:x2-x]
back_cropped = back[y1:y2, x1:x2]
alpha_front = front_cropped[:,:,3:4] / 255
alpha_back = back_cropped[:,:,3:4] / 255
# replace an area in result with overlay
result = back.copy()
print(f'af: {alpha_front.shape}\nab: {alpha_back.shape}\nfront_cropped: {front_cropped.shape}\nback_cropped: {back_cropped.shape}')
result[y1:y2, x1:x2, :3] = alpha_front * front_cropped[:,:,:3] + (1-alpha_front) * back_cropped[:,:,:3]
result[y1:y2, x1:x2, 3:4] = (alpha_front + alpha_back) / (1 + alpha_front*alpha_back) * 255
return result
For just add an alpha channel to s_img I just use cv2.addWeighted before the line
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
as following:
s_img=cv2.addWeighted(l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]],0.5,s_img,0.5,0)
When attempting to write to the destination image using any of these answers above and you get the following error:
ValueError: assignment destination is read-only
A quick potential fix is to set the WRITEABLE flag to true.
img.setflags(write=1)
A simple 4on4 pasting function that works-
def paste(background,foreground,pos=(0,0)):
#get position and crop pasting area if needed
x = pos[0]
y = pos[1]
bgWidth = background.shape[0]
bgHeight = background.shape[1]
frWidth = foreground.shape[0]
frHeight = foreground.shape[1]
width = bgWidth-x
height = bgHeight-y
if frWidth<width:
width = frWidth
if frHeight<height:
height = frHeight
# normalize alpha channels from 0-255 to 0-1
alpha_background = background[x:x+width,y:y+height,3] / 255.0
alpha_foreground = foreground[:width,:height,3] / 255.0
# set adjusted colors
for color in range(0, 3):
fr = alpha_foreground * foreground[:width,:height,color]
bg = alpha_background * background[x:x+width,y:y+height,color] * (1 - alpha_foreground)
background[x:x+width,y:y+height,color] = fr+bg
# set adjusted alpha and denormalize back to 0-255
background[x:x+width,y:y+height,3] = (1 - (1 - alpha_foreground) * (1 - alpha_background)) * 255
return background
I reworked #fireant's concept to allow for optional alpha masks and allow any x or y, including values outside of the bounds of the image. It will crop to the bounds.
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask=None):
"""Overlay `img_overlay` onto `img` at (x, y) and blend using optional `alpha_mask`.
`alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
"""
if y < 0 or y + img_overlay.shape[0] > img.shape[0] or x < 0 or x + img_overlay.shape[1] > img.shape[1]:
y_origin = 0 if y > 0 else -y
y_end = img_overlay.shape[0] if y < 0 else min(img.shape[0] - y, img_overlay.shape[0])
x_origin = 0 if x > 0 else -x
x_end = img_overlay.shape[1] if x < 0 else min(img.shape[1] - x, img_overlay.shape[1])
img_overlay_crop = img_overlay[y_origin:y_end, x_origin:x_end]
alpha = alpha_mask[y_origin:y_end, x_origin:x_end] if alpha_mask is not None else None
else:
img_overlay_crop = img_overlay
alpha = alpha_mask
y1 = max(y, 0)
y2 = min(img.shape[0], y1 + img_overlay_crop.shape[0])
x1 = max(x, 0)
x2 = min(img.shape[1], x1 + img_overlay_crop.shape[1])
img_crop = img[y1:y2, x1:x2]
img_crop[:] = alpha * img_overlay_crop + (1.0 - alpha) * img_crop if alpha is not None else img_overlay_crop
Related
I am trying to implement the conversion discussed in the blogspot https://medium.com/#alexppppp/how-to-annotate-keypoints-using-roboflow-9bc2aa8915cd.
In my dataset I need 4 keypoints and 1 box. I want the edited code for it.
My code and error are shown below.
keypoint_names = ['Head', 'Tail']
rectangles2keypoints = {1:0, 2:1}
def converter(file_labels, file_image, keypoint_names):
img = cv2.imread(file_image)
img_w, img_h = img.shape[1], img.shape[0]
with open(file_labels) as f:
lines_txt = f.readlines()
lines = []
for line in lines_txt:
lines.append([int(line.split()[0])] + [round(float(el), 5) for el in line.split()[1:]])
bboxes = []
keypoints = []
# In this loop we convert normalized coordinates to absolute coordinates
for line in lines:
# Number 0 is a class of rectangles related to bounding boxes.
if line[0] == 0:
x_c, y_c, w, h = round(line[1] * img_w), round(line[2] * img_h), round(line[3] * img_w), round(line[4] * img_h)
bboxes.append([round(x_c - w/2), round(y_c - h/2), round(x_c + w/2), round(y_c + h/2)])
# Other numbers are the classes of rectangles related to keypoints.
# After convertion, numbers of keypoint classes should start with 0, so we apply rectangles2keypoints dictionary to achieve that.
# In our case:
# 1 is rectangle for head keypoint, which is 0, so we convert 1 to 0;
# 2 is rectangle for tail keypoint, which is 1, so we convert 2 to 1.
if line[0] != 0:
kp_id, x_c, y_c = rectangles2keypoints[line[0]], round(line[1] * img_w), round(line[2] * img_h)
keypoints.append([kp_id, x_c, y_c])
# In this loop we are iterating over each keypoint and looking to which bounding box it matches.
# Thus, we are matching keypoints and corresponding bounding boxes.
keypoints_sorted = [[[] for _ in keypoint_names] for _ in bboxes]
for kp in keypoints:
kp_id, kp_x, kp_y = kp[0], kp[1], kp[2]
for bbox_idx, bbox in enumerate(bboxes):
x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
if x1 < kp_x < x2 and y1 < kp_y < y2:
keypoints_sorted[bbox_idx][kp_id] = [kp_x, kp_y, 1] # All keypoints are visible
return bboxes, keypoints_sorted
enter image description here
i ma trying to make an automatic annotiation tool for yolo object detection which useses previosly trained model to find the detections , and i managed to put together some code but i am stuck a little, as far as i know this needs to be the annotation format for YOLO:
18 0.154167 0.431250 0.091667 0.612500
And with my code i get
0.5576068858305613, 0.5410404056310654, -0.7516528169314066, 0.33822181820869446
I am not sure why i get the - at the third number and if i need to shorten my float number,
I will post the code below if someone could help me , after completing this project i will post the whole code if someone wants to use it
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = (box[0] + box[1])/2.0
y = (box[2] + box[3])/2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x*dw
w = w*dw
y = y*dh
h = h*dh
return (x,y,w,h)
The above code is the function that converts the coordinates for YOLO format , For the size you need to pass the (w,h) and the for the box you need to pass (x,x+w, y, y+h)
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
# path_name = "images/city_scene.jpg"
path_name = image
image = cv2.imread(path_name)
file_name = os.path.basename(path_name)
filename, ext = file_name.split(".")
h, w = image.shape[:2]
# create 4D blob
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
# sets the blob as the input of the network
net.setInput(blob)
# get all the layer names
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# feed forward (inference) and get the network output
# measure how much it took in seconds
start = time.perf_counter()
layer_outputs = net.forward(ln)
time_took = time.perf_counter() - start
print(f"Time took: {time_took:.2f}s")
boxes, confidences, class_ids = [], [], []
b=[]
a=[]
# loop over each of the layer outputs
for output in layer_outputs:
# loop over each of the object detections
for detection in output:
# extract the class id (label) and confidence (as a probability) of
# the current object detection
scores = detection[5:]
class_id = np.argmax(scores)
confidence = scores[class_id]
# discard weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > CONFIDENCE:
# scale the bounding box coordinates back relative to the
# size of the image, keeping in mind that YOLO actually
# returns the center (x, y)-coordinates of the bounding
# box followed by the boxes' width and height
box = detection[0:4] * np.array([w, h, w, h])
(centerX, centerY, width, height) = box.astype("float")
# use the center (x, y)-coordinates to derive the top and
# and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
a = w, h
convert(a, box)
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
class_ids.append(class_id)
idxs = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD,
IOU_THRESHOLD)
font_scale = 1
thickness = 1
# ensure at least one detection exists
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
# extract the bounding box coordinates
x, y = boxes[i][0], boxes[i][1]
w, h = boxes[i][2], boxes[i][3]
# draw a bounding box rectangle and label on the image
color = [int(c) for c in colors[class_ids[i]]]
ba=w,h
print(w,h)
cv2.rectangle(image, (x, y), (x + w, y + h), color=color, thickness=thickness)
text = "{}".format(labels[class_ids[i]])
conf = "{:.3f}".format(confidences[i], x, y)
int1, int2 = (x, y)
print(text)
#print(convert(ba, box))
#b=w,h
#print(convert(b, boxes))
#print(convert(a, box)) #coordinates
ivan = str(int1)
b.append([text, ivan])
#a.append(float(conf))
#print(a)
# calculate text width & height to draw the transparent boxes as background of the text
(text_width, text_height) = \
cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
text_offset_x = x
text_offset_y = y - 5
box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
overlay = image.copy()
cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
# add opacity (transparency to the box)
image = cv2.addWeighted(overlay, 0.6, image, 0.4, 0)
# now put the text (label: confidence %)
cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
text = "{}".format(labels[class_ids[i]],x,y)
conf = "{:.3f}".format(confidences[i])
the problem is the indexes in your function.
box[0]=>center x
box[1]=>center y
box[2]=>width of your bbox
box[3]=>height of your bbox
and according to the document, yolo labels are like this :
<object-class> <x> <y> <width> <height>
which x and y are the center of the bounding box.so your code should be like this :
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = box[0]*dw
y = box[1]*dh
w = box[2]*dw
h = box[3]*dh
return (x,y,w,h)
Maybe this can help you
def bounding_box_2_yolo(obj_detections, frame, index):
yolo_info = []
for object_det in obj_detections:
left_x, top_y, right_x, bottom_y = object_det.boxes
xmin = left_x
xmax = right_x
ymin = top_y
ymax = bottom_y
xcen = float((xmin + xmax)) / 2 / frame.shape[1]
ycen = float((ymin + ymax)) / 2 / frame.shape[0]
w = float((xmax - xmin)) / frame.shape[1]
h = float((ymax - ymin)) / frame.shape[0]
yolo_info.append((index, xcen, ycen, w, h))
return yolo_info
The labelimg has a lot of things that you can use too
https://github.com/tzutalin/labelImg/blob/master/libs/yolo_io.py
For an assignment I want to resize a .jpg image with a python code, but without using the pil.image.resize() function or another similar function. I want to write the code myself but I can't figure out how. The image is RGB. I have found this can be solved by nearest neighbor interpolation (as well as other methods but this one is fine for my specific assignment). The height and the width should both be able to be made bigger or smaller. So far I only have this:
import numpy as np
import scipy as sc
import matplotlib as plt
import math
import PIL
from PIL import Image
img = np.array(Image.open("foto1.jpg"))
height = img.shape[0]
width = img.shape[1]
dim = img.shape[2]
new_h = int(input("New height: "))
new_w = int(input("New width: "))
imgR = img[:,:,0] #red pixels
imgG = img[:,:,1] #green pixels
imgB = img[:,:,2] #blue pixels
newR = np.empty([new_h, new_w])
newG = np.empty([new_h, new_w])
newB = np.empty([new_h, new_w])
So now all three colours have a new array of the right dimensions. Unfortunately on the web I can only find people who use resize() functions... Does anyone know?
Thank in advance!
The key to doing any image transformation like resizing is to have a mapping from output coordinates to input coordinates. Then you can simply iterate over the entire output and grab a pixel from the input. Nearest neighbor makes this particularly easy, because there's never a need to interpolate a pixel that doesn't lie exactly on integer coordinates - you simply round the coordinates to the nearest integer.
for new_y in range(new_h):
old_y = int(round(new_y * (new_h - 1) / (height - 1)))
if old_y < 0: old_y = 0
if old_y >= height: old_y = height - 1
for new_x in range(new_w):
old_x = int(round(new_x * (new_w - 1) / (width - 1)))
if old_x < 0: old_x = 0
if old_x >= width: old_x = width - 1
newR[new_y,new_x] = imgR[old_y,old_x]
newG[new_y,new_x] = imgG[old_y,old_x]
newB[new_y,new_x] = imgB[old_y,old_x]
The following code could do the trick.
def resize_img(image, resize_width, resize_height):
"""
:params
image: shape -> (width, height, channels)
resize_width: The resize width dimension.
resize_height: The resize height dimension.
:returns
array of shape -> (resized_width, resized_height, channels)
"""
original_width, original_height, channel = image.shape
red_channel = image[:, :, 0]
green_channel = image[:, :, 1]
blue_channel = image[:, :, 2]
resized_image = np.zeros((resize_width, resize_height, channel), dtype=np.uint8)
x_scale = original_width/resize_width
y_scale = original_height/resize_height
resize_idx = np.zeros((resize_width, resize_height))
resize_index_x = np.ceil(np.arange(0, original_width, x_scale)).astype(int)
resize_index_y = np.ceil(np.arange(0, original_height, y_scale)).astype(int)
resize_index_x[np.where(resize_index_x == original_width)] -= 1
resize_index_y[np.where(resize_index_y == original_height)] -= 1
resized_image[:, :, 0] = red_channel[resize_index_x, :][:, resize_index_y]
resized_image[:, :, 1] = green_channel[resize_index_x, :][:, resize_index_y]
resized_image[:, :, 2] = blue_channel[resize_index_x, :][:, resize_index_y]
return resized_image
I implemented the following code, to match nodes in a plant, using as template a small cropped image.
img_rgb = cv2.imread('Exp.2 - Florestópolis, 35DAE, 2017-2018, T4, R2, P4.jpg')
img_rgb = cv2.medianBlur(img_rgb, 7)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template_image = cv2.imread('TemplateNode.jpg',0)
template_image = cv2.medianBlur(template_image, 5)
width, height = template_image.shape[::-1]
res = cv2.matchTemplate(img_gray, template_image, cv2.TM_CCOEFF_NORMED)
threshold = 0.6
locations = np.where(res >= threshold)
for position_tuple in zip(*locations[::-1]):
cv2.rectangle(img_rgb, position_tuple, (position_tuple[0] + width, position_tuple[1] + height), (0,255,0), 1)
However, it generates too many bounding boxes (tuples), around a same location, as show:
So, there is a work around to solve this issue?
Here is one possible approach. Code is unlikely an efficient one. I think k-means clustering from some package may work better. Idea is to group together locations, which are too close:
def group_locations(locations, min_radius):
x = locations[:, 0][ : , None]
dist_x = x - x.T
y = locations[:, 1][ : , None]
dist_y = y - y.T
dist = np.sqrt(dist_x**2 + dist_y**2)
np.fill_diagonal(dist, min_radius+1)
too_close = np.nonzero(dist <= min_radius)
groups = []
points = np.arange(len(locations))
i = 0
j = 0
while i < len(points):
groups.append([points[i]])
for j in range(len(too_close[0])):
if too_close[0][j] == points[i]:
groups[i].append(too_close[1][j])
points = np.delete(points, np.nonzero(points == too_close[1][j]))
i += 1
new_locations = []
for group in groups:
new_locations.append(np.mean(locations[group], axis=0))
return np.array(new_locations)
So you take your locations and group them before plotting:
locations = []
size = 600
for _ in range(50):
locations.append((random.randint(0, size), random.randint(0, size)))
locations = np.array(locations)
min_radius = 50
new_locations = group_locations(locations, min_radius)
#I run it second time as sometimes locations form chains which are longer than radius
new_locations = group_locations(new_locations, min_radius)
plt.scatter(locations[:, 0], locations[:, 1], c='blue', label='Original locations')
plt.scatter(new_locations[:, 0], new_locations[:, 1], c='red', marker='x', label='New grouped locations')
plt.legend()
plt.show()
actually tried it with image provided
img_rgb = cv2.imread('obsvu.jpg')
img_rgb = cv2.medianBlur(img_rgb, 7)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template_image = cv2.imread('template.jpg',0)
template_image = cv2.medianBlur(template_image, 5)
width, height = template_image.shape[::-1]
res = cv2.matchTemplate(img_gray, template_image, cv2.TM_CCOEFF_NORMED)
threshold = 0.6
locations = np.where(res >= threshold)
new_locations = group_locations(np.array(locations).T, 50).T
for position_tuple in zip(*new_locations.astype(int)[::-1]):
cv2.rectangle(img_rgb, position_tuple, (position_tuple[0] + width, position_tuple[1] + height), (0,255,0), 5)
Original locations: 723
New locations: 6 (yep, template selection not the best)
In tensorflow, I would like to rotate an image from a random angle, for data augmentation. But I don't find this transformation in the tf.image module.
This can be done in tensorflow now:
tf.contrib.image.rotate(images, degrees * math.pi / 180, interpolation='BILINEAR')
Because I wanted to be able to rotate tensors I came up with the following piece of code, which rotates a [height, width, depth] tensor by a given angle:
def rotate_image_tensor(image, angle, mode='black'):
"""
Rotates a 3D tensor (HWD), which represents an image by given radian angle.
New image has the same size as the input image.
mode controls what happens to border pixels.
mode = 'black' results in black bars (value 0 in unknown areas)
mode = 'white' results in value 255 in unknown areas
mode = 'ones' results in value 1 in unknown areas
mode = 'repeat' keeps repeating the closest pixel known
"""
s = image.get_shape().as_list()
assert len(s) == 3, "Input needs to be 3D."
assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
image_center = [np.floor(x/2) for x in s]
# Coordinates of new image
coord1 = tf.range(s[0])
coord2 = tf.range(s[1])
# Create vectors of those coordinates in order to vectorize the image
coord1_vec = tf.tile(coord1, [s[1]])
coord2_vec_unordered = tf.tile(coord2, [s[0]])
coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])
# center coordinates since rotation center is supposed to be in the image center
coord1_vec_centered = coord1_vec - image_center[0]
coord2_vec_centered = coord2_vec - image_center[1]
coord_new_centered = tf.cast(tf.pack([coord1_vec_centered, coord2_vec_centered]), tf.float32)
# Perform backward transformation of the image coordinates
rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)
# Find nearest neighbor in old image
coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)
# Clip values to stay inside image coordinates
if mode == 'repeat':
coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
else:
outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
outside_ind = tf.logical_or(outside_ind1, outside_ind2)
coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))
coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))
coord_old_clipped = tf.cast(tf.transpose(tf.pack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)
# Coordinates of the new image
coord_new = tf.transpose(tf.cast(tf.pack([coord1_vec, coord2_vec]), tf.int32), [1, 0])
image_channel_list = tf.split(2, s[2], image)
image_rotated_channel_list = list()
for image_channel in image_channel_list:
image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)
if (mode == 'black') or (mode == 'repeat'):
background_color = 0
elif mode == 'ones':
background_color = 1
elif mode == 'white':
background_color = 255
image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
background_color, validate_indices=False))
image_rotated = tf.transpose(tf.pack(image_rotated_channel_list), [1, 2, 0])
return image_rotated
for tensorflow 2.0:
import tensorflow_addons as tfa
tfa.image.transform_ops.rotate(image, radian)
Rotation and cropping in TensorFlow
I personally needed image rotation and cropping out black borders functions in TensorFlow as below.
And I could implement this function as below.
def _rotate_and_crop(image, output_height, output_width, rotation_degree, do_crop):
"""Rotate the given image with the given rotation degree and crop for the black edges if necessary
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
rotation_degree: The degree of rotation on the image.
do_crop: Do cropping if it is True.
Returns:
A rotated image.
"""
# Rotate the given image with the given rotation degree
if rotation_degree != 0:
image = tf.contrib.image.rotate(image, math.radians(rotation_degree), interpolation='BILINEAR')
# Center crop to ommit black noise on the edges
if do_crop == True:
lrr_width, lrr_height = _largest_rotated_rect(output_height, output_width, math.radians(rotation_degree))
resized_image = tf.image.central_crop(image, float(lrr_height)/output_height)
image = tf.image.resize_images(resized_image, [output_height, output_width], method=tf.image.ResizeMethod.BILINEAR, align_corners=False)
return image
def _largest_rotated_rect(w, h, angle):
"""
Given a rectangle of size wxh that has been rotated by 'angle' (in
radians), computes the width and height of the largest possible
axis-aligned rectangle within the rotated rectangle.
Original JS code by 'Andri' and Magnus Hoff from Stack Overflow
Converted to Python by Aaron Snoswell
Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
"""
quadrant = int(math.floor(angle / (math.pi / 2))) & 3
sign_alpha = angle if ((quadrant & 1) == 0) else math.pi - angle
alpha = (sign_alpha % math.pi + math.pi) % math.pi
bb_w = w * math.cos(alpha) + h * math.sin(alpha)
bb_h = w * math.sin(alpha) + h * math.cos(alpha)
gamma = math.atan2(bb_w, bb_w) if (w < h) else math.atan2(bb_w, bb_w)
delta = math.pi - alpha - gamma
length = h if (w < h) else w
d = length * math.cos(alpha)
a = d * math.sin(alpha) / math.sin(delta)
y = a * math.cos(gamma)
x = y * math.tan(gamma)
return (
bb_w - 2 * x,
bb_h - 2 * y
)
If you need further implementation of example and visualization in TensorFlow, you can use this repository.
I hope this could be helpful to other people.
Update: see #astromme's answer below. Tensorflow now supports rotating images natively.
What you can do while there is no native method in tensorflow is something like this:
from PIL import Image
sess = tf.InteractiveSession()
# Pass image tensor object to a PIL image
image = Image.fromarray(image.eval())
# Use PIL or other library of the sort to rotate
rotated = Image.Image.rotate(image, degrees)
# Convert rotated image back to tensor
rotated_tensor = tf.convert_to_tensor(np.array(rotated))
tf.contrib is not available in tensorflow 2.
For tensorflow >= 2.* the following can be used:
tf.keras.preprocessing.image.random_rotation(x, rg, row_axis=1,col_axis=2, channel_axis=0,fill_mode='nearest', cval=0., interpolation_order=1);
you can find the documantation here:
https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/random_rotation
Here's the #zimmermc answer updated to Tensorflow v0.12
Changes:
pack() is now stack()
order of split parameters reversed
def rotate_image_tensor(image, angle, mode='white'):
"""
Rotates a 3D tensor (HWD), which represents an image by given radian angle.
New image has the same size as the input image.
mode controls what happens to border pixels.
mode = 'black' results in black bars (value 0 in unknown areas)
mode = 'white' results in value 255 in unknown areas
mode = 'ones' results in value 1 in unknown areas
mode = 'repeat' keeps repeating the closest pixel known
"""
s = image.get_shape().as_list()
assert len(s) == 3, "Input needs to be 3D."
assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
image_center = [np.floor(x/2) for x in s]
# Coordinates of new image
coord1 = tf.range(s[0])
coord2 = tf.range(s[1])
# Create vectors of those coordinates in order to vectorize the image
coord1_vec = tf.tile(coord1, [s[1]])
coord2_vec_unordered = tf.tile(coord2, [s[0]])
coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])
# center coordinates since rotation center is supposed to be in the image center
coord1_vec_centered = coord1_vec - image_center[0]
coord2_vec_centered = coord2_vec - image_center[1]
coord_new_centered = tf.cast(tf.stack([coord1_vec_centered, coord2_vec_centered]), tf.float32)
# Perform backward transformation of the image coordinates
rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)
# Find nearest neighbor in old image
coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)
# Clip values to stay inside image coordinates
if mode == 'repeat':
coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
else:
outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
outside_ind = tf.logical_or(outside_ind1, outside_ind2)
coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))
coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))
coord_old_clipped = tf.cast(tf.transpose(tf.stack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)
# Coordinates of the new image
coord_new = tf.transpose(tf.cast(tf.stack([coord1_vec, coord2_vec]), tf.int32), [1, 0])
image_channel_list = tf.split(image, s[2], 2)
image_rotated_channel_list = list()
for image_channel in image_channel_list:
image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)
if (mode == 'black') or (mode == 'repeat'):
background_color = 0
elif mode == 'ones':
background_color = 1
elif mode == 'white':
background_color = 255
image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
background_color, validate_indices=False))
image_rotated = tf.transpose(tf.stack(image_rotated_channel_list), [1, 2, 0])
return image_rotated
For rotating an image or a batch of images counter-clockwise by multiples of 90 degrees, you can use tf.image.rot90(image,k=1,name=None).
k denotes the number of 90 degrees rotations you want to make.
In case of a single image, image is a 3-D Tensor of shape [height, width, channels] and in case of a batch of images, image is a 4-D Tensor of shape [batch, height, width, channels]