Multiple occurrences of bounding boxes around a same location - python

I implemented the following code, to match nodes in a plant, using as template a small cropped image.
img_rgb = cv2.imread('Exp.2 - Florestópolis, 35DAE, 2017-2018, T4, R2, P4.jpg')
img_rgb = cv2.medianBlur(img_rgb, 7)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template_image = cv2.imread('TemplateNode.jpg',0)
template_image = cv2.medianBlur(template_image, 5)
width, height = template_image.shape[::-1]
res = cv2.matchTemplate(img_gray, template_image, cv2.TM_CCOEFF_NORMED)
threshold = 0.6
locations = np.where(res >= threshold)
for position_tuple in zip(*locations[::-1]):
cv2.rectangle(img_rgb, position_tuple, (position_tuple[0] + width, position_tuple[1] + height), (0,255,0), 1)
However, it generates too many bounding boxes (tuples), around a same location, as show:
So, there is a work around to solve this issue?

Here is one possible approach. Code is unlikely an efficient one. I think k-means clustering from some package may work better. Idea is to group together locations, which are too close:
def group_locations(locations, min_radius):
x = locations[:, 0][ : , None]
dist_x = x - x.T
y = locations[:, 1][ : , None]
dist_y = y - y.T
dist = np.sqrt(dist_x**2 + dist_y**2)
np.fill_diagonal(dist, min_radius+1)
too_close = np.nonzero(dist <= min_radius)
groups = []
points = np.arange(len(locations))
i = 0
j = 0
while i < len(points):
groups.append([points[i]])
for j in range(len(too_close[0])):
if too_close[0][j] == points[i]:
groups[i].append(too_close[1][j])
points = np.delete(points, np.nonzero(points == too_close[1][j]))
i += 1
new_locations = []
for group in groups:
new_locations.append(np.mean(locations[group], axis=0))
return np.array(new_locations)
So you take your locations and group them before plotting:
locations = []
size = 600
for _ in range(50):
locations.append((random.randint(0, size), random.randint(0, size)))
locations = np.array(locations)
min_radius = 50
new_locations = group_locations(locations, min_radius)
#I run it second time as sometimes locations form chains which are longer than radius
new_locations = group_locations(new_locations, min_radius)
plt.scatter(locations[:, 0], locations[:, 1], c='blue', label='Original locations')
plt.scatter(new_locations[:, 0], new_locations[:, 1], c='red', marker='x', label='New grouped locations')
plt.legend()
plt.show()
actually tried it with image provided
img_rgb = cv2.imread('obsvu.jpg')
img_rgb = cv2.medianBlur(img_rgb, 7)
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template_image = cv2.imread('template.jpg',0)
template_image = cv2.medianBlur(template_image, 5)
width, height = template_image.shape[::-1]
res = cv2.matchTemplate(img_gray, template_image, cv2.TM_CCOEFF_NORMED)
threshold = 0.6
locations = np.where(res >= threshold)
new_locations = group_locations(np.array(locations).T, 50).T
for position_tuple in zip(*new_locations.astype(int)[::-1]):
cv2.rectangle(img_rgb, position_tuple, (position_tuple[0] + width, position_tuple[1] + height), (0,255,0), 5)
Original locations: 723
New locations: 6 (yep, template selection not the best)

Related

How can i block wrong lines into houghspace

I found hough line implemantation in github.
And I try this code my computer.
When I plot hough space with matplot, there is build-up at shown in picture.
This cause wrong lines detection in image.
def hough_line(img, angle_step=1, lines_are_white=True, value_threshold=5):
# Rho and Theta ranges
thetas = np.deg2rad(np.arange(-90.0, 90.0, angle_step))
width, height = img.shape
diag_len = int(round(math.sqrt(width * width + height * height)))
rhos = np.linspace(-diag_len, diag_len, diag_len * 2)
# Cache some resuable values
cos_t = np.cos(thetas)
sin_t = np.sin(thetas)
num_thetas = len(thetas)
# Hough accumulator array of theta vs rho
accumulator = np.zeros((2 * diag_len, num_thetas), dtype=np.uint8)
# (row, col) indexes to edges
are_edges = img > value_threshold if lines_are_white else img < value_threshold
y_idxs, x_idxs = np.nonzero(are_edges)
# Vote in the hough accumulator
for i in range(len(x_idxs)):
x = x_idxs[i]
y = y_idxs[i]
for t_idx in range(num_thetas):
# Calculate rho. diag_len is added for a positive index
rho = diag_len + int(round(x * cos_t[t_idx] + y * sin_t[t_idx]))
accumulator[rho, t_idx] += 1
return accumulator, thetas, rhos
def show_hough_line(img, accumulator, thetas, rhos, save_path=None):
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 2, figsize=(10, 10))
ax[0].imshow(img, cmap=plt.cm.gray)
ax[0].set_title('Input image')
ax[0].axis('image')
ax[1].imshow(
accumulator, cmap='jet',
extent=[np.rad2deg(thetas[-1]), np.rad2deg(thetas[0]), rhos[-1], rhos[0]])
ax[1].set_aspect('equal', adjustable='box')
ax[1].set_title('Hough transform')
ax[1].set_xlabel('Angles (degrees)')
ax[1].set_ylabel('Distance (pixels)')
ax[1].axis('image')
# plt.axis('off')
if save_path is not None:
plt.savefig(save_path, bbox_inches='tight')
plt.show()
And I run this code,
img = cv2.imread('05102009081.png')
img_dark = cv2.imread('05102009081-1.png')
img1 = cv2.bitwise_and(img, img_dark)
gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
edges = cv2.Canny(blur, 100, 200)
accumulator, thetas, rhos = hough_line(edges)
#Thresholding with 100
a = (accumulator > 100).astype(int)
accumulator = accumulator * a
show_hough_line(edges, accumulator, thetas, rhos)
This is result without thresholding
This is result after thresholding
As you see when i apply thresholding this edges, There is some peak point approximatly 55. degree and between 10-50. pixels in the hough space, This cause wrong lines in image.
What is the problem ?
How can i solve this problem ?
Thanks in advance.

I want to add 4 circles at different random spots, in a given image (python) but the code does not work after adding one

I want to add circles at random spots on an image. The code adds the circle to the image
Attached is the code that I tried:
img = np.zeros([100,100],dtype=np.uint8)
img.fill(20)
def createCircle(width,height , rad ):
w = random.randint(1, height)
h = random.randint(1, height)
center = [int(w), int(h)]
radius = rad
Y, X = np.ogrid[:height, :width]
dist_from_center = np.sqrt((X - center[0])**2 + (Y-center[1])**2)
mask = dist_from_center <= radius
return mask
def addCircle(test_image):
m = createCircle(width = 100, height = 100 , rad = 8 )
masked_img = test_image.copy()
masked_img[~m] = 0
return masked_img
im = addCircle(test_image=img)
plt.imshow(im)
plt.show()
im1 = addCircle(test_image = im)
plt.imshow(im1)
plt.show()
When i apply addCircle function on the image img, it adds the circle to the image but when I apply addCircle function to im it does not add another circle to the image.
I want to add 4 circles to the same image at random places on the image but as of now I am only able to add one circle, the code doesn't work after that.
As mentioned in the comments... Your problem seems to be in this line masked_img[~m] = 0.
def addCircle(test_image):
m = createCircle(width = 100, height = 100 , rad = 8 )
masked_img = test_image.copy()
masked_img[m] = 0
return masked_img
# im = addCircle(test_image=img)
# plt.imshow(im)
# plt.show()
for i in range(4):
img = addCircle(test_image=img)
plt.imshow(img)
plt.show()

Calculate slope, length and angle of a specific part / side / line on a contour?

I got two detected contours in an image and need the diameter between the two vertical-edges of the top contour and the diameter between the vertical-edges of the lower contour. I achieved this with this code.
import cv2
import numpy as np
import math, os
import imutils
img = cv2.imread("1.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
edges = cv2.Canny(gray, 200, 100)
edges = cv2.dilate(edges, None, iterations=1)
edges = cv2.erode(edges, None, iterations=1)
cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
# sorting the contours to find the largest and smallest one
c1 = max(cnts, key=cv2.contourArea)
c2 = min(cnts, key=cv2.contourArea)
# determine the most extreme points along the contours
extLeft1 = tuple(c1[c1[:, :, 0].argmin()][0])
extRight1 = tuple(c1[c1[:, :, 0].argmax()][0])
extLeft2 = tuple(c2[c2[:, :, 0].argmin()][0])
extRight2 = tuple(c2[c2[:, :, 0].argmax()][0])
# show contour
cimg = cv2.drawContours(img, cnts, -1, (0,200,0), 2)
# set y of left point to y of right point
lst1 = list(extLeft1)
lst1[1] = extRight1[1]
extLeft1 = tuple(lst1)
lst2 = list(extLeft2)
lst2[1] = extRight2[1]
extLeft2= tuple(lst2)
# compute the distance between the points (x1, y1) and (x2, y2)
dist1 = math.sqrt( ((extLeft1[0]-extRight1[0])**2)+((extLeft1[1]-extRight1[1])**2) )
dist2 = math.sqrt( ((extLeft2[0]-extRight2[0])**2)+((extLeft2[1]-extRight2[1])**2) )
# draw lines
cv2.line(cimg, extLeft1, extRight1, (255,0,0), 1)
cv2.line(cimg, extLeft2, extRight2, (255,0,0), 1)
# draw the distance text
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
fontColor = (255,0,0)
lineType = 1
cv2.putText(cimg,str(dist1),(155,100),font, fontScale, fontColor, lineType)
cv2.putText(cimg,str(dist2),(155,280),font, fontScale, fontColor, lineType)
# show image
cv2.imshow("Image", img)
cv2.waitKey(0)
Now I would also need the angle of the slope lines on the bottom side of the upper contour.
Any ideas how I can get this? Is it possible using contours?
Or is it necessary to use HoughLinesP and sort the regarding lines somehow?
And continued question: Maybe its also possible to get function which describes parabola slope of that sides ?
Thanks alot for any help!
There are several ways to obtain just the slopes. In order to know the slope, we can can use cv2.HoughLines to detect the bottom horizontal line, detect to end points of that line and from those, obtain the slopes. As an illustration,
lines = cv2.HoughLines(edges, rho=1, theta=np.pi/180, threshold=int(dist2*0.66) )
on edges in your code gives 4 lines, and if we force the angle to be horizontal
for line in lines:
rho, theta = line[0]
# here we filter out non-horizontal lines
if abs(theta - np.pi/2) > np.pi/180:
continue
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
cv2.line(img_lines,(x1,y1),(x2,y2),(0,0,255),1)
we get:
For the extended question concerns with the parabolas, we first compose a function that returns the left and right points:
def horizontal_scan(gray_img, thresh=50, start=50):
'''
scan horizontally for left and right points until we met an all-background line
#param thresh: threshold for background pixel
#param start: y coordinate to start scanning
'''
ret = []
thickness = 0
for i in range(start,len(gray_img)):
row = gray_img[i]
# scan for left:
left = 0
while left < len(row) and row[left]<thresh:
left += 1
if left==len(row):
break;
# scan for right:
right = left
while right < len(row) and row[right] >= thresh:
right+=1
if thickness == 0:
thickness = right - left
# prevent sudden drop, error/noise
if (right-left) < thickness//5:
continue
else:
thickness = right - left
ret.append((i,left,right))
return ret
# we start scanning from extLeft1 down until we see a blank line
# with some tweaks, we can make horizontal_scan run on edges,
# which would be simpler and faster
horizontal_lines = horizontal_scan(gray, start = extLeft1[1])
# check if horizontal_line[0] are closed to extLeft1 and extRight1
print(horizontal_lines[0], extLeft1, extRight1[0])
Note that we can use this function to find the end points of the horizontal line returned by HoughLines.
# last line of horizontal_lines would be the points we need:
upper_lowest_y, upper_lowest_left, upper_lowest_right = horizontal_lines[-1]
img_lines = img.copy()
cv2.line(img_lines, (upper_lowest_left, upper_lowest_y), extLeft1, (0,0,255), 1)
cv2.line(img_lines, (upper_lowest_right, upper_lowest_y), extRight1, (0,0,255),1)
and that gives:
Let's return to the extended question, where we have those left and right points:
left_points = [(x,y) for y,x,_ in horizontal_lines]
right_points = [(x,y) for y,_,x in horizontal_lines]
Obviously, they would not fit perfectly in a parabola, so we need some sort of approximation/fitting here. For that, we can build a LinearRegression model:
from sklearn.linear_model import LinearRegression
class BestParabola:
def __init__(self, points):
x_x2 = np.array([(x**2,x) for x,_ in points])
ys = np.array([y for _,y in points])
self.lr = LinearRegression()
self.lr.fit(x_x2,ys)
self.a, self.b = self.lr.coef_
self.c = self.lr.intercept_
self.coef_ = (self.c,self.b,self.a)
def transform(self,points):
x_x2 = np.array([(x**2,x) for x,_ in points])
ys = self.lr.predict(x_x2)
return np.array([(x,y) for (_,x),y in zip(x_x2,ys)])
And then, we can fit the given left_points, right_points to get the desired parabolas:
# construct the approximate parabola
# the parabollas' coefficients are accessible by BestParabola.coef_
left_parabola = BestParabola(left_points)
right_parabola = BestParabola(right_points)
# get points for rendering
left_parabola_points = left_parabola.transform(left_points)
right_parabola_points = right_parabola.transform(right_points)
# render with matplotlib, cv2.drawContours would work
plt.figure(figsize=(8,8))
plt.imshow(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
plt.plot(left_parabola_points[:,0], left_parabola_points[:,1], linewidth=3)
plt.plot(right_parabola_points[:,0], right_parabola_points[:,1], linewidth=3, color='r')
plt.show()
Which gives:
The left parabola is not perfect, but you should work out that if need be :-)

How to keep records of multiple previous frames in video processing

I currently working on lane detection for autonomous vehicles using opencv and python. I have used houghline transform to get lines in the road. From those lines I have calculated the mean slope and intercept values separately for the lines which have negative and positive slopes. To reduce errors, I want to keep track of calculated mean slopes and intercepts of previous 3 frames. So that I can check the deviance of mean slope and intercept values of current frame and correct it accordingly. Is there a way to keep track of 3 previous frames ?
this is I have done up to now. This is for the current frame. I want to keep calculated mean values up to 3 previous frames and access those values form current frame.
import numpy as np
import cv2
import math
cap = cv2.VideoCapture('video3.mov')
while (cap.isOpened()):
ret, frame = cap.read()
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HLS)
# HSL color mask
lower_white = np.uint8([0, 140, 0])
upper_white = np.uint8([255, 255, 255])
mask = cv2.inRange(hsv, lower_white, upper_white)
res = cv2.bitwise_and(frame, frame, mask=mask)
height = np.size(hsv, 0)
width = np.size(hsv, 1)
rows, cols = hsv.shape[:2]
bottom_left = [cols * 0, rows * 0.9]
top_left = [cols * 0.4, rows * 0.7]
bottom_right = [cols * 1, rows * 0.9]
top_right = [cols * 0.6, rows * 0.7]
vertices = np.array([[bottom_left, top_left, top_right, bottom_right]], dtype=np.int32)
maskzero = np.zeros_like(res)
cv2.fillPoly(maskzero, vertices, (255,) * maskzero.shape[2])
maskedimg = cv2.bitwise_and(res, maskzero)
# smoothed = cv2.medianBlur(maskedimg,3)
gaussian = cv2.GaussianBlur(maskedimg, (3, 3), 0)
# apply canny on masked image
cannymask = cv2.Canny(gaussian, 150, 250)
# apply hough transform houghlinesP
lines = cv2.HoughLinesP(cannymask, rho=1, theta=np.pi / 180, threshold=20, minLineLength=10, maxLineGap=300)
left_slope = []
right_slope = []
left_intercept = []
right_intercept = []
total_right_length = []
total_left_length = []
if lines is not None:
for line in lines:
for x1, y1, x2, y2 in line:
if (x2 - x1) != 0:
slope = (y2 - y1) / (x2 - x1)
intercept = y1 - slope * x1
length = np.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2)
angle = math.atan2(y2-y1,x2-x1)
degree = angle * 180 / np.pi
if x2 == x1 or y2 == y1:
continue
elif slope > 0:
if int(degree) in range (27,41):
right_slope.append(slope)
right_intercept.append(intercept)
total_right_length.append(length)
angle = math.atan2(y2 - y1, x2 - x1)
degree = angle * 180 / np.pi
print("positive",degree)
elif slope < 0:
if int(degree) in range (-62,-31):
left_slope.append(slope)
left_intercept.append(intercept)
total_left_length.append(length)
angle = math.atan2(y2 - y1, x2 - x1)
degree = angle * 180 / np.pi
print("negative",degree)
degreeint= int(degree)
#cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
sum_right_length = np.sum(total_right_length) if len(total_right_length) > 0 else None
right_mean_slope=np.mean(right_slope) if len(right_slope)>0 else None
right_mean_intercept=np.mean(right_intercept) if len(right_intercept) > 0 else None
sum_left_length = np.sum(total_left_length) if len(total_left_length) > 0 else None
left_mean_slope = np.mean(left_slope) if len(left_slope)>0 else None
left_mean_intercept = np.mean(left_intercept) if len(left_intercept) > 0 else None
right_x1=0
right_y1=0
right_x2=0
right_y2=0
left_x1=0
left_x2=0
left_y2=0
left_y1=0
y1 = frame.shape[0] # bottom of the image
y2 = y1*0.7
if right_mean_intercept is not None and right_mean_slope is not None:
right_x1 = int((y1 - right_mean_intercept)/right_mean_slope)
right_x2 = int((y2 - right_mean_intercept)/right_mean_slope)
right_y1 = int(y1)
right_y2 = int(y2)
if left_mean_intercept is not None and left_mean_slope is not None:
left_x1 = int((y1 - left_mean_intercept)/left_mean_slope)
left_x2 = int((y2 - left_mean_intercept)/left_mean_slope)
left_y1 = int(y1)
left_y2 = int(y2)
cv2.line(frame, (right_x1, right_y1), (right_x2, right_y2), (0, 0, 255), 10)
cv2.line(frame, (left_x1, left_y1), (left_x2, left_y2), (255, 0,0), 10)
cv2.imshow("New_lines", frame)
if cv2.waitKey(100) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
I want to keep right and left mean slope and intercept values of 3 previous frames and access them from the current frame.
import numpy as np
import cv2
import math
cap = cv2.VideoCapture('video3.mov')
previous = [ ] # We will keep track of previous 3 frames in this list
while (cap.isOpened()):
ret, frame = cap.read()
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HLS)
if len(previous) == 3 :
# do your thing like calculating the mean
for x in range(3):
print(previous[x]) # This is how you can access the previous frames
# ================================================================
# After you are done add the below two lines of code
# Below two lines are necessary to keep track of recent 3 frames
# ================================================================
previous = previous[1:] # Removing the first frame so now length of previous is 2
previous.append(frame) # Adding the current frame so length of previous is 3 again
else :
# else condition is for if frames are less then 3
# else condition is for the very beginning when you previous will start with empty list
previous.append(frame)
continue # If you want to do computation even when length of previous is less than 3 than comment the continue statement
if cv2.waitKey(100) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
import numpy as np
import cv2
import math
cap = cv2.VideoCapture('video3.mov')
prevFrames = [None,None,None] # avoid index mishaps for [-1]..[-3]
# you need to test for None when accessing
while (cap.isOpened()):
ret, frame = cap.read()
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HLS)
# [...]
# access prevFrames[-1] for last frame,
# [-2] for the one before that,
# [-3] for the one 3 before now
# check value for None before using
sum_right_length = np.sum(total_right_length) if len(total_right_length) > 0 else None
right_mean_slope=np.mean(right_slope) if len(right_slope)>0 else None
right_mean_intercept=np.mean(right_intercept) if len(right_intercept) > 0 else None
sum_left_length = np.sum(total_left_length) if len(total_left_length) > 0 else None
left_mean_slope = np.mean(left_slope) if len(left_slope)>0 else None
left_mean_intercept = np.mean(left_intercept) if len(left_intercept) > 0 else None
# [...]
# add data from current frame at end
prevFrames.append( [right_mean_slope, left_mean_slope, right_mean_intercept, left_mean_intercept ]
if len(prevFrames) > 3:
prevFrames.pop(0) # remove and discard oldest element
cv2.line(frame, (right_x1, right_y1), (right_x2, right_y2), (0, 0, 255), 10)
cv2.line(frame, (left_x1, left_y1), (left_x2, left_y2), (255, 0,0), 10)
cv2.imshow("New_lines", frame)

overlay a smaller image on a larger image python OpenCv

Hi I am creating a program that replaces a face in a image with someone else's face. However, I am stuck on trying to insert the new face into the original, larger image. I have researched ROI and addWeight(needs the images to be the same size) but I haven't found a way to do this in python. Any advise is great. I am new to opencv.
I am using the following test images:
smaller_image:
larger_image:
Here is my Code so far... a mixer of other samples:
import cv2
import cv2.cv as cv
import sys
import numpy
def detect(img, cascade):
rects = cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=3, minSize=(10, 10), flags = cv.CV_HAAR_SCALE_IMAGE)
if len(rects) == 0:
return []
rects[:,2:] += rects[:,:2]
return rects
def draw_rects(img, rects, color):
for x1, y1, x2, y2 in rects:
cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
if __name__ == '__main__':
if len(sys.argv) != 2: ## Check for error in usage syntax
print "Usage : python faces.py <image_file>"
else:
img = cv2.imread(sys.argv[1],cv2.CV_LOAD_IMAGE_COLOR) ## Read image file
if (img == None):
print "Could not open or find the image"
else:
cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
gray = cv2.equalizeHist(gray)
rects = detect(gray, cascade)
## Extract face coordinates
x1 = rects[0][3]
y1 = rects[0][0]
x2 = rects[0][4]
y2 = rects[0][5]
y=y2-y1
x=x2-x1
## Extract face ROI
faceROI = gray[x1:x2, y1:y2]
## Show face ROI
cv2.imshow('Display face ROI', faceROI)
small = cv2.imread("average_face.png",cv2.CV_LOAD_IMAGE_COLOR)
print "here"
small=cv2.resize(small, (x, y))
cv2.namedWindow('Display image') ## create window for display
cv2.imshow('Display image', small) ## Show image in the window
print "size of image: ", img.shape ## print size of image
cv2.waitKey(1000)
A simple way to achieve what you want:
import cv2
s_img = cv2.imread("smaller_image.png")
l_img = cv2.imread("larger_image.jpg")
x_offset=y_offset=50
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
Update
I suppose you want to take care of the alpha channel too. Here is a quick and dirty way of doing so:
s_img = cv2.imread("smaller_image.png", -1)
y1, y2 = y_offset, y_offset + s_img.shape[0]
x1, x2 = x_offset, x_offset + s_img.shape[1]
alpha_s = s_img[:, :, 3] / 255.0
alpha_l = 1.0 - alpha_s
for c in range(0, 3):
l_img[y1:y2, x1:x2, c] = (alpha_s * s_img[:, :, c] +
alpha_l * l_img[y1:y2, x1:x2, c])
Using #fireant's idea, I wrote up a function to handle overlays. This works well for any position argument (including negative positions).
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask):
"""Overlay `img_overlay` onto `img` at (x, y) and blend using `alpha_mask`.
`alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
"""
# Image ranges
y1, y2 = max(0, y), min(img.shape[0], y + img_overlay.shape[0])
x1, x2 = max(0, x), min(img.shape[1], x + img_overlay.shape[1])
# Overlay ranges
y1o, y2o = max(0, -y), min(img_overlay.shape[0], img.shape[0] - y)
x1o, x2o = max(0, -x), min(img_overlay.shape[1], img.shape[1] - x)
# Exit if nothing to do
if y1 >= y2 or x1 >= x2 or y1o >= y2o or x1o >= x2o:
return
# Blend overlay within the determined ranges
img_crop = img[y1:y2, x1:x2]
img_overlay_crop = img_overlay[y1o:y2o, x1o:x2o]
alpha = alpha_mask[y1o:y2o, x1o:x2o, np.newaxis]
alpha_inv = 1.0 - alpha
img_crop[:] = alpha * img_overlay_crop + alpha_inv * img_crop
Example usage:
import numpy as np
from PIL import Image
# Prepare inputs
x, y = 50, 0
img = np.array(Image.open("img_large.jpg"))
img_overlay_rgba = np.array(Image.open("img_small.png"))
# Perform blending
alpha_mask = img_overlay_rgba[:, :, 3] / 255.0
img_result = img[:, :, :3].copy()
img_overlay = img_overlay_rgba[:, :, :3]
overlay_image_alpha(img_result, img_overlay, x, y, alpha_mask)
# Save result
Image.fromarray(img_result).save("img_result.jpg")
Result:
If you encounter errors or unusual outputs, please ensure:
img should not contain an alpha channel. (e.g. If it is RGBA, convert to RGB first.)
img_overlay has the same number of channels as img.
Based on fireant's excellent answer above, here is the alpha blending but a bit more human legible. You may need to swap 1.0-alpha and alpha depending on which direction you're merging (mine is swapped from fireant's answer).
o* == s_img.*
b* == b_img.*
for c in range(0,3):
alpha = s_img[oy:oy+height, ox:ox+width, 3] / 255.0
color = s_img[oy:oy+height, ox:ox+width, c] * (1.0-alpha)
beta = l_img[by:by+height, bx:bx+width, c] * (alpha)
l_img[by:by+height, bx:bx+width, c] = color + beta
Here it is:
def put4ChannelImageOn4ChannelImage(back, fore, x, y):
rows, cols, channels = fore.shape
trans_indices = fore[...,3] != 0 # Where not transparent
overlay_copy = back[y:y+rows, x:x+cols]
overlay_copy[trans_indices] = fore[trans_indices]
back[y:y+rows, x:x+cols] = overlay_copy
#test
background = np.zeros((1000, 1000, 4), np.uint8)
background[:] = (127, 127, 127, 1)
overlay = cv2.imread('imagee.png', cv2.IMREAD_UNCHANGED)
put4ChannelImageOn4ChannelImage(background, overlay, 5, 5)
A simple function that blits an image front onto an image back and returns the result. It works with both 3 and 4-channel images and deals with the alpha channel. Overlaps are handled as well.
The output image has the same size as back, but always 4 channels.
The output alpha channel is given by (u+v)/(1+uv) where u,v are the alpha channels of the front and back image and -1 <= u,v <= 1. Where there is no overlap with front, the alpha value from back is taken.
import cv2
def merge_image(back, front, x,y):
# convert to rgba
if back.shape[2] == 3:
back = cv2.cvtColor(back, cv2.COLOR_BGR2BGRA)
if front.shape[2] == 3:
front = cv2.cvtColor(front, cv2.COLOR_BGR2BGRA)
# crop the overlay from both images
bh,bw = back.shape[:2]
fh,fw = front.shape[:2]
x1, x2 = max(x, 0), min(x+fw, bw)
y1, y2 = max(y, 0), min(y+fh, bh)
front_cropped = front[y1-y:y2-y, x1-x:x2-x]
back_cropped = back[y1:y2, x1:x2]
alpha_front = front_cropped[:,:,3:4] / 255
alpha_back = back_cropped[:,:,3:4] / 255
# replace an area in result with overlay
result = back.copy()
print(f'af: {alpha_front.shape}\nab: {alpha_back.shape}\nfront_cropped: {front_cropped.shape}\nback_cropped: {back_cropped.shape}')
result[y1:y2, x1:x2, :3] = alpha_front * front_cropped[:,:,:3] + (1-alpha_front) * back_cropped[:,:,:3]
result[y1:y2, x1:x2, 3:4] = (alpha_front + alpha_back) / (1 + alpha_front*alpha_back) * 255
return result
For just add an alpha channel to s_img I just use cv2.addWeighted before the line
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
as following:
s_img=cv2.addWeighted(l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]],0.5,s_img,0.5,0)
When attempting to write to the destination image using any of these answers above and you get the following error:
ValueError: assignment destination is read-only
A quick potential fix is to set the WRITEABLE flag to true.
img.setflags(write=1)
A simple 4on4 pasting function that works-
def paste(background,foreground,pos=(0,0)):
#get position and crop pasting area if needed
x = pos[0]
y = pos[1]
bgWidth = background.shape[0]
bgHeight = background.shape[1]
frWidth = foreground.shape[0]
frHeight = foreground.shape[1]
width = bgWidth-x
height = bgHeight-y
if frWidth<width:
width = frWidth
if frHeight<height:
height = frHeight
# normalize alpha channels from 0-255 to 0-1
alpha_background = background[x:x+width,y:y+height,3] / 255.0
alpha_foreground = foreground[:width,:height,3] / 255.0
# set adjusted colors
for color in range(0, 3):
fr = alpha_foreground * foreground[:width,:height,color]
bg = alpha_background * background[x:x+width,y:y+height,color] * (1 - alpha_foreground)
background[x:x+width,y:y+height,color] = fr+bg
# set adjusted alpha and denormalize back to 0-255
background[x:x+width,y:y+height,3] = (1 - (1 - alpha_foreground) * (1 - alpha_background)) * 255
return background
I reworked #fireant's concept to allow for optional alpha masks and allow any x or y, including values outside of the bounds of the image. It will crop to the bounds.
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask=None):
"""Overlay `img_overlay` onto `img` at (x, y) and blend using optional `alpha_mask`.
`alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
"""
if y < 0 or y + img_overlay.shape[0] > img.shape[0] or x < 0 or x + img_overlay.shape[1] > img.shape[1]:
y_origin = 0 if y > 0 else -y
y_end = img_overlay.shape[0] if y < 0 else min(img.shape[0] - y, img_overlay.shape[0])
x_origin = 0 if x > 0 else -x
x_end = img_overlay.shape[1] if x < 0 else min(img.shape[1] - x, img_overlay.shape[1])
img_overlay_crop = img_overlay[y_origin:y_end, x_origin:x_end]
alpha = alpha_mask[y_origin:y_end, x_origin:x_end] if alpha_mask is not None else None
else:
img_overlay_crop = img_overlay
alpha = alpha_mask
y1 = max(y, 0)
y2 = min(img.shape[0], y1 + img_overlay_crop.shape[0])
x1 = max(x, 0)
x2 = min(img.shape[1], x1 + img_overlay_crop.shape[1])
img_crop = img[y1:y2, x1:x2]
img_crop[:] = alpha * img_overlay_crop + (1.0 - alpha) * img_crop if alpha is not None else img_overlay_crop

Categories