Related
I have a dataset of face images (taken from http://vision.ucsd.edu/content/yale-face-database ) that I want to turn into a GIF resembling the scramble suit from the movie A Scanner Darkly ( http://2.bp.blogspot.com/-tRLWSOqh84Y/VSb_cF7sOoI/AAAAAAAAAWI/3XqT6d_exso/s1600/scramble%2Bsuit%2B2.gif ).
So far, I am able to take the images and cut them into face "pieces" in python in bulk.
The next step, which I am unable to do, is to "align" these faces so that the pieces form a complete face when they are merged back together.
I'm also unsure how to merge them back together.
Once i have a bunch of images of randomly pieced together images, i am able to create the gif myself.
here is the code i have so far of taking the images, converting them to jpg and cutting them into necessary pieces (which was taken from here https://leslietj.github.io/2020/06/30/Automatic-Face-Crop-Using-Dlib/ ):
import sys
import dlib
from skimage import io
import numpy as np
import cv2
import matplotlib.pylab as plt
import math
from PIL import Image
import os
def arc_points(point1, point2, num_of_points):
    """Return points on the half circle that spans ``point1`` and ``point2``.

    The circle is centred on the midpoint of the two points and its radius
    is half of their horizontal (x) distance.  Angles run from pi towards
    2*pi in ``num_of_points`` equal steps; the i = 0 sample (angle pi) is
    omitted, so ``num_of_points - 1`` points are produced.

    Args:
        point1: ``(x, y)`` of one end of the arc.
        point2: ``(x, y)`` of the other end of the arc.
        num_of_points: Number of angular steps the half circle is split into.

    Returns:
        A list of ``[x, y]`` pairs (floats), in order of increasing angle.
    """
    center_x = (point1[0] + point2[0]) / 2
    center_y = (point1[1] + point2[1]) / 2
    # Only the x extent of the two points defines the radius.
    radius = abs((point1[0] - point2[0]) / 2)
    angles = (
        math.pi + i * math.pi / num_of_points
        for i in range(1, num_of_points)
    )
    return [
        [center_x + radius * math.cos(angle),
         center_y + radius * math.sin(angle)]
        for angle in angles
    ]
def get_landmarks(img, mode=1):
    """Build the 34-point outline used to cut a face piece out of ``img``.

    Uses the module-level ``detector`` and ``predictor`` objects.  For every
    detection the first slots of the outline are filled with selected
    predictor landmarks and the remaining slots (counting down from index
    33) with points from ``arc_points``.  If several faces are detected,
    each one overwrites the previous result.

    Args:
        img: Image passed to ``detector`` and ``predictor``.
        mode: 1 = quarter face, 2 = half face, 3 = 3/4 face, 4 = full face.
            Any other value leaves the outline at zero.

    Returns:
        A ``(34, 2)`` float array of (x, y) points; all zeros when no face
        was detected.
    """
    # mode -> (predictor part index for slots 0..k, part index that closes
    #          the arc started at part 0, number of arc steps)
    layouts = {
        1: ((0, 1, 2, 30, 29, 28), 28, 29),               # quarter face (#1)
        2: ((0, 1, 2, 14, 15, 16), 16, 29),               # half face (#2)
        3: (tuple(range(9)) + (31, 14, 15, 16), 16, 22),  # 3/4 face (#3)
        4: (tuple(range(17)), 16, 18),                    # full face (#4)
    }
    layout = layouts.get(mode)

    landmarks = np.zeros((34, 2))
    for detection in detector(img, 1):
        shape = predictor(img, detection)
        if layout is None:
            continue
        part_ids, arc_end_id, arc_steps = layout

        for slot, part_id in enumerate(part_ids):
            part = shape.part(part_id)
            landmarks[slot] = (part.x, part.y)

        arc_start, arc_end = shape.part(0), shape.part(arc_end_id)
        arc = arc_points([arc_start.x, arc_start.y],
                         [arc_end.x, arc_end.y],
                         arc_steps)
        # The arc fills the outline backwards from the last slot.
        for offset, arc_point in enumerate(arc):
            landmarks[33 - offset] = (arc_point[0], arc_point[1])
    return landmarks
def inside(X, Y, Region):
    """Return True when the point (X, Y) lies inside the polygon ``Region``.

    Even-odd ray casting: every polygon edge that straddles the horizontal
    line through ``Y`` and crosses it strictly to the left of ``X`` toggles
    the result.

    Args:
        X: x coordinate of the point.
        Y: y coordinate of the point.
        Region: Sequence (list or array) of ``(x, y)`` vertices; the last
            vertex is implicitly joined to the first.

    Returns:
        bool: True if the point is inside, False otherwise (always False
        for an empty polygon).
    """
    vertex_count = len(Region)
    if vertex_count == 0:
        return False

    is_inside = False
    # Start with the closing edge (last vertex -> first vertex).
    previous = Region[vertex_count - 1]
    for current in Region:
        cur_x, cur_y = current[0], current[1]
        prev_x, prev_y = previous[0], previous[1]
        # The edge straddles the line y == Y; this also guarantees
        # cur_y != prev_y, so the division below cannot be by zero.
        if (cur_y < Y <= prev_y) or (prev_y < Y <= cur_y):
            crossing_x = cur_x + (Y - cur_y) / (prev_y - cur_y) * (prev_x - cur_x)
            if crossing_x < X:
                is_inside = not is_inside
        previous = current
    return is_inside
# Build the dlib models once; they do not depend on the image or the mode.
detector = dlib.get_frontal_face_detector()
# the .dat file can be downloaded following this link:
# https://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2/download
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

# Only files whose name ends with one of these suffixes are processed.
wanted_suffixes = ('glasses', 'happy', 'noglasses', 'normal')

count = 0
files = os.listdir('yalefaces')
for filename in files:
    if not filename.endswith(wanted_suffixes):
        continue

    # Convert the source image to an RGB JPEG and re-read it from there.
    with Image.open(os.path.join('yalefaces', filename)) as source_im:
        source_im.convert("RGB").save('temp.jpg')
    count += 1
    path = 'temp.jpg'
    img = io.imread(path)

    # The top-left pixel is taken as the background colour.
    background = (img[0, 0][0], img[0, 0][1], img[0, 0][2])
    height, width = img.shape[0], img.shape[1]

    # One output image per face-piece mode (1..4, see get_landmarks).
    for mode in range(1, 5):
        region = get_landmarks(img, mode=mode)
        cropped_img = img.copy()
        for i in range(height):
            for j in range(width):
                # inside() takes (x, y), i.e. (column, row).
                if not inside(j, i, region):
                    cropped_img[i, j] = background
        # skimage loads images in RGB channel order, so convert from RGB.
        cropped_img = cv2.cvtColor(cropped_img, cv2.COLOR_RGB2GRAY)
        cv2.imwrite(str(count).zfill(3) + '-' + str(mode).zfill(2) + '.jpg', cropped_img)
since this is several steps of pre-processing:
align faces using this script https://pyimagesearch.com/2017/05/22/face-alignment-with-opencv-and-python/
Cut up the faces and put them together. As mentioned before, I was using cv2.add, which is not what I want: I want the layers to stack, so that wherever an upper layer has content, whatever lies underneath it is ignored. cv2.add doesn't do this, so I had to write my own. Given two layers, the top layer takes priority, and the bottom layer only shows through where there is nothing above it.
def reduction(layer1, layer2):
    """Blank out the pixels of ``layer1`` that ``layer2`` already covers.

    A pixel counts as covered when its value in ``layer2`` is not 255.
    Covered pixels of ``layer1`` are set to 255 in place.

    Args:
        layer1: Image array, modified in place.
        layer2: Image array at least as large as ``layer1``; only the part
            overlapping ``layer1`` is consulted.

    Returns:
        ``layer1`` itself (the same object that was passed in).
    """
    rows, cols = layer1.shape[:2]
    covered = layer2[:rows, :cols] != 255
    layer1[covered] = 255
    return layer1
# Remove from layer2 everything that layer1 (the top layer) already covers.
layer2 = reduction(layer2, layer1)
# Copy what is left of layer2 into layer1; 255 marks "nothing here".
rows, cols = layer1.shape[:2]
overlap = layer2[:rows, :cols]
visible = overlap != 255
layer1[visible] = overlap[visible]
and thats it. i didnt realize images are just numpy arrays so i can just manipulate the arrays directly.
In order to better understand how image manipulation works, I've decided to create my own image rotation algorithm rather than using cv2.rotate() However, I'm encountering a weird picture cropping and pixel misplacement issue.
I think it may have something to do with my padding, but there may be other errors
import cv2
import math
import numpy as np
# Load & Show original image
img = cv2.imread('Lena.png', 0)
cv2.imshow('Original', img)
# Variable declarations
h = img.shape[0] # Also known as rows
w = img.shape[1] # Also known as columns
# NOTE(review): cX is derived from the row count and cY from the column
# count; below they are paired with the row index x and the column index y.
cX = h / 2 #Image Center X
cY = w / 2 #Image Center Y
theta = math.radians(100) #Change to adjust rotation angle
imgArray = np.array((img))
imgArray = np.pad(imgArray,pad_width=((100,100),(100,100)),mode='constant',constant_values=0)
#Add padding in an attempt to prevent image cropping
# NOTE(review): only imgArray is padded (100 pixels per side); the loop below
# still writes at the unpadded coordinates (x, y), so nothing is shifted into
# the enlarged canvas.
# loop pixel by pixel in image
for x in range(h + 1):
for y in range(w + 1):
try:
# Rotate (x, y) about (cX, cY) by theta and truncate to integer indices.
TX = int((x-cX)*math.cos(theta)+(y-cY)*math.sin(theta)+cX) #Rotation formula
TY = int(-(x-cX)*math.sin(theta)+(y-cY)*math.cos(theta)+cY) #Rotation formula
# NOTE(review): a negative TX or TY does not raise IndexError; NumPy counts
# it from the end of the axis, so such pixels wrap around instead of being
# skipped.
imgArray[x,y] = img[TX,TY]
except IndexError as error:
print(error)
cv2.imshow('Rotated', imgArray)
cv2.waitKey(0)
Edit:
I think the misplaced image position may have something to do with lack of proper origin point, however I cannot seem to find a functioning solution to that problem.
Though I didn't dive in the math part of the domain, but based on the given information I think the matrix rotating formula should work like this:
UPDATE:
As promised, I dived a bit deeper into the domain and arrived at the solution below. The main trick is that I swapped the source and destination indices in the loop, so rounding no longer causes any problems:
import cv2
import math
import numpy as np
# Load & Show original image
img = cv2.imread('/home/george/Downloads/lena.png', 0)
cv2.imshow('Original', img)

# Pad the source by p pixels on every side so the rotated corners are not
# cropped; the destination has the same (padded) size and starts out black.
p = 120
img = np.pad(img, pad_width=p, mode='constant', constant_values=0)
imgArray = np.zeros_like(img)
h = img.shape[0]  # padded rows
w = img.shape[1]  # padded columns

# Rotation centre: cX is the centre row, cY the centre column.
cX = h / 2
cY = w / 2
theta = math.radians(45)  # Change to adjust rotation angle
cos_t = math.cos(theta)
sin_t = math.sin(theta)

# Inverse mapping: walk over every destination pixel (TX, TY) and look up
# the source pixel (x, y) it comes from, so no destination pixel is missed.
for TX in range(h):
    for TY in range(w):
        x = int((TX - cX) * cos_t + (TY - cY) * sin_t + cX)   # Rotation formula
        y = int(-(TX - cX) * sin_t + (TY - cY) * cos_t + cY)  # Rotation formula
        # Explicit bounds check: a negative index would not raise but wrap
        # around to the other side of the image.
        if 0 <= x < h and 0 <= y < w:
            imgArray[TX, TY] = img[x, y]

cv2.imshow('Rotated', imgArray)
cv2.waitKey(0)
exit()
Note: See usr2564301 comment too, if you want to dive deeper in the domain.
I am trying to implement an algorithm in python to scale images by a factor or rotate them by a given angle (or both at the same time). I am using opencv to handle the images and I know opencv has these functions built in, however I want to do this myself to better understand image transformations. I believe I calculate the rotation matrix correctly. However, when I try to implement the affine transformation, it does not come out correctly.
import numpy as np
import cv2
import math as m
import sys
# First command-line argument: path of the image to transform.
img = cv2.imread(sys.argv[1])
# Second command-line argument: rotation angle; kept as a string here and
# converted with int() where it is used.
angle = sys.argv[2]
#get rotation matrix
def getRMat(center, angle, scale):
    """Return the 2x3 affine matrix of a scaled rotation about ``center``.

    Args:
        center: ``(cx, cy)`` pair, the point that stays fixed.
        angle: Rotation angle in degrees.
        scale: Isotropic scale factor.

    Returns:
        ``np.ndarray`` of shape (2, 3): ``[[a, b, u], [-b, a, v]]`` with
        ``a = scale*cos(angle)`` and ``b = scale*sin(angle)``; ``u`` and
        ``v`` are the translation that keeps ``center`` in place.
    """
    # Unpack here rather than in the signature: tuple parameters are a
    # syntax error on Python 3, and callers still pass one (cx, cy) tuple.
    cx, cy = center
    theta = angle * np.pi / 180
    a = scale * m.cos(theta)
    b = scale * m.sin(theta)
    u = (1 - a) * cx - b * cy
    v = b * cx + (1 - a) * cy
    return np.array([[a, b, u], [-b, a, v]])
#determine shape of img
h, w = img.shape[:2]
#print h, w
#determine center of image
cx, cy = (w / 2, h / 2)
#calculate rotation matrix
#then grab sine and cosine of the matrix
# The angle is negated before it is handed to getRMat; the scale is 1.
mat = getRMat((cx,cy), -int(angle), 1)
print mat
# With scale 1, mat[0,0] and mat[0,1] are the cosine and sine of the angle.
cos = np.abs(mat[0,0])
sin = np.abs(mat[0,1])
#calculate new height and width to account for rotation
# (size of the axis-aligned box that contains the rotated w x h rectangle)
newWidth = int((h * sin) + (w * cos))
newHeight = int((h * cos) + (w * sin))
#print newWidth, newHeight
# Add to the translation column the offset between the centre of the new
# canvas and the centre of the original image.
mat[0,2] += (newWidth / 2) - cx
mat[1,2] += (newHeight / 2) - cy
#this is how the image SHOULD look
dst = cv2.warpAffine(img, mat, (newWidth, newHeight))
cv2.imshow('dst', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
#apply transform
#attempt at my own warp affine function...still buggy tho
# Forward-maps every pixel of `image` through the 2x3 `matrix` into a new
# black canvas and returns that canvas.
# NOTE(review): the tuple parameter `(width, height)` is Python 2 only syntax.
def warpAff(image, matrix, (width, height)):
# NOTE(review): the canvas is allocated as (width, height, 3), i.e. `width`
# is used as the number of rows.
dst = np.zeros((width, height, 3), dtype=np.uint8)
oldh, oldw = image.shape[:2]
#print oldh, oldw
#loop through old img and transform its coords
# x runs over rows (oldh) and y over columns (oldw) of the source.
for x in range(oldh):
for y in range(oldw):
#print y, x
#transform the coordinates
u = int(x*matrix[0,0]+y*matrix[0,1]+matrix[0,2])
v = int(x*matrix[1,0]+y*matrix[1,1]+matrix[1,2])
#print u, v
#v -= width / 1.5
# NOTE(review): u is checked against `height` and v against `width`, while
# dst has `width` rows and `height` columns.
if (u >= 0 and u < height) and (v >= 0 and v < width):
# Destination pixels that no source pixel maps onto stay black.
dst[u,v] = image[x,y]
return dst
# Same matrix and canvas size as the cv2.warpAffine call, but through the
# hand-written warpAff, for visual comparison.
dst = warpAff(img, mat, (newWidth, newHeight))
cv2.imshow('dst', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
Image I am using for testing
You're applying the rotation backward.
This means that for an angle of 20, instead of rotating 20 degrees clockwise, you rotate 20 degrees counterclockwise. That on its own would be easy to fix—just negate the angle.
But it also means that, for each destination pixel, if no source pixel exactly rotates to it, you end up with an all-black pixel. You could solve that by using any interpolation algorithm, but it's making things more complicated.
If we instead just reverse the process, and instead of calculating the destination (u, v) for each (x, y), we calculate the source (x, y) for every destination (u, v), that solves both problems:
# Inverse-mapping version: for every destination pixel (u, v) the matrix
# gives the source position (x, y) whose pixel is copied.
# NOTE(review): dst is allocated and indexed as (width, height) while the
# bounds check compares x with oldw and y with oldh and the source is read as
# image[intx, inty]; the accompanying text states that this version only
# works for square images.
def warpAff(image, matrix, width, height):
dst = np.zeros((width, height, 3), dtype=np.uint8)
oldh, oldw = image.shape[:2]
# Loop over the destination, not the source, to ensure that you cover
# every destination pixel exactly 1 time, rather than 0-4 times.
for u in range(width):
for v in range(height):
x = u*matrix[0,0]+v*matrix[0,1]+matrix[0,2]
y = u*matrix[1,0]+v*matrix[1,1]+matrix[1,2]
# int() truncates toward zero.
intx, inty = int(x), int(y)
# We could interpolate here by using something like this linear
# interpolation matrix, but let's keep it simple and not do that.
# fracx, fracy = x%1, y%1
# interp = np.array([[fracx*fracy, (1-fracx)*fracy],
# [fracx*(1-fracy), (1-fracx)*(1-fracy)]])
# Strict lower bound: a source coordinate of exactly 0 is rejected.
if 0 < x < oldw and 0 < y < oldh:
dst[u, v] = image[intx, inty]
return dst
Now the only remaining problem is that you didn't apply the shift backward, so we end up shifting the image in the wrong direction when we turn everything else around. That's trivial to fix:
# Shift for the destination -> source mapping: the sign of the offset between
# the new canvas centre (newWidth / 2, newHeight / 2) and the original image
# centre (cx, cy) is reversed compared with the forward mapping.
mat[0,2] += cx - (newWidth / 2)
mat[1,2] += cy - (newHeight / 2)
You do have one more problem: your code (and this updated code) only works for square images. You're getting height and width backward multiple times, and they almost all cancel out, but apparently one of them doesn't. In general, you're treating your arrays as (width, height) rather than (height, width), but you end up comparing to (original version) or looping over (new version) (height, width). So, if height and width are different, you end up trying to write past the end of the array.
Trying to find all of these and fix them is probably as much work as just starting over and doing it consistently everywhere from the start:
# getRMat takes the rotation centre as a single (cx, cy) tuple.
mat = getRMat((cx, cy), int(angle), 1)
# With scale 1, mat[0,0] and mat[0,1] are the cosine and sine of the angle.
cos = np.abs(mat[0,0])
sin = np.abs(mat[0,1])
# Size of the axis-aligned box that contains the rotated w x h image.
newWidth = int((h * sin) + (w * cos))
newHeight = int((h * cos) + (w * sin))
# The matrix is used destination -> source, so shift from the centre of the
# new canvas back to the centre of the original image.
mat[0,2] += cx - (newWidth / 2)
mat[1,2] += cy - (newHeight / 2)
def warpAff2(image, matrix, width, height):
    """Warp ``image`` with a 2x3 affine ``matrix`` using inverse mapping.

    For every destination pixel at column ``u`` and row ``v`` the matrix
    yields the source position ``(x, y)``; the source pixel at the
    truncated position is copied.  Destination pixels whose source
    position falls outside ``image`` stay black.

    Args:
        image: Source array indexed ``[row, column]`` with 3 channels.
        matrix: 2x3 array mapping destination ``(u, v, 1)`` to source
            ``(x, y)``.
        width: Number of columns of the result.
        height: Number of rows of the result.

    Returns:
        ``np.ndarray`` of shape ``(height, width, 3)`` and dtype uint8.
    """
    dst = np.zeros((height, width, 3), dtype=np.uint8)
    oldh, oldw = image.shape[:2]
    for u in range(width):
        for v in range(height):
            x = u * matrix[0, 0] + v * matrix[0, 1] + matrix[0, 2]
            y = u * matrix[1, 0] + v * matrix[1, 1] + matrix[1, 2]
            # Test the un-truncated coordinates with an inclusive lower
            # bound: a strict `0 <` on the truncated values never samples
            # source row/column 0, and int() truncates toward zero, so
            # positions in (-1, 0) must be rejected before truncation.
            if 0 <= x < oldw and 0 <= y < oldh:
                dst[v, u] = image[int(y), int(x)]
    return dst
# Warp into a canvas of newWidth columns by newHeight rows.
dst = warpAff2(img, mat, newWidth, newHeight)
It's worth noting that there are much simpler (and more efficient) ways to implement this. If you build a 3x3 square matrix, you can vectorize the multiplication. Also, you can create the matrix more simply by just multiplying a shift matrix @ a rotation matrix @ an unshift matrix instead of manually fixing things up after the fact. But hopefully this version, since it's as close as possible to your original, should be easiest to understand.
So apologies for the length of code here. Basically I have used opencv to analyse an image of 7 shapes and read 4 features from it.
The problem is that the code is only giving me out arrays for 5 shapes and I'm unsure why. I have left out the imports etc at the start to shorten the code.
img = cv2.imread("C:\\Users\\telli\\Desktop\\Shapetest.jpg")
# Converting the image to Grayscale
grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(grey, 127, 255, 1)
# findContours returns 3 values in OpenCV 3 and 2 in OpenCV 4; the last two
# are (contours, hierarchy) in both.
contours, hierarchy = cv2.findContours(thresh, 1, cv2.CHAIN_APPROX_SIMPLE)[-2:]
contours = sorted(contours, key=len)

# Every list below receives exactly one entry per analysed shape, so index i
# refers to the same shape in all of them.  A circle has no corners; its
# corner-based entries are None.
numberOfSides = []
corners = []
standardDeviationsPerimeter = []
sidesDividedByPerimeter = []
standardDeviationsAngles = []
largestAngles = []
angles = []
perimeters = []
featureVectors = []

# finding all the possible circles in the image (likely many false positives)
circles = cv2.HoughCircles(grey, cv2.HOUGH_GRADIENT, 1, 20,
                           param1=50, param2=30, minRadius=0, maxRadius=0)
# HoughCircles yields None when it finds nothing.
candidateCircles = [] if circles is None else circles[0]

epsilon = 10  # margin added on every side of a contour's bounding box

# remove double recognitions due to thick lines: keep every second contour
for contour in contours[::2]:
    x, y, w, h = cv2.boundingRect(contour)
    x -= epsilon
    y -= epsilon
    w += 2 * epsilon
    h += 2 * epsilon

    # Removing false positive circles: the contour is a circle when some
    # candidate circle lies COMPLETELY inside its enlarged bounding box.
    insideCircle = False
    for candidate in candidateCircles:
        centreX, centreY, radius = candidate[0], candidate[1], candidate[2]
        if (centreX - radius >= x and centreX + radius <= x + w
                and centreY - radius >= y and centreY + radius <= y + h):
            insideCircle = True
            break

    # Finding the perimeter of the shape
    perim = cv2.arcLength(contour, True)
    perimeters.append(perim)

    if insideCircle:
        # A circle counts as one side; it has no corner-based features.
        numberOfSides.append(1)
        corners.append(None)
        angles.append(None)
        sidesDividedByPerimeter.append(None)
        largestAngles.append(None)
        standardDeviationsPerimeter.append(None)
        standardDeviationsAngles.append(None)
        featureVectors.append([1, None, None, None])
        continue

    # use the number of corners found in the contour to determine how many
    # sides it has
    corner = cv2.approxPolyDP(contour, 0.01 * perim, True)
    cornerCount = len(corner)
    corners.append(corner)
    numberOfSides.append(cornerCount)

    shapeAngles = []
    shapeSides = []
    for k in range(cornerCount):
        # 3 consecutive vertices a, b, c; the angle is measured at b
        ax, ay = corner[k][0][0], corner[k][0][1]
        bx, by = corner[(k + 1) % cornerCount][0][0], corner[(k + 1) % cornerCount][0][1]
        cx, cy = corner[(k + 2) % cornerCount][0][0], corner[(k + 2) % cornerCount][0][1]
        dirBAx = ax - bx
        dirBAy = ay - by
        dirBCx = cx - bx
        dirBCy = cy - by
        # do dot product and find angle in degrees
        dot = dirBAx * dirBCx + dirBAy * dirBCy
        lengthBC = math.sqrt(dirBCx * dirBCx + dirBCy * dirBCy)
        lengthBA = math.sqrt(dirBAx * dirBAx + dirBAy * dirBAy)
        # Rounding can push the cosine marginally outside [-1, 1].
        cosine = max(-1.0, min(1.0, dot / (lengthBC * lengthBA)))
        shapeAngles.append(math.acos(cosine) * 180 / math.pi)
        # Side length relative to this shape's own perimeter.
        shapeSides.append(lengthBC / perim)

    angles.append(shapeAngles)
    sidesDividedByPerimeter.append(shapeSides)
    largestAngles.append(max(shapeAngles))
    # Standard deviation of the relative side lengths and of the angles
    standardDeviationsPerimeter.append(statistics.stdev(shapeSides))
    standardDeviationsAngles.append(statistics.stdev(shapeAngles))
    featureVectors.append([
        numberOfSides[-1],
        standardDeviationsPerimeter[-1],
        standardDeviationsAngles[-1],
        largestAngles[-1],
    ])

print(featureVectors)
And featureVector prints out this:
[[4, 0.001743713493735165, 0.6497055601752815, 90.795723552739275],
[4, 0.0460937435599832, 0.19764217920409227, 90.204147248752378],
[1, 0.001185534503063044, 0.3034913722821194, 60.348908179729023],
[1, 0.015455289770298222, 0.8380914254332884, 109.02120657826231],
[3, 0.0169961646358455, 41.36919146079211, 136.83829993466398]]
However there should be 7 shapes.
What i cant figure out is where to append blank values for the 2nd/3rd/4th feature for a circle and allow the program to continue running. It currently appears to be giving the 2nd/3rd/4th value from the next two shapes to the circles.
Hi, I am creating a program that replaces a face in an image with someone else's face. However, I am stuck on inserting the new face into the original, larger image. I have researched ROI and addWeighted (which needs the images to be the same size), but I haven't found a way to do this in Python. Any advice would be appreciated. I am new to OpenCV.
I am using the following test images:
smaller_image:
larger_image:
Here is my Code so far... a mixer of other samples:
import cv2
import cv2.cv as cv
import sys
import numpy
def detect(img, cascade):
    """Run ``cascade`` on ``img`` and return the hits as corner pairs.

    Returns an empty list when nothing is detected; otherwise the array
    returned by ``detectMultiScale`` with its last two columns turned from
    sizes into end coordinates, i.e. rows of ``(x1, y1, x2, y2)``.
    """
    found = cascade.detectMultiScale(
        img,
        scaleFactor=1.1,
        minNeighbors=3,
        minSize=(10, 10),
        flags=cv.CV_HAAR_SCALE_IMAGE
    )
    if len(found) > 0:
        # (x, y, w, h) -> (x, y, x + w, y + h), in place
        found[:, 2:] += found[:, :2]
        return found
    return []
def draw_rects(img, rects, color):
    """Draw each ``(x1, y1, x2, y2)`` box of ``rects`` on ``img``.

    The outline is drawn in ``color`` with a thickness of 2.
    """
    for box in rects:
        x1, y1, x2, y2 = box
        top_left = (x1, y1)
        bottom_right = (x2, y2)
        cv2.rectangle(img, top_left, bottom_right, color, 2)
if __name__ == '__main__':
    if len(sys.argv) != 2:  ## Check for error in usage syntax
        print("Usage : python faces.py <image_file>")
    else:
        img = cv2.imread(sys.argv[1], cv2.CV_LOAD_IMAGE_COLOR)  ## Read image file
        # `img == None` compares element-wise on an array; test identity.
        if img is None:
            print("Could not open or find the image")
        else:
            cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
            gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
            gray = cv2.equalizeHist(gray)
            rects = detect(gray, cascade)
            if len(rects) == 0:
                print("No face detected")
            else:
                ## Extract face coordinates: detect() returns rows of
                ## (x1, y1, x2, y2), so only indices 0..3 exist.
                x1, y1, x2, y2 = rects[0]
                x = x2 - x1  # face width
                y = y2 - y1  # face height
                ## Extract face ROI (arrays are indexed [row, column])
                faceROI = gray[y1:y2, x1:x2]
                ## Show face ROI
                cv2.imshow('Display face ROI', faceROI)
                small = cv2.imread("average_face.png", cv2.CV_LOAD_IMAGE_COLOR)
                print("here")
                # cv2.resize takes the target size as (width, height).
                small = cv2.resize(small, (x, y))
                cv2.namedWindow('Display image')  ## create window for display
                cv2.imshow('Display image', small)  ## Show image in the window
                print("size of image:  %s" % (img.shape,))  ## print size of image
                cv2.waitKey(1000)
A simple way to achieve what you want:
import cv2
# Overlay to paste, and the larger image it is pasted into
s_img = cv2.imread("smaller_image.png")
l_img = cv2.imread("larger_image.jpg")

# Top-left corner of the paste inside l_img
x_offset = 50
y_offset = 50

# The target window has exactly the overlay's rows and columns
overlay_rows, overlay_cols = s_img.shape[0], s_img.shape[1]
l_img[y_offset:y_offset + overlay_rows, x_offset:x_offset + overlay_cols] = s_img
Update
I suppose you want to take care of the alpha channel too. Here is a quick and dirty way of doing so:
# Re-read the overlay with flag -1; its fourth channel is used as alpha below
s_img = cv2.imread("smaller_image.png", -1)

# Window of l_img covered by the overlay
overlay_rows, overlay_cols = s_img.shape[0], s_img.shape[1]
y1, y2 = y_offset, y_offset + overlay_rows
x1, x2 = x_offset, x_offset + overlay_cols

# Per-pixel weights: overlay opacity in [0, 1] and its complement
alpha_s = s_img[:, :, 3] / 255.0
alpha_l = 1.0 - alpha_s

# Blend the three colour channels one at a time
for c in range(3):
    blended = alpha_s * s_img[:, :, c] + alpha_l * l_img[y1:y2, x1:x2, c]
    l_img[y1:y2, x1:x2, c] = blended
Using @fireant's idea, I wrote up a function to handle overlays. This works well for any position argument (including negative positions).
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask):
    """Overlay `img_overlay` onto `img` at (x, y) and blend using `alpha_mask`.

    `img` is modified in place.  (x, y) is the column/row at which the
    top-left corner of the overlay is placed; it may be negative or lie
    outside `img`, in which case the overlay is cropped to the part that
    overlaps `img`.

    `alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
    `img` and `img_overlay` may be multi-channel (H x W x C) or grayscale
    (H x W) arrays.

    Returns None.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    overlay_h, overlay_w = img_overlay.shape[0], img_overlay.shape[1]

    # Image ranges
    y1, y2 = max(0, y), min(img_h, y + overlay_h)
    x1, x2 = max(0, x), min(img_w, x + overlay_w)

    # Overlay ranges
    y1o, y2o = max(0, -y), min(overlay_h, img_h - y)
    x1o, x2o = max(0, -x), min(overlay_w, img_w - x)

    # Exit if nothing to do
    if y1 >= y2 or x1 >= x2 or y1o >= y2o or x1o >= x2o:
        return

    # Blend overlay within the determined ranges
    img_crop = img[y1:y2, x1:x2]
    img_overlay_crop = img_overlay[y1o:y2o, x1o:x2o]
    alpha = alpha_mask[y1o:y2o, x1o:x2o]
    if img_crop.ndim == 3:
        # Trailing axis so the HxW mask broadcasts over the channels.
        alpha = alpha[:, :, None]
    alpha_inv = 1.0 - alpha

    img_crop[:] = alpha * img_overlay_crop + alpha_inv * img_crop
Example usage:
import numpy as np
from PIL import Image
# Column/row at which the overlay's top-left corner is placed
x, y = 50, 0

# Load both pictures as arrays
img = np.array(Image.open("img_large.jpg"))
img_overlay_rgba = np.array(Image.open("img_small.png"))

# Split the overlay: first three channels are colour, the fourth becomes a
# [0, 1] alpha mask
img_overlay = img_overlay_rgba[:, :, :3]
alpha_mask = img_overlay_rgba[:, :, 3] / 255.0

# Blend onto a 3-channel copy so that `img` itself is left untouched
img_result = img[:, :, :3].copy()
overlay_image_alpha(img_result, img_overlay, x, y, alpha_mask)

# Write the blended picture to disk
Image.fromarray(img_result).save("img_result.jpg")
Result:
If you encounter errors or unusual outputs, please ensure:
img should not contain an alpha channel. (e.g. If it is RGBA, convert to RGB first.)
img_overlay has the same number of channels as img.
Based on fireant's excellent answer above, here is the alpha blending but a bit more human legible. You may need to swap 1.0-alpha and alpha depending on which direction you're merging (mine is swapped from fireant's answer).
o* == offsets into s_img (ox, oy)
b* == offsets into l_img (bx, by)
# The alpha window comes from channel 3 of s_img and is the same for every
# colour channel, so slice and scale it once.
alpha = s_img[oy:oy+height, ox:ox+width, 3] / 255.0
for c in range(0, 3):
    color = s_img[oy:oy+height, ox:ox+width, c] * (1.0 - alpha)
    beta = l_img[by:by+height, bx:bx+width, c] * alpha
    l_img[by:by+height, bx:bx+width, c] = color + beta
Here it is:
def put4ChannelImageOn4ChannelImage(back, fore, x, y):
    """Copy the non-transparent pixels of ``fore`` onto ``back`` in place.

    A pixel of ``fore`` is copied (all four channels) when its fourth
    channel is non-zero.  The part of ``fore`` that would fall outside
    ``back`` is ignored.

    Args:
        back: 4-channel array that is written to.
        fore: 4-channel array to paste.
        x: Column of ``back`` at which the left edge of ``fore`` is placed.
        y: Row of ``back`` at which the top edge of ``fore`` is placed.

    Returns:
        None.
    """
    rows, cols = fore.shape[0], fore.shape[1]
    back_rows, back_cols = back.shape[0], back.shape[1]

    # Clip the paste window to the bounds of `back`.
    top, bottom = max(y, 0), min(y + rows, back_rows)
    left, right = max(x, 0), min(x + cols, back_cols)
    if top >= bottom or left >= right:
        return

    fore_part = fore[top - y:bottom - y, left - x:right - x]
    visible = fore_part[..., 3] != 0  # Where not transparent
    # Basic slicing yields a view, so this writes straight into `back`.
    back[top:bottom, left:right][visible] = fore_part[visible]
#test
# 1000 x 1000 four-channel canvas in which every pixel is (127, 127, 127, 1).
background = np.zeros((1000, 1000, 4), np.uint8)
background[:] = (127, 127, 127, 1)
# IMREAD_UNCHANGED is used so the overlay is loaded as stored in the file;
# presumably it carries a fourth (alpha) channel, which the function reads.
overlay = cv2.imread('imagee.png', cv2.IMREAD_UNCHANGED)
# Paste with the overlay's top-left corner at column 5, row 5.
put4ChannelImageOn4ChannelImage(background, overlay, 5, 5)
A simple function that blits an image front onto an image back and returns the result. It works with both 3 and 4-channel images and deals with the alpha channel. Overlaps are handled as well.
The output image has the same size as back, but always 4 channels.
The output alpha channel is given by (u+v)/(1+uv) where u,v are the alpha channels of the front and back image and -1 <= u,v <= 1. Where there is no overlap with front, the alpha value from back is taken.
import cv2
def merge_image(back, front, x, y):
    """Blit ``front`` onto a copy of ``back`` at (x, y) with alpha blending.

    3-channel inputs are first converted to 4 channels.  Only the part of
    ``front`` that overlaps ``back`` is used; when there is no overlap the
    (converted) copy of ``back`` is returned unchanged.

    Args:
        back: 3- or 4-channel background array; not modified.
        front: 3- or 4-channel array to draw on top.
        x: Column of ``back`` at which the left edge of ``front`` is placed.
        y: Row of ``back`` at which the top edge of ``front`` is placed.

    Returns:
        A new 4-channel array with the height and width of ``back``.  In
        the overlap, colour is ``a_f*front + (1 - a_f)*back`` and alpha is
        ``(a_f + a_b) / (1 + a_f*a_b)`` scaled to 0..255, where ``a_f`` and
        ``a_b`` are the front and back alpha divided by 255.
    """
    # convert to rgba
    if back.shape[2] == 3:
        back = cv2.cvtColor(back, cv2.COLOR_BGR2BGRA)
    if front.shape[2] == 3:
        front = cv2.cvtColor(front, cv2.COLOR_BGR2BGRA)

    # crop the overlay from both images
    bh, bw = back.shape[:2]
    fh, fw = front.shape[:2]
    x1, x2 = max(x, 0), min(x + fw, bw)
    y1, y2 = max(y, 0), min(y + fh, bh)

    result = back.copy()
    # No overlap: a negative x2/y2 would otherwise be read as an index
    # counted from the end and produce mismatched crops.
    if x1 >= x2 or y1 >= y2:
        return result

    front_cropped = front[y1 - y:y2 - y, x1 - x:x2 - x]
    back_cropped = back[y1:y2, x1:x2]

    # Keep the channel axis (3:4) so alpha broadcasts over the colours.
    alpha_front = front_cropped[:, :, 3:4] / 255
    alpha_back = back_cropped[:, :, 3:4] / 255

    # replace an area in result with overlay
    result[y1:y2, x1:x2, :3] = (alpha_front * front_cropped[:, :, :3]
                                + (1 - alpha_front) * back_cropped[:, :, :3])
    result[y1:y2, x1:x2, 3:4] = ((alpha_front + alpha_back)
                                 / (1 + alpha_front * alpha_back) * 255)

    return result
To blend s_img with the region underneath it instead of simply overwriting that region, I call cv2.addWeighted before the line
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
as following:
s_img=cv2.addWeighted(l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]],0.5,s_img,0.5,0)
When attempting to write to the destination image using any of these answers above and you get the following error:
ValueError: assignment destination is read-only
A quick potential fix is to set the WRITEABLE flag to true.
img.setflags(write=1)
A simple 4on4 pasting function that works-
def paste(background, foreground, pos=(0, 0)):
    """Alpha-composite a 4-channel ``foreground`` onto ``background`` in place.

    Args:
        background: 4-channel array (last channel is alpha, 0-255); it is
            modified in place.
        foreground: 4-channel array to paste.
        pos: Offset of the foreground inside the background as
            ``(axis-0 offset, axis-1 offset)``, i.e. ``pos[0]`` indexes the
            first array axis and ``pos[1]`` the second.  The foreground is
            cropped where it would run past the far edges.
            NOTE(review): negative offsets are not clipped.

    Returns:
        ``background`` (the same object that was passed in).
    """
    row0 = pos[0]
    col0 = pos[1]

    # Size of the pasted window: the whole foreground, cropped to what is
    # left of the background after the offset (never negative).
    rows = max(0, min(foreground.shape[0], background.shape[0] - row0))
    cols = max(0, min(foreground.shape[1], background.shape[1] - col0))

    target = background[row0:row0 + rows, col0:col0 + cols]  # view
    source = foreground[:rows, :cols]

    # normalize alpha channels from 0-255 to 0-1
    alpha_background = target[:, :, 3] / 255.0
    alpha_foreground = source[:, :, 3] / 255.0

    # set adjusted colors (all three colour channels in one expression)
    fr = alpha_foreground[:, :, None] * source[:, :, :3]
    bg = (alpha_background[:, :, None] * target[:, :, :3]
          * (1 - alpha_foreground)[:, :, None])
    target[:, :, :3] = fr + bg

    # set adjusted alpha and denormalize back to 0-255
    target[:, :, 3] = (1 - (1 - alpha_foreground) * (1 - alpha_background)) * 255

    return background
I reworked @fireant's concept to allow for optional alpha masks and allow any x or y, including values outside of the bounds of the image. It will crop to the bounds.
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask=None):
    """Overlay `img_overlay` onto `img` at (x, y) and blend using optional `alpha_mask`.

    `img` is modified in place.  (x, y) is the column/row at which the
    top-left corner of the overlay is placed; any value is accepted and the
    overlay is cropped to the part that overlaps `img` (nothing happens
    when there is no overlap).

    `alpha_mask` must have same HxW as `img_overlay` and values in range [0, 1].
    When it is None the overlapping part of the overlay is copied as is.

    Returns None.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    overlay_h, overlay_w = img_overlay.shape[0], img_overlay.shape[1]

    # Window of `img` that the overlay covers, clipped to the image.
    y1, y2 = max(y, 0), min(img_h, y + overlay_h)
    x1, x2 = max(x, 0), min(img_w, x + overlay_w)
    if y1 >= y2 or x1 >= x2:
        # Entirely outside: slicing with the resulting negative bounds
        # would select the wrong region instead of an empty one.
        return

    # The same window expressed in overlay coordinates.
    y_origin, y_end = y1 - y, y2 - y
    x_origin, x_end = x1 - x, x2 - x

    img_crop = img[y1:y2, x1:x2]
    img_overlay_crop = img_overlay[y_origin:y_end, x_origin:x_end]

    if alpha_mask is None:
        img_crop[:] = img_overlay_crop
        return

    alpha = alpha_mask[y_origin:y_end, x_origin:x_end]
    if alpha.ndim < img_overlay_crop.ndim:
        # Trailing axis so an HxW mask broadcasts over the channels.
        alpha = alpha[..., None]
    img_crop[:] = alpha * img_overlay_crop + (1.0 - alpha) * img_crop