Color Space Mapping YCbCr to RGB - python

I am experimenting with JPEG compression in Python. I load a TIFF image and store it as a NumPy uint8 RGB array. This is what I was doing for the color mapping:
import numpy as np

def rgb2ycbcr(im):
    cbcr = np.empty_like(im)
    r = im[:,:,0]
    g = im[:,:,1]
    b = im[:,:,2]
    # Y
    cbcr[:,:,0] = .299 * r + .587 * g + .114 * b
    # Cb
    cbcr[:,:,1] = 128 - .169 * r - .331 * g + .5 * b
    # Cr
    cbcr[:,:,2] = 128 + .5 * r - .419 * g - .081 * b
    return np.uint8(cbcr)

def ycbcr2rgb(im):
    rgb = np.empty_like(im)
    y = im[:,:,0]
    cb = im[:,:,1] - 128
    cr = im[:,:,2] - 128
    # R
    rgb[:,:,0] = y + 1.402 * cr
    # G
    rgb[:,:,1] = y - .34414 * cb - .71414 * cr
    # B
    rgb[:,:,2] = y + 1.772 * cb
    return np.uint8(rgb)
I did a simple RGB to YCbCr transformation followed by the inverse transformation:
img = rgb2ycbcr(img)
imshow(img)
img = ycbcr2rgb(img)
imshow(img)
These are the two output images, YCbCr and RGB, after the color space transformations. Something seems to be wrong with my color conversion and I cannot figure out what. I was using the JPEG color space conversion given on Wikipedia. Thank you for the help.

You have to do your intermediate calculations in floating point. The posterization should tip you off; you have a lot of "hot" (saturated) pixels.
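To see why, note that NumPy's cast to uint8 wraps out-of-range values around instead of clipping them (on typical platforms); a minimal illustration with hypothetical values:

import numpy as np

# 1.402 * 127 + 130 = 308.05, which wraps modulo 256 in the uint8 cast
print(np.uint8(1.402 * 127 + 130))  # 52, not a clipped 255

Fixed versions of both functions: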
def rgb2ycbcr(im):
    xform = np.array([[.299, .587, .114], [-.1687, -.3313, .5], [.5, -.4187, -.0813]])
    ycbcr = im.dot(xform.T)
    ycbcr[:,:,[1,2]] += 128
    return np.uint8(ycbcr)

def ycbcr2rgb(im):
    xform = np.array([[1, 0, 1.402], [1, -0.34414, -.71414], [1, 1.772, 0]])
    rgb = im.astype(np.float64)  # np.float was removed in NumPy >= 1.24
    rgb[:,:,[1,2]] -= 128
    rgb = rgb.dot(xform.T)
    np.putmask(rgb, rgb > 255, 255)
    np.putmask(rgb, rgb < 0, 0)
    return np.uint8(rgb)
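As a quick sanity check, the round trip should now be nearly lossless (a minimal sketch; any uint8 RGB array will do):

import numpy as np

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)  # hypothetical test image
back = ycbcr2rgb(rgb2ycbcr(img))
# quantizing Cb/Cr to 8 bits makes the round trip close, but not bit-exact
print(np.abs(img.astype(int) - back.astype(int)).max())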

Related

Sepia filter in Python

I have an assignment where I need to create a sepia filter for my image. I came up with the code below, but the result is not what I need: my image comes out tinted with the single color (159, 85, 30), which is not exactly the right sepia filter.
# SEPIA FILTER
from PIL import Image

Chateau = Image.open("Chateau.png")
Taille = Chateau.size
Chateau_Sepia = Image.new("RGB", Taille)
for x in range(0, Taille[0]):
    for y in range(0, Taille[1]):
        Pixel = Chateau.getpixel((x, y))
        R = Pixel[0]
        G = Pixel[1]
        B = Pixel[2]
        taux_rouge = int(0.393 * R + 0.769 * G + 0.189 * B)
        taux_vert = int(0.349 * R + 0.686 * G + 0.168 * B)
        taux_bleu = int(0.272 * R + 0.534 * G + 0.131 * B)
        if taux_rouge > 255:
            taux_rouge = 255
        if taux_vert > 255:
            taux_vert = 255
        if taux_bleu > 255:
            taux_bleu = 255
        Chateau_Sepia.putpixel((x, y), (taux_rouge, taux_vert, taux_bleu))
Chateau_Sepia.save("Chateau Sépia.png")
Chateau_Sepia.show()
For your problem, try adding the alpha channel and playing with its value to get the tone you want:
Chateau_Sepia.putpixel((x, y), (taux_rouge, taux_vert, taux_bleu, 255))
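As a side note, the per-pixel loop and the manual clamping can be replaced by a vectorized NumPy version of the same sepia matrix; a sketch, assuming Chateau.png opens as (or converts to) an RGB image:

import numpy as np
from PIL import Image

img = np.asarray(Image.open("Chateau.png").convert("RGB")).astype(float)
sepia = np.array([[0.393, 0.769, 0.189],   # same coefficients as the loop above
                  [0.349, 0.686, 0.168],
                  [0.272, 0.534, 0.131]])
out = np.clip(img.dot(sepia.T), 0, 255).astype(np.uint8)  # np.clip replaces the ifs
Image.fromarray(out).save("Chateau Sépia.png")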

Manipulate RGB values in image

I would like to apply a simple algebraic operation to the RGB values of an image that I have loaded via PIL. My current version works, but is slow:
from PIL import Image
import numpy as np

file_name = '1'
im = Image.open('data/' + file_name + '.jpg').convert('RGB')
pixels = np.array(im)
s = pixels.shape
p = pixels.reshape((s[0] * s[1], s[2]))

def update(ratio=0.5):
    p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 1), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
    img = Image.fromarray(np.uint8(p2.reshape(s)))
    img.save('result/' + file_name + '_test.png')
    return 0

update(0.5)
Has someone a more efficient idea?
Make use of NumPy's vectorized operations to get rid of the loop.
I modified your original approach to compare the performance of the following different solutions. I also added a PIL-only approach using ImageMath, in case you want to get rid of NumPy completely.
Furthermore, I assume there is (or was) a bug:
p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 1), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
You never actually convert to float, so it should be 255 instead of 1 in the min call.
Here's what I've done:
import numpy as np
from PIL import Image, ImageMath
import time

# Modified, original implementation; fixed most likely wrong compare value in min (255 instead of 1)
def update_1(ratio=0.5):
    pixels = np.array(im)
    s = pixels.shape
    p = pixels.reshape((s[0] * s[1], s[2]))
    p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 255), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
    img = Image.fromarray(np.uint8(p2.reshape(s)))
    img.save('result_update_1.png')
    return 0

# More efficient vectorized approach using NumPy
def update_2(ratio=0.5):
    pixels = np.array(im)
    pixels[:, :, 0] = np.minimum(pixels[:, :, 0] * (1 + ratio), 255)
    pixels[:, :, 1] = np.maximum(pixels[:, :, 1] * (1 - ratio), 0)
    img = Image.fromarray(pixels)
    img.save('result_update_2.png')
    return 0

# More efficient approach only using PIL
def update_3(ratio=0.5):
    (r, g, b) = im.split()
    r = ImageMath.eval('min(float(r) / 255 * (1 + ratio), 1) * 255', r=r, ratio=ratio).convert('L')
    g = ImageMath.eval('max(float(g) / 255 * (1 - ratio), 0) * 255', g=g, ratio=ratio).convert('L')
    Image.merge('RGB', (r, g, b)).save('result_update_3.png')
    return 0

im = Image.open('path/to/your/image.png')

t1 = time.perf_counter()
update_1(0.5)
print(time.perf_counter() - t1)

t1 = time.perf_counter()
update_2(0.5)
print(time.perf_counter() - t1)

t1 = time.perf_counter()
update_3(0.5)
print(time.perf_counter() - t1)
The performance on a [400, 400] RGB image on my machine:
1.723889293 s # your approach
0.055316339 s # vectorized NumPy approach
0.062502050 s # PIL only approach
Hope that helps!

merge two images with alpha channel

I have two images, one with and the other without an alpha channel. Thus, image A and image B have shapes of (x, y, 4) and (x, y, 3), respectively.
I want to merge both images into a single tensor using Python, where B is the background and A is the upper image. The final image must have a shape of (x, y, 3). I checked whether scikit-image or cv2 is capable of doing this, but I couldn't find any solution.
Here is alpha blending in Python:
import numpy as np
import cv2

alpha = 0.4
img1 = cv2.imread('Desert.jpg')
img2 = cv2.imread('Penguins.jpg')
#r,c,z = img1.shape
out_img = np.zeros(img1.shape, dtype=img1.dtype)
out_img[:,:,:] = (alpha * img1[:,:,:]) + ((1-alpha) * img2[:,:,:])
'''
# if you want to loop over the whole image instead
for y in range(r):
    for x in range(c):
        out_img[y,x,0] = (alpha * img1[y,x,0]) + ((1-alpha) * img2[y,x,0])
        out_img[y,x,1] = (alpha * img1[y,x,1]) + ((1-alpha) * img2[y,x,1])
        out_img[y,x,2] = (alpha * img1[y,x,2]) + ((1-alpha) * img2[y,x,2])
'''
cv2.imshow('Output', out_img)
cv2.waitKey(0)
The above solution works; however, I have a more efficient one:
alpha = A[:,:,3]
A1 = A[:,:,:3]
C = np.multiply(A1, alpha.reshape(x,y,1)) + np.multiply(B, 1-alpha.reshape(x,y,1))
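Note that this snippet assumes A's alpha channel is already scaled to [0, 1]; for uint8 images it runs 0-255 and must be divided by 255 first. A self-contained sketch with A and B as described in the question:

import numpy as np

# A: (x, y, 4) RGBA uint8, B: (x, y, 3) RGB uint8 -- as in the question
alpha = (A[:, :, 3] / 255.0)[:, :, np.newaxis]  # normalize, add axis for broadcasting
C = (A[:, :, :3] * alpha + B * (1 - alpha)).astype(np.uint8)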

tensorflow: how to rotate an image for data augmentation?

In TensorFlow, I would like to rotate an image by a random angle, for data augmentation. But I can't find this transformation in the tf.image module.
This can be done in tensorflow now:
tf.contrib.image.rotate(images, degrees * math.pi / 180, interpolation='BILINEAR')
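For a random angle per image, a sketch along these lines works under TF 1.x, where tf.contrib.image.rotate also accepts a vector of per-image angles (shapes here are hypothetical):

import math
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])  # hypothetical batch
# one random angle per image, within +/- 25 degrees, converted to radians
angles = tf.random_uniform([tf.shape(images)[0]], -25.0, 25.0) * math.pi / 180.0
rotated = tf.contrib.image.rotate(images, angles, interpolation='BILINEAR')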
Because I wanted to be able to rotate tensors I came up with the following piece of code, which rotates a [height, width, depth] tensor by a given angle:
import numpy as np
import tensorflow as tf

def rotate_image_tensor(image, angle, mode='black'):
    """
    Rotates a 3D tensor (HWD), which represents an image, by the given radian angle.
    The new image has the same size as the input image.
    mode controls what happens to border pixels.
    mode = 'black' results in black bars (value 0 in unknown areas)
    mode = 'white' results in value 255 in unknown areas
    mode = 'ones' results in value 1 in unknown areas
    mode = 'repeat' keeps repeating the closest known pixel
    """
    s = image.get_shape().as_list()
    assert len(s) == 3, "Input needs to be 3D."
    assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
    image_center = [np.floor(x/2) for x in s]

    # Coordinates of the new image
    coord1 = tf.range(s[0])
    coord2 = tf.range(s[1])

    # Create vectors of those coordinates in order to vectorize the image
    coord1_vec = tf.tile(coord1, [s[1]])
    coord2_vec_unordered = tf.tile(coord2, [s[0]])
    coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
    coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])

    # Center the coordinates, since the rotation center is supposed to be in the image center
    coord1_vec_centered = coord1_vec - image_center[0]
    coord2_vec_centered = coord2_vec - image_center[1]
    coord_new_centered = tf.cast(tf.pack([coord1_vec_centered, coord2_vec_centered]), tf.float32)

    # Perform backward transformation of the image coordinates
    rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
    rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
    coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)

    # Find the nearest neighbor in the old image
    coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
    coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)

    # Clip values to stay inside image coordinates
    if mode == 'repeat':
        coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
        coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
    else:
        outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
        outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
        outside_ind = tf.logical_or(outside_ind1, outside_ind2)
        coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
        coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))
        coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
        coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))

    coord_old_clipped = tf.cast(tf.transpose(tf.pack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)

    # Coordinates of the new image
    coord_new = tf.transpose(tf.cast(tf.pack([coord1_vec, coord2_vec]), tf.int32), [1, 0])

    image_channel_list = tf.split(2, s[2], image)
    image_rotated_channel_list = list()
    for image_channel in image_channel_list:
        image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)

        if (mode == 'black') or (mode == 'repeat'):
            background_color = 0
        elif mode == 'ones':
            background_color = 1
        elif mode == 'white':
            background_color = 255

        image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
                                                             background_color, validate_indices=False))

    image_rotated = tf.transpose(tf.pack(image_rotated_channel_list), [1, 2, 0])
    return image_rotated
For TensorFlow 2.0:
import tensorflow_addons as tfa
tfa.image.transform_ops.rotate(image, radian)
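The same thing with a random angle per image, as a minimal TF 2.x sketch (tfa.image.rotate is the public alias for the same op):

import math
import tensorflow as tf
import tensorflow_addons as tfa

images = tf.random.uniform([8, 224, 224, 3])                    # hypothetical batch
angles = tf.random.uniform([8], -25.0, 25.0) * math.pi / 180.0  # radians
rotated = tfa.image.rotate(images, angles)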
Rotation and cropping in TensorFlow
I personally needed functions for image rotation and for cropping out black borders in TensorFlow, and I implemented them as below.
import math
import tensorflow as tf

def _rotate_and_crop(image, output_height, output_width, rotation_degree, do_crop):
    """Rotate the given image by the given rotation degree and crop the black edges if necessary.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        output_height: The height of the image after preprocessing.
        output_width: The width of the image after preprocessing.
        rotation_degree: The degree of rotation of the image.
        do_crop: Do cropping if it is True.

    Returns:
        A rotated image.
    """
    # Rotate the given image with the given rotation degree
    if rotation_degree != 0:
        image = tf.contrib.image.rotate(image, math.radians(rotation_degree), interpolation='BILINEAR')

        # Center crop to omit black noise on the edges
        if do_crop:
            lrr_width, lrr_height = _largest_rotated_rect(output_height, output_width, math.radians(rotation_degree))
            resized_image = tf.image.central_crop(image, float(lrr_height)/output_height)
            image = tf.image.resize_images(resized_image, [output_height, output_width], method=tf.image.ResizeMethod.BILINEAR, align_corners=False)

    return image

def _largest_rotated_rect(w, h, angle):
    """
    Given a rectangle of size wxh that has been rotated by 'angle' (in
    radians), computes the width and height of the largest possible
    axis-aligned rectangle within the rotated rectangle.

    Original JS code by 'Andri' and Magnus Hoff from Stack Overflow.
    Converted to Python by Aaron Snoswell.
    Source: http://stackoverflow.com/questions/16702966/rotate-image-and-crop-out-black-borders
    """
    quadrant = int(math.floor(angle / (math.pi / 2))) & 3
    sign_alpha = angle if ((quadrant & 1) == 0) else math.pi - angle
    alpha = (sign_alpha % math.pi + math.pi) % math.pi

    bb_w = w * math.cos(alpha) + h * math.sin(alpha)
    bb_h = w * math.sin(alpha) + h * math.cos(alpha)

    gamma = math.atan2(bb_w, bb_w) if (w < h) else math.atan2(bb_w, bb_w)
    delta = math.pi - alpha - gamma

    length = h if (w < h) else w
    d = length * math.cos(alpha)
    a = d * math.sin(alpha) / math.sin(delta)

    y = a * math.cos(gamma)
    x = y * math.tan(gamma)

    return (
        bb_w - 2 * x,
        bb_h - 2 * y
    )
If you need a further implementation example and visualization in TensorFlow, you can use this repository.
I hope this is helpful to other people.
Update: see #astromme's answer below. Tensorflow now supports rotating images natively.
What you can do while there is no native method in tensorflow is something like this:
import numpy as np
import tensorflow as tf
from PIL import Image

sess = tf.InteractiveSession()
# Pass image tensor object to a PIL image
image = Image.fromarray(image.eval())
# Use PIL or other library of the sort to rotate
rotated = Image.Image.rotate(image, degrees)
# Convert rotated image back to tensor
rotated_tensor = tf.convert_to_tensor(np.array(rotated))
tf.contrib is not available in TensorFlow 2. For tensorflow >= 2.*, the following can be used:
tf.keras.preprocessing.image.random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0., interpolation_order=1)
You can find the documentation here:
https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/random_rotation
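A minimal usage sketch: this operates on a single image as a NumPy array (Keras delegates the affine transform to SciPy, so SciPy must be installed), and the axis arguments must match your image layout:

import numpy as np
from tensorflow.keras.preprocessing.image import random_rotation

img = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)  # hypothetical HWC image
rotated = random_rotation(img, 25, row_axis=0, col_axis=1, channel_axis=2, fill_mode='nearest')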
Here's the #zimmermc answer updated to Tensorflow v0.12
Changes:
pack() is now stack()
order of split parameters reversed
def rotate_image_tensor(image, angle, mode='white'):
    """
    Rotates a 3D tensor (HWD), which represents an image, by the given radian angle.
    The new image has the same size as the input image.
    mode controls what happens to border pixels.
    mode = 'black' results in black bars (value 0 in unknown areas)
    mode = 'white' results in value 255 in unknown areas
    mode = 'ones' results in value 1 in unknown areas
    mode = 'repeat' keeps repeating the closest known pixel
    """
    s = image.get_shape().as_list()
    assert len(s) == 3, "Input needs to be 3D."
    assert (mode == 'repeat') or (mode == 'black') or (mode == 'white') or (mode == 'ones'), "Unknown boundary mode."
    image_center = [np.floor(x/2) for x in s]

    # Coordinates of the new image
    coord1 = tf.range(s[0])
    coord2 = tf.range(s[1])

    # Create vectors of those coordinates in order to vectorize the image
    coord1_vec = tf.tile(coord1, [s[1]])
    coord2_vec_unordered = tf.tile(coord2, [s[0]])
    coord2_vec_unordered = tf.reshape(coord2_vec_unordered, [s[0], s[1]])
    coord2_vec = tf.reshape(tf.transpose(coord2_vec_unordered, [1, 0]), [-1])

    # Center the coordinates, since the rotation center is supposed to be in the image center
    coord1_vec_centered = coord1_vec - image_center[0]
    coord2_vec_centered = coord2_vec - image_center[1]
    coord_new_centered = tf.cast(tf.stack([coord1_vec_centered, coord2_vec_centered]), tf.float32)

    # Perform backward transformation of the image coordinates
    rot_mat_inv = tf.dynamic_stitch([[0], [1], [2], [3]], [tf.cos(angle), tf.sin(angle), -tf.sin(angle), tf.cos(angle)])
    rot_mat_inv = tf.reshape(rot_mat_inv, shape=[2, 2])
    coord_old_centered = tf.matmul(rot_mat_inv, coord_new_centered)

    # Find the nearest neighbor in the old image
    coord1_old_nn = tf.cast(tf.round(coord_old_centered[0, :] + image_center[0]), tf.int32)
    coord2_old_nn = tf.cast(tf.round(coord_old_centered[1, :] + image_center[1]), tf.int32)

    # Clip values to stay inside image coordinates
    if mode == 'repeat':
        coord_old1_clipped = tf.minimum(tf.maximum(coord1_old_nn, 0), s[0]-1)
        coord_old2_clipped = tf.minimum(tf.maximum(coord2_old_nn, 0), s[1]-1)
    else:
        outside_ind1 = tf.logical_or(tf.greater(coord1_old_nn, s[0]-1), tf.less(coord1_old_nn, 0))
        outside_ind2 = tf.logical_or(tf.greater(coord2_old_nn, s[1]-1), tf.less(coord2_old_nn, 0))
        outside_ind = tf.logical_or(outside_ind1, outside_ind2)
        coord_old1_clipped = tf.boolean_mask(coord1_old_nn, tf.logical_not(outside_ind))
        coord_old2_clipped = tf.boolean_mask(coord2_old_nn, tf.logical_not(outside_ind))
        coord1_vec = tf.boolean_mask(coord1_vec, tf.logical_not(outside_ind))
        coord2_vec = tf.boolean_mask(coord2_vec, tf.logical_not(outside_ind))

    coord_old_clipped = tf.cast(tf.transpose(tf.stack([coord_old1_clipped, coord_old2_clipped]), [1, 0]), tf.int32)

    # Coordinates of the new image
    coord_new = tf.transpose(tf.cast(tf.stack([coord1_vec, coord2_vec]), tf.int32), [1, 0])

    image_channel_list = tf.split(image, s[2], 2)
    image_rotated_channel_list = list()
    for image_channel in image_channel_list:
        image_chan_new_values = tf.gather_nd(tf.squeeze(image_channel), coord_old_clipped)

        if (mode == 'black') or (mode == 'repeat'):
            background_color = 0
        elif mode == 'ones':
            background_color = 1
        elif mode == 'white':
            background_color = 255

        image_rotated_channel_list.append(tf.sparse_to_dense(coord_new, [s[0], s[1]], image_chan_new_values,
                                                             background_color, validate_indices=False))

    image_rotated = tf.transpose(tf.stack(image_rotated_channel_list), [1, 2, 0])
    return image_rotated
For rotating an image or a batch of images counter-clockwise by multiples of 90 degrees, you can use tf.image.rot90(image,k=1,name=None).
k denotes the number of 90 degrees rotations you want to make.
In the case of a single image, image is a 3-D Tensor of shape [height, width, channels]; in the case of a batch of images, it is a 4-D Tensor of shape [batch, height, width, channels].
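For example:

import tensorflow as tf

image = tf.zeros([224, 224, 3])       # a single HWC image
rotated = tf.image.rot90(image, k=3)  # three 90-degree turns = 270 degrees counter-clockwise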

overlay a smaller image on a larger image python OpenCv

Hi, I am creating a program that replaces a face in an image with someone else's face. However, I am stuck on trying to insert the new face into the original, larger image. I have researched ROI and addWeighted (it needs the images to be the same size), but I haven't found a way to do this in Python. Any advice is great. I am new to OpenCV.
I am using the following test images:
smaller_image:
larger_image:
Here is my code so far... a mix of other samples:
import cv2
import cv2.cv as cv
import sys
import numpy

def detect(img, cascade):
    rects = cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=3, minSize=(10, 10), flags=cv.CV_HAAR_SCALE_IMAGE)
    if len(rects) == 0:
        return []
    rects[:,2:] += rects[:,:2]  # convert (x, y, w, h) to (x1, y1, x2, y2)
    return rects

def draw_rects(img, rects, color):
    for x1, y1, x2, y2 in rects:
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

if __name__ == '__main__':
    if len(sys.argv) != 2:  ## Check for error in usage syntax
        print "Usage : python faces.py <image_file>"
    else:
        img = cv2.imread(sys.argv[1], cv2.CV_LOAD_IMAGE_COLOR)  ## Read image file
        if img is None:
            print "Could not open or find the image"
        else:
            cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
            gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
            gray = cv2.equalizeHist(gray)
            rects = detect(gray, cascade)

            ## Extract face coordinates (each rect is x1, y1, x2, y2)
            x1, y1, x2, y2 = rects[0]
            y = y2 - y1
            x = x2 - x1

            ## Extract face ROI (NumPy indexing is [row, col], i.e. [y, x])
            faceROI = gray[y1:y2, x1:x2]

            ## Show face ROI
            cv2.imshow('Display face ROI', faceROI)

            small = cv2.imread("average_face.png", cv2.CV_LOAD_IMAGE_COLOR)
            print "here"
            small = cv2.resize(small, (x, y))
            cv2.namedWindow('Display image')    ## create window for display
            cv2.imshow('Display image', small)  ## Show image in the window
            print "size of image: ", img.shape  ## print size of image
            cv2.waitKey(1000)
A simple way to achieve what you want:
import cv2
s_img = cv2.imread("smaller_image.png")
l_img = cv2.imread("larger_image.jpg")
x_offset=y_offset=50
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
Update
I suppose you want to take care of the alpha channel too. Here is a quick and dirty way of doing so:
s_img = cv2.imread("smaller_image.png", -1)

y1, y2 = y_offset, y_offset + s_img.shape[0]
x1, x2 = x_offset, x_offset + s_img.shape[1]

alpha_s = s_img[:, :, 3] / 255.0
alpha_l = 1.0 - alpha_s

for c in range(0, 3):
    l_img[y1:y2, x1:x2, c] = (alpha_s * s_img[:, :, c] +
                              alpha_l * l_img[y1:y2, x1:x2, c])
Using #fireant's idea, I wrote up a function to handle overlays. This works well for any position argument (including negative positions).
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask):
    """Overlay `img_overlay` onto `img` at (x, y) and blend using `alpha_mask`.

    `alpha_mask` must have the same HxW as `img_overlay` and values in range [0, 1].
    """
    # Image ranges
    y1, y2 = max(0, y), min(img.shape[0], y + img_overlay.shape[0])
    x1, x2 = max(0, x), min(img.shape[1], x + img_overlay.shape[1])

    # Overlay ranges
    y1o, y2o = max(0, -y), min(img_overlay.shape[0], img.shape[0] - y)
    x1o, x2o = max(0, -x), min(img_overlay.shape[1], img.shape[1] - x)

    # Exit if nothing to do
    if y1 >= y2 or x1 >= x2 or y1o >= y2o or x1o >= x2o:
        return

    # Blend overlay within the determined ranges
    img_crop = img[y1:y2, x1:x2]
    img_overlay_crop = img_overlay[y1o:y2o, x1o:x2o]
    alpha = alpha_mask[y1o:y2o, x1o:x2o, np.newaxis]
    alpha_inv = 1.0 - alpha
    img_crop[:] = alpha * img_overlay_crop + alpha_inv * img_crop
Example usage:
import numpy as np
from PIL import Image
# Prepare inputs
x, y = 50, 0
img = np.array(Image.open("img_large.jpg"))
img_overlay_rgba = np.array(Image.open("img_small.png"))
# Perform blending
alpha_mask = img_overlay_rgba[:, :, 3] / 255.0
img_result = img[:, :, :3].copy()
img_overlay = img_overlay_rgba[:, :, :3]
overlay_image_alpha(img_result, img_overlay, x, y, alpha_mask)
# Save result
Image.fromarray(img_result).save("img_result.jpg")
Result:
If you encounter errors or unusual outputs, please ensure:
img should not contain an alpha channel. (e.g. If it is RGBA, convert to RGB first.)
img_overlay has the same number of channels as img.
Based on fireant's excellent answer above, here is the alpha blending, but a bit more human-legible. You may need to swap 1.0-alpha and alpha depending on which direction you're merging (mine is swapped from fireant's answer). Here, the o-prefixed offsets (ox, oy) index into s_img and the b-prefixed offsets (bx, by) index into the background l_img:
for c in range(0, 3):
    alpha = s_img[oy:oy+height, ox:ox+width, 3] / 255.0
    color = s_img[oy:oy+height, ox:ox+width, c] * (1.0 - alpha)
    beta = l_img[by:by+height, bx:bx+width, c] * alpha
    l_img[by:by+height, bx:bx+width, c] = color + beta
Here it is:
import cv2
import numpy as np

def put4ChannelImageOn4ChannelImage(back, fore, x, y):
    rows, cols, channels = fore.shape
    trans_indices = fore[...,3] != 0  # Where not transparent
    overlay_copy = back[y:y+rows, x:x+cols]
    overlay_copy[trans_indices] = fore[trans_indices]
    back[y:y+rows, x:x+cols] = overlay_copy

# test
background = np.zeros((1000, 1000, 4), np.uint8)
background[:] = (127, 127, 127, 1)
overlay = cv2.imread('imagee.png', cv2.IMREAD_UNCHANGED)
put4ChannelImageOn4ChannelImage(background, overlay, 5, 5)
A simple function that blits an image front onto an image back and returns the result. It works with both 3 and 4-channel images and deals with the alpha channel. Overlaps are handled as well.
The output image has the same size as back, but always 4 channels.
The output alpha channel is given by (u+v)/(1+uv) where u,v are the alpha channels of the front and back image and -1 <= u,v <= 1. Where there is no overlap with front, the alpha value from back is taken.
import cv2

def merge_image(back, front, x, y):
    # convert to rgba
    if back.shape[2] == 3:
        back = cv2.cvtColor(back, cv2.COLOR_BGR2BGRA)
    if front.shape[2] == 3:
        front = cv2.cvtColor(front, cv2.COLOR_BGR2BGRA)

    # crop the overlay from both images
    bh, bw = back.shape[:2]
    fh, fw = front.shape[:2]
    x1, x2 = max(x, 0), min(x+fw, bw)
    y1, y2 = max(y, 0), min(y+fh, bh)
    front_cropped = front[y1-y:y2-y, x1-x:x2-x]
    back_cropped = back[y1:y2, x1:x2]

    alpha_front = front_cropped[:,:,3:4] / 255
    alpha_back = back_cropped[:,:,3:4] / 255

    # replace an area in result with overlay
    result = back.copy()
    print(f'af: {alpha_front.shape}\nab: {alpha_back.shape}\nfront_cropped: {front_cropped.shape}\nback_cropped: {back_cropped.shape}')
    result[y1:y2, x1:x2, :3] = alpha_front * front_cropped[:,:,:3] + (1-alpha_front) * back_cropped[:,:,:3]
    result[y1:y2, x1:x2, 3:4] = (alpha_front + alpha_back) / (1 + alpha_front*alpha_back) * 255

    return result
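A usage sketch, reusing the (hypothetical) file names from the earlier answers:

import cv2

back = cv2.imread('larger_image.jpg')                          # 3-channel BGR
front = cv2.imread('smaller_image.png', cv2.IMREAD_UNCHANGED)  # keeps the alpha channel
result = merge_image(back, front, 50, 50)                      # paste at x=50, y=50
cv2.imwrite('merged.png', result)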
To just give s_img a uniform transparency, I use cv2.addWeighted (a 50/50 blend with the underlying region, rather than per-pixel alpha compositing) before the line
l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
as follows:
s_img = cv2.addWeighted(l_img[y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]], 0.5, s_img, 0.5, 0)
If, when attempting to write to the destination image using any of the answers above, you get the following error:
ValueError: assignment destination is read-only
a quick potential fix is to set the WRITEABLE flag to true:
img.setflags(write=1)
A simple 4-on-4 pasting function that works:
def paste(background, foreground, pos=(0,0)):
    # get position and crop pasting area if needed
    x = pos[0]
    y = pos[1]
    bgWidth = background.shape[0]
    bgHeight = background.shape[1]
    frWidth = foreground.shape[0]
    frHeight = foreground.shape[1]
    width = bgWidth - x
    height = bgHeight - y
    if frWidth < width:
        width = frWidth
    if frHeight < height:
        height = frHeight

    # normalize alpha channels from 0-255 to 0-1
    alpha_background = background[x:x+width, y:y+height, 3] / 255.0
    alpha_foreground = foreground[:width, :height, 3] / 255.0

    # set adjusted colors
    for color in range(0, 3):
        fr = alpha_foreground * foreground[:width, :height, color]
        bg = alpha_background * background[x:x+width, y:y+height, color] * (1 - alpha_foreground)
        background[x:x+width, y:y+height, color] = fr + bg

    # set adjusted alpha and denormalize back to 0-255
    background[x:x+width, y:y+height, 3] = (1 - (1 - alpha_foreground) * (1 - alpha_background)) * 255

    return background
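Note that the function indexes with shape[0] as the 'width' axis, so pos is effectively (row, column), and both inputs must already have 4 channels. A usage sketch with hypothetical file names:

import cv2

background = cv2.imread('background.png', cv2.IMREAD_UNCHANGED)  # must be 4-channel
foreground = cv2.imread('foreground.png', cv2.IMREAD_UNCHANGED)  # must be 4-channel
result = paste(background, foreground, (10, 20))  # also modifies background in place
cv2.imwrite('pasted.png', result)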
I reworked #fireant's concept to allow for optional alpha masks and allow any x or y, including values outside of the bounds of the image. It will crop to the bounds.
def overlay_image_alpha(img, img_overlay, x, y, alpha_mask=None):
    """Overlay `img_overlay` onto `img` at (x, y) and blend using optional `alpha_mask`.

    `alpha_mask` must have the same HxW as `img_overlay` and values in range [0, 1].
    """
    if y < 0 or y + img_overlay.shape[0] > img.shape[0] or x < 0 or x + img_overlay.shape[1] > img.shape[1]:
        y_origin = 0 if y > 0 else -y
        y_end = img_overlay.shape[0] if y < 0 else min(img.shape[0] - y, img_overlay.shape[0])
        x_origin = 0 if x > 0 else -x
        x_end = img_overlay.shape[1] if x < 0 else min(img.shape[1] - x, img_overlay.shape[1])
        img_overlay_crop = img_overlay[y_origin:y_end, x_origin:x_end]
        alpha = alpha_mask[y_origin:y_end, x_origin:x_end] if alpha_mask is not None else None
    else:
        img_overlay_crop = img_overlay
        alpha = alpha_mask

    y1 = max(y, 0)
    y2 = min(img.shape[0], y1 + img_overlay_crop.shape[0])
    x1 = max(x, 0)
    x2 = min(img.shape[1], x1 + img_overlay_crop.shape[1])

    img_crop = img[y1:y2, x1:x2]
    img_crop[:] = alpha * img_overlay_crop + (1.0 - alpha) * img_crop if alpha is not None else img_overlay_crop
