I would like to apply a simple algebraic operation to the RGB values of an image that I have loaded via PIL. My current version works, but it is slow:
from PIL import Image
import numpy as np
file_name = '1'
im = Image.open('data/' + file_name + '.jpg').convert('RGB')
pixels = np.array(im)
s = pixels.shape
# Flatten to one row per pixel so the comprehension below can walk the pixels.
p = pixels.reshape((s[0] * s[1], s[2]))


def update(ratio=0.5):
    """Scale the red channel up and the green channel down, then save a PNG.

    Reads the module-level ``p`` (flattened pixels), ``s`` (original shape)
    and ``file_name``. Every pixel is processed in a Python-level list
    comprehension, which is what makes this version slow.

    Args:
        ratio: Fraction by which red is increased and green is decreased.

    Returns:
        Always 0.
    """
    # NOTE(review): the upper bound of 1 is kept exactly as posted. For
    # 0-255 channel data it looks like it should be 255; the answer that
    # follows this snippet discusses that.
    p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 1), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
    img = Image.fromarray(np.uint8(p2.reshape(s)))
    img.save('result/' + file_name + '_test.png')
    return 0


update(0.5)
Does anyone have a more efficient idea?
Make use of NumPy's vectorized operations to get rid of the loop.
I modified your original approach to compare performance between the following, different solutions. Also, I added a PIL only approach using ImageMath, if you want to get rid of NumPy completely.
Furthermore, I assume there is a bug in this line:
p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 1), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
You never convert the values to floats in the range [0, 1], so the upper bound in the min call should be 255 instead of 1.
Here's, what I've done:
import numpy as np
from PIL import Image, ImageMath
import time
# Modified, original implementation; fixed most likely wrong compare value in min (255 instead of 1)
def update_1(ratio=0.5):
    """Per-pixel Python-loop baseline: boost red, reduce green, save a PNG.

    Reads the module-level image ``im``. Kept deliberately unvectorised so it
    can serve as the slow reference in the timing comparison.

    Args:
        ratio: Fraction by which red is increased and green is decreased.

    Returns:
        Always 0.
    """
    pixels = np.array(im)
    s = pixels.shape
    # One row per pixel, one column per channel.
    p = pixels.reshape((s[0] * s[1], s[2]))
    p2 = np.array([[min(rgb[0] + rgb[0] * ratio, 255), max(rgb[1] - rgb[1] * ratio, 0), rgb[2]] for rgb in p])
    img = Image.fromarray(np.uint8(p2.reshape(s)))
    img.save('result_update_1.png')
    return 0
# More efficient vectorized approach using NumPy
def update_2(ratio=0.5):
    """Vectorised NumPy version: boost red, reduce green, save a PNG.

    Reads the module-level image ``im``. Channel 0 is multiplied by
    ``1 + ratio`` and capped at 255; channel 1 is multiplied by ``1 - ratio``
    and floored at 0. The results are written back into the pixel array, so
    they take on that array's dtype.

    Args:
        ratio: Fraction by which red is increased and green is decreased.

    Returns:
        Always 0.
    """
    # np.array() copies, so the in-place channel writes below do not touch `im`.
    pixels = np.array(im)
    pixels[:, :, 0] = np.minimum(pixels[:, :, 0] * (1 + ratio), 255)
    pixels[:, :, 1] = np.maximum(pixels[:, :, 1] * (1 - ratio), 0)
    img = Image.fromarray(pixels)
    img.save('result_update_2.png')
    return 0
# More efficient approach only using PIL
def update_3(ratio=0.5):
    """PIL-only version: boost red, reduce green, save a PNG.

    Reads the module-level image ``im``, which must split into exactly three
    bands. Each modified band is normalised to [0, 1], scaled, clamped, and
    mapped back to 0-255 before being converted to an 8-bit band.

    Args:
        ratio: Fraction by which red is increased and green is decreased.

    Returns:
        Always 0.
    """
    (r, g, b) = im.split()
    # NOTE(review): recent Pillow releases deprecate ImageMath.eval in favour
    # of lambda_eval / unsafe_eval - confirm against the installed version.
    r = ImageMath.eval('min(float(r) / 255 * (1 + ratio), 1) * 255', r=r, ratio=ratio).convert('L')
    g = ImageMath.eval('max(float(g) / 255 * (1 - ratio), 0) * 255', g=g, ratio=ratio).convert('L')
    Image.merge('RGB', (r, g, b)).save('result_update_3.png')
    return 0
# Force three 8-bit bands: update_1 reshapes a 3-column result back to the
# source shape and update_3 unpacks split() into (r, g, b), so RGBA, palette
# or greyscale files would otherwise fail.
im = Image.open('path/to/your/image.png').convert('RGB')

# Time each implementation once and print the elapsed seconds.
for benchmark in (update_1, update_2, update_3):
    t1 = time.perf_counter()
    benchmark(0.5)
    print(time.perf_counter() - t1)
The performance on a [400, 400] RGB image on my machine:
1.723889293 s # your approach
0.055316339 s # vectorized NumPy approach
0.062502050 s # PIL only approach
Hope that helps!
Related
I'm looking to make an "image quilt" or "image array" very fast using PIL. I'm looking for a method which takes in a list of images and image_per_row parameters and return a new PIL Image.
I currently have a working model of this, but it's not fast enough for 1k+ images at a time. I was thinking maybe using a numpy array would be faster? Would it be?
Here's what I have right now. Please note the image width will always be 1920, and the half parameter is there to make the final image size smaller.
Any fun ideas to optimize this or ways of doing it better would be appreciated :)
from math import ceil
from typing import List

from PIL import Image
def _generate_array(
    images: List[Image.Image],
    images_per_row: int,
    half: bool = False
) -> Image.Image:
    """Tile ``images`` into a 1920-pixel-wide grid on a black background.

    The scale factor is derived from the size of the first image. Columns are
    separated by 3 pixels of padding; rows are stacked without padding.

    Note that ``Image.thumbnail`` resizes in place, so the images passed in
    are shrunk by this call.

    Args:
        images: Non-empty list of images to tile.
        images_per_row: Number of columns in the grid (must be positive).
        half: When true, the finished grid is downscaled to half its width
            and height.

    Returns:
        The composed grid as a new RGB image.
    """
    frame_width = 1920
    padding = 3

    img_width, img_height = images[0].size
    sf = (frame_width - (images_per_row - 1) * padding) / (images_per_row * img_width)
    scaled_img_width = ceil(img_width * sf)
    scaled_img_height = ceil(img_height * sf)

    number_of_rows = ceil(len(images) / images_per_row)
    # Rows are pasted at multiples of the rounded-up cell height, so the frame
    # must be sized from that same value or the bottom row can be clipped.
    frame_height = scaled_img_height * number_of_rows

    new_im = Image.new('RGB', (frame_width, frame_height), (0, 0, 0))

    for num, im in enumerate(images):
        row, col = divmod(num, images_per_row)
        im.thumbnail((scaled_img_width, scaled_img_height))
        new_im.paste(im, (col * (scaled_img_width + padding), row * scaled_img_height))

    if half:
        width, height = new_im.size
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        new_im = new_im.resize((round(width / 2), round(height / 2)), Image.LANCZOS)
    return new_im
I'm trying to draw a spiral square in python using OpenCV and Numpy.
I know that I can do it via turtle and there are so many examples on the internet but I need to do it as I described in the title.
So I drew a chessboard using Python and OpenCV.
This is code for it
import cv2
import numpy as np
# Draw an 8x8 chessboard of 32-pixel squares on a mid-grey canvas with a
# 16-pixel border, then show it until a key is pressed.
mySize = 256
myOffset = 16
mySquare = 32
myNumberY = mySize // mySquare
myNumberX = mySize // mySquare
myColor = 255
img = np.ones((mySize + 2 * myOffset, mySize + 2 * myOffset), dtype=np.uint8) * 127
for y in range(myNumberY):
    for x in range(myNumberX):
        myColor = 0 if (x + y) % 2 == 0 else 255
        print(y, x, myColor)
        # Fill the whole square with one slice assignment instead of
        # visiting each of its pixels in two more nested loops.
        top = myOffset + y * mySquare
        left = myOffset + x * mySquare
        img[top:top + mySquare, left:left + mySquare] = myColor
cv2.imshow('my image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Next, I have some kind of squares - one inside the other.
And this is code for it.
import cv2, numpy
# Draw concentric squares: each pass paints a smaller centred square in the
# opposite colour of the previous one, leaving rings mySquare pixels wide.
mySize, myOffset, mySquare, myColor = 256, 16, 16, 0
img = numpy.ones((mySize + 2 * myOffset, mySize + 2 * myOffset), dtype=numpy.uint8) * 127
for item in range(mySize // mySquare // 2):
    myTempOffsetStart = myOffset + item * mySquare
    myTempOffsetFinish = myOffset + mySize - item * mySquare
    # Alternate between white and black on every ring.
    myColor = 0 if myColor == 255 else 255
    img[myTempOffsetStart:myTempOffsetFinish, myTempOffsetStart:myTempOffsetFinish] = myColor
cv2.imshow('my image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
You can change the size of drawing pictures (lines) by changing mySquare value.
So for me important is to draw square spirals like this in the following picture.
If you look at the picture you see that the pattern is actually pretty simple. You have to go right, up, left, down and repeat a few times. Also the distance you have to go starts with 3 and goes up by two every other step.
After having done that, I calculate how much space I need to draw the spiral and shift the points so that no coordinate is negative. Here is my code:
import cv2
import matplotlib.pyplot as plt
import itertools
import numpy as np
repeates = 10
# Right, up, left, down - repeated `repeates` times.
directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
# Step lengths run 3, 3, 5, 5, 7, 7, ...: the length grows by two every
# second move.
moves = [(3 + 2 * (i // 2)) * np.array(d)
         for i, d in enumerate(itertools.chain(*itertools.repeat(directions, repeates)))]
# Corner points of the spiral, starting at the origin.
points = (np.array([0, 0]), *itertools.accumulate(moves))
coordinates = np.array(points).reshape(-1)
r1, r2 = coordinates.min(), coordinates.max()
# Square canvas just large enough once every point is shifted by -r1.
n = r2 - r1 + 1
img = np.zeros((n, n))
for p, q in zip(points[0:-1], points[1:]):
    # Pass native ints: some OpenCV builds reject NumPy integer scalars
    # inside point tuples.
    start = tuple(int(v) for v in p - r1)
    end = tuple(int(v) for v in q - r1)
    cv2.line(img, start, end, (1, 1, 1))
plt.imshow(img, cmap='gray')
# Needed for the figure to appear when run as a plain script.
plt.show()
And it looks like this
I am using Python OpenCV to split channels and remove black background like this...
# NOTE: `image_1` and `gray` come from surrounding code that is not shown.
b_channel, g_channel, r_channel = cv2.split(image_1)
alpha_channel = np.zeros_like(gray)
# Mark every pixel that is not pure black as fully opaque. This visits each
# pixel in Python, which is the slow part being asked about.
for p in range(alpha_channel.shape[0]):
    for q in range(alpha_channel.shape[1]):
        if b_channel[p][q] != 0 or g_channel[p][q] != 0 or r_channel[p][q] != 0:
            alpha_channel[p][q] = 255
merged = cv2.merge((b_channel, g_channel, r_channel, alpha_channel))
This is working, but it is taking around 10 seconds to complete on an image that is only 200kb
Is there a more efficient way to do this or is there some speed gains I could make using the code I have?
Iterating over pixels using for loop is literally very slow and inefficient. Also, as per the documentation here,
cv2.split() is a costly operation (in terms of time). So do it only if
you need it. Otherwise go for Numpy indexing.
You can try vectorising and indexing with numpy as below:
# create the image with alpha channel
img_rgba = cv2.cvtColor(img, cv2.COLOR_RGB2RGBA)
# mask: True wherever at least one of the three channel values is non-zero
mask = (img[:, :, 0:3] != [0,0,0]).any(2)
# assign the mask (scaled to 0/255) to the last channel of the image
img_rgba[:,:,3] = (mask*255).astype(np.uint8)
For what you're doing, using cv2.bitwise_or seems to be the fastest method:
import time

image_1 = img

# your method
start_time = time.perf_counter()
b_channel, g_channel, r_channel = cv2.split(image_1)
# Same height, width and dtype as a single channel; avoids depending on a
# `gray` array that this script never defines.
alpha_channel = np.zeros_like(b_channel)
for p in range(alpha_channel.shape[0]):
    for q in range(alpha_channel.shape[1]):
        if b_channel[p][q] != 0 or g_channel[p][q] != 0 or r_channel[p][q] != 0:
            alpha_channel[p][q] = 255
elapsed_time = time.perf_counter() - start_time
print('for cycles: ' + str(elapsed_time*1000.0) + ' milliseconds')

# my method
start_time = time.perf_counter()
b_channel, g_channel, r_channel = cv2.split(image_1)
# OR the channels together: the result is non-zero wherever any channel is.
alpha_channel2 = cv2.bitwise_or(g_channel, r_channel)
alpha_channel2 = cv2.bitwise_or(alpha_channel2, b_channel)
# Turn every non-zero value into 255.
_, alpha_channel2 = cv2.threshold(alpha_channel2, 0, 255, cv2.THRESH_BINARY)
elapsed_time2 = time.perf_counter() - start_time
print('bitwise + threshold: '+ str(elapsed_time2*1000.0) + ' milliseconds')

# anubhav's method
start_time = time.perf_counter()
img_rgba = cv2.cvtColor(image_1, cv2.COLOR_RGB2RGBA)
# mask: True wherever at least one of the three channel values is non-zero
mask = (image_1[:, :, 0:3] != [0, 0, 0]).any(2)
# assign the mask (scaled to 0/255) to the last channel of the image
img_rgba[:, :, 3] = (mask*255).astype(np.uint8)
elapsed_time3 = time.perf_counter() - start_time
print('anubhav: ' + str(elapsed_time3*1000.0) + ' milliseconds')
for cycles: 2146.300792694092 milliseconds
bitwise + threshold: 4.959583282470703 milliseconds
anubhav: 27.924776077270508 milliseconds
Fastest Solution
Consider a function that uses cv2.split, which we know is slow on large images. We can resize or crop the image first and then perform the calculation on the smaller image. In my case, I had to calculate the colorfulness of an image using cv2.split, so I cropped and resized the image before calling cv2.split.
A faster and more reasonable cv2.split calculation can be performed by Resizing
Code
def image_colorfulness(self, image):
    """Return a colourfulness score for ``image``.

    The channels are unpacked as (B, G, R). Two opponent-colour planes are
    built, ``rg = |R - G|`` and ``yb = |0.5 * (R + G) - B|``, and the score is
    ``sqrt(std_rg^2 + std_yb^2) + 0.3 * sqrt(mean_rg^2 + mean_yb^2)``.

    Progress messages, including the full channel arrays, are printed to
    stdout.

    Args:
        image: Three-channel image array.

    Returns:
        The colourfulness metric as a float.
    """
    # split the image into its respective channels, as floats
    (B, G, R) = cv2.split(image.astype("float"))
    print(f'Split Image to B G R {(B, G, R)}')
    # compute rg = |R - G|
    rg = np.absolute(R - G)
    print(f'Computed RG to {rg}')
    # compute yb = |0.5 * (R + G) - B|
    yb = np.absolute(0.5 * (R + G) - B)
    print('Performing Absolute')
    # compute the mean and standard deviation of both `rg` and `yb`
    (rgMean, rgStd) = (np.mean(rg), np.std(rg))
    (ybMean, ybStd) = (np.mean(yb), np.std(yb))
    print('Performing Standard Deviation')
    # combine the mean and standard deviations
    stdRoot = np.sqrt((rgStd ** 2) + (ybStd ** 2))
    meanRoot = np.sqrt((rgMean ** 2) + (ybMean ** 2))
    # derive the "colorfulness" metric and return it
    return stdRoot + (0.3 * meanRoot)
def crop_square(self, img, size, interpolation=cv2.INTER_AREA):
    """Centre-crop ``img`` to a square and resize it to ``size`` x ``size``.

    Args:
        img: Image array whose first two dimensions are height and width.
        size: Edge length, in pixels, of the returned square image.
        interpolation: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The cropped and resized image.
    """
    h, w = img.shape[:2]
    min_size = min(h, w)
    # Integer arithmetic gives the same indices as truncating the float
    # midpoints, without the round trip through float.
    top = (h - min_size) // 2
    left = (w - min_size) // 2
    # Centralize and crop
    crop_img = img[top:top + min_size, left:left + min_size]
    resized = cv2.resize(crop_img, (size, size), interpolation=interpolation)
    return resized
img = cv2.imread(image_path)
# Centre-crop to a square and shrink to 300x300 before the expensive step.
resize_img = self.crop_square(img, 300)
## perform your calculation on resize_img and continue with the original img afterwards
colorness = self.image_colorfulness(resize_img)
Resizing Only
If you prefer not to crop and only want to resize the image, you can use just this line from the crop_square function.
resized = cv2.resize(crop_img, (size, size), interpolation=interpolation)
Testing Results
Before
I tested a 5.0 MB *.PNG Image, before using standard image input in cv2.split it processed in 8 Minutes.
After
After the Image Resizing it was reduced to 0.001 ms on the resized image.
Standard Image
Resized Image
I have two images, one with and other without alpha channel. Thus, image A and B has a shape of (x,y,4) and (x,y,3) respectively.
I want to merge both images into a single tensor using Python, where B is the background and A is the upper image. The final image must have a shape of (x, y, 3). I checked whether scikit-image or cv2 is capable of doing this, but I couldn't find any solution.
here is alpha blending in python
import numpy as np
import cv2
# Blend two equally sized images with a fixed weight and display the result.
alpha = 0.4
img1 = cv2.imread('Desert.jpg')
img2 = cv2.imread('Penguins.jpg')
# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with a clear message rather than on the first attribute access.
if img1 is None or img2 is None:
    raise FileNotFoundError('Desert.jpg and Penguins.jpg must both be readable images')
#r,c,z = img1.shape
out_img = np.zeros(img1.shape, dtype=img1.dtype)
# The float result is cast to the output dtype on assignment.
out_img[:, :, :] = (alpha * img1[:, :, :]) + ((1 - alpha) * img2[:, :, :])
# Equivalent explicit loop over the whole image, for reference only:
# for y in range(r):
#     for x in range(c):
#         out_img[y,x,0] = (alpha * img1[y,x,0]) + ((1-alpha) * img2[y,x,0])
#         out_img[y,x,1] = (alpha * img1[y,x,1]) + ((1-alpha) * img2[y,x,1])
#         out_img[y,x,2] = (alpha * img1[y,x,2]) + ((1-alpha) * img2[y,x,2])
cv2.imshow('Output', out_img)
cv2.waitKey(0)
The above solution works, however I have a more efficient one:
# Composite A (with alpha in channel 3) over the background B.
alpha = A[:, :, 3]
A1 = A[:, :, :3]
if np.issubdtype(A.dtype, np.integer):
    # Integer images store alpha as 0..max (e.g. 0..255 for uint8). Normalise
    # it to [0, 1] in floating point; otherwise `1 - alpha` and the products
    # wrap around in the integer dtype. Assumes B uses the same value range
    # as A - TODO confirm.
    weight = alpha[:, :, np.newaxis] / np.iinfo(A.dtype).max
    C = np.rint(A1 * weight + B * (1 - weight)).astype(A.dtype)
else:
    # Float images: alpha is used as-is, assumed to be in [0, 1] already.
    weight = alpha[:, :, np.newaxis]
    C = A1 * weight + B * (1 - weight)
I am experimenting with JPEG compression using python. I load in a tiff image and store it as numpy uint8 RGB array. I was doing this for color mapping.
def rgb2ycbcr(im):
    """Convert an RGB image array to YCbCr.

    The output buffer is created with ``np.empty_like(im)``, so each computed
    plane is cast to the input's dtype when it is stored.

    Args:
        im: Array of shape (rows, cols, 3) holding R, G, B in that order.

    Returns:
        Array of the same shape, converted to uint8, holding Y, Cb, Cr.
    """
    cbcr = np.empty_like(im)
    r = im[:, :, 0]
    g = im[:, :, 1]
    b = im[:, :, 2]
    # Y
    cbcr[:, :, 0] = .299 * r + .587 * g + .114 * b
    # Cb
    cbcr[:, :, 1] = 128 - .169 * r - .331 * g + .5 * b
    # Cr
    cbcr[:, :, 2] = 128 + .5 * r - .419 * g - .081 * b
    return np.uint8(cbcr)
def ycbcr2rgb(im):
    """Convert a YCbCr image array back to RGB.

    Args:
        im: Array of shape (rows, cols, 3) holding Y, Cb, Cr in that order.

    Returns:
        Array of the same shape, converted to uint8, holding R, G, B.
    """
    rgb = np.empty_like(im)
    y = im[:, :, 0]
    # NOTE(review): kept exactly as posted. When `im` is uint8 these
    # subtractions wrap around for values below 128, and nothing below clamps
    # results outside 0-255; this is the defect the question asks about.
    cb = im[:, :, 1] - 128
    cr = im[:, :, 2] - 128
    # R
    rgb[:, :, 0] = y + 1.402 * cr
    # G
    rgb[:, :, 1] = y - .34414 * cb - .71414 * cr
    # B
    rgb[:, :, 2] = y + 1.772 * cb
    return np.uint8(rgb)
I did a simple RGB to YCbCr transformation followed with a inverse transformation.
# Forward transform, then display the converted array.
img = rgb2ycbcr(img)
imshow(img)
# Inverse transform back to RGB, then display again.
img = ycbcr2rgb(img)
imshow(img)
I got these two output image as YCbCr and RGB output after the color space transformation.
It seems that something is wrong with my color conversion and I cannot figure out what is wrong. I was using the JPEG color space conversion provided by
Wikipedia. Thank you for the help.
You have to do your intermediate calculations in floating point. The posterization should tip you off; you have a lot of "hot" (saturated) pixels.
def rgb2ycbcr(im):
    """Convert an RGB image array to YCbCr using a floating-point matrix.

    Args:
        im: Array of shape (rows, cols, 3) holding R, G, B in that order.

    Returns:
        uint8 array of the same shape holding Y, Cb, Cr.
    """
    xform = np.array([[.299, .587, .114], [-.1687, -.3313, .5], [.5, -.4187, -.0813]])
    # The dot product with a float matrix yields a float array, so the
    # intermediate values are not limited to the input's integer range.
    ycbcr = im.dot(xform.T)
    # Shift the two chroma planes so that "no colour" sits at 128.
    ycbcr[:, :, [1, 2]] += 128
    return np.uint8(ycbcr)
def ycbcr2rgb(im):
    """Convert a YCbCr image array back to RGB, clamping to 0-255.

    Args:
        im: Array of shape (rows, cols, 3) holding Y, Cb, Cr in that order.

    Returns:
        uint8 array of the same shape holding R, G, B.
    """
    xform = np.array([[1, 0, 1.402], [1, -0.34414, -.71414], [1, 1.772, 0]])
    # Work in float64 so removing the chroma offset can go negative.
    # (The `np.float` alias was removed from NumPy, hence the explicit type.)
    rgb = im.astype(np.float64)
    rgb[:, :, [1, 2]] -= 128
    rgb = rgb.dot(xform.T)
    # Saturate out-of-range values before the conversion to uint8.
    np.clip(rgb, 0, 255, out=rgb)
    return np.uint8(rgb)