I was writing a custom function to apply a blur to an image using a convolution kernel. When I show the image, however, there is a weird result. In some ways is seems that the image was inverted, but I am not sure why. Here is the original image:
Here is the result:
I have already tried re writing the code, changing the image, changing the blur kernel, printing and personally going through many convolutions by eye, etc.
import cv2
import numpy as np
import matplotlib.pyplot as plt
def showImage(image):
plt.imshow(image, cmap='gray')
plt.show()
def gaussianBlur(image):
tempImage = image.copy()
tempImage = np.pad(tempImage, 1, "constant")
showImage(tempImage)
max = 0
i = 0
for x in range(1, len(image)-1):
for y in range(1, len(image[0])-1):
roi = image[x-1:x+2, y-1:y+2]
kernel = np.array([
[0.0625, 0.125, 0.0625],
[0.125, 0.25, 0.125],
[0.0625, 0.125, 0.0625]
])
if np.matmul(roi, kernel).sum() > max:
max = np.matmul(roi, kernel).sum()
tempImage[x][y] = np.matmul(roi, kernel).sum()
i += 1
print(np.matmul(roi, kernel).sum())
# if(i % 1000 == 0):
# showImage(tempImage)
divAmount = max / 255
for x in range(1, len(image)-1):
for y in range(1, len(image[0])-1):
tempImage[x][y] = tempImage[x][y] / divAmount
return tempImage.tolist()
# Load and view the image
image = cv2.imread("image_1_small.jpg", 0)
showImage(image)
# Apply Blur
image = gaussianBlur(image)
print(image)
# image = cv2.GaussianBlur(image, (5, 5), 0)
showImage(image)
The expected outcome should look like the original image only blurred.
This is caused by overflows. You compute convolution wrong. Use np.multiply in place of np.matmul.
Related
I would like to slice up an image in python
and paste it back together again as a window.
The tiles measure as 8pixels by 9pixels and each row needs to skip 1 pixel
I would then need to merge the tiles back together again with a 1 pixel padding around each tile to give a windowed effect.
The image is black and white but for the example I have used color to show that the windowed effect would need to have a white background
input example
Desired Output
Update: change tiles dimension to bigger for illustration, you can adjust per your need
Use this:
import cv2
image = cv2.imread('test.jpg')
tiles_height = 50
tiles_width = 30
# white padding
padding_x = 10
padding_y = 20
num_y = int(image.shape[0]/tiles_height)
num_x = int(image.shape[1]/tiles_width)
new_img = np.full((image.shape[0] + num_y*padding_y, image.shape[1] + num_x*padding_x,3),255)
for incre_i,i in enumerate(range(0,image.shape[0],tiles_height)):
for incre_j,j in enumerate(range(0, image.shape[1], tiles_width)):
new_img[i+incre_i*padding_y:i+tiles_height+incre_i*padding_y
,j+incre_j*padding_x:j+tiles_width+incre_j*padding_x,:] = image[i:i+tiles_height,j:j+tiles_width,:]
cv2.imwrite('res.jpg',new_img)
print(image.shape, new_img.shape)
Update 1:
Because you want to latter remove tiles, I added code that can help you with that. Now all you have to do is changing variables in tiles config, white padding, tile index to be removed:
import cv2
image = cv2.imread('test.jpg')
# tiles config
tiles_height = 50
tiles_width = 30
# white padding
padding_x = 10
padding_y = 20
# tile index to be removed
remove_indices = [(0,0),(3,6)]
num_y = int(image.shape[0]/tiles_height)
num_x = int(image.shape[1]/tiles_width)
new_img = np.full((image.shape[0] + num_y*padding_y, image.shape[1] + num_x*padding_x,3),255)
for incre_i,i in enumerate(range(0,image.shape[0],tiles_height)):
for incre_j,j in enumerate(range(0, image.shape[1], tiles_width)):
if (incre_i,incre_j) in remove_indices:
new_img[i+incre_i*padding_y:i+tiles_height+incre_i*padding_y
,j+incre_j*padding_x:j+tiles_width+incre_j*padding_x,:] = 255
else:
new_img[i+incre_i*padding_y:i+tiles_height+incre_i*padding_y
,j+incre_j*padding_x:j+tiles_width+incre_j*padding_x,:] = image[i:i+tiles_height,j:j+tiles_width,:]
cv2.imwrite('remove_tiles.jpg',new_img)
print(image.shape, new_img.shape)
test.jpg
res.jpg
remove_tiles.jpg
print(image.shape, new_img.shape) gives (952, 1429, 3) (1332, 1899, 3)
You can try with skimage.utils.view_as_windows from the scikit-image package:
from skimage.util import view_as_windows
import matplotlib.pyplot as plt
import numpy as np
img = np.random.rand(90, 90, 1) # gray-scale image, you can change the channels accordingly
img[8::9,] = 0
tiles = view_as_windows(img, (9, 9, 1), (9, 9, 1)).squeeze(2) # squeeze out unneded dim
tiles = tiles[:, :, :-1, :, :] # Remove last row of each tile
# plot the original image
plt.axis("off")
plt.imshow(img.squeeze(2))
plt.show()
# plot the tiles
fig, axes = plt.subplots(10, 10)
for i in range(10):
for j in range(10):
axes[i, j].axis("off")
axes[i, j].imshow(tiles[i, j, ...].squeeze(-1))
plt.show()
Here is the result:
Original
Sliced
The torch.Tensor.unfold operator from PyTorch could be an option too.
I'm trying to blurr an image by mapping each pixel to the average of the N pixels to the right of it (in the same row). My iterative solution produces good output, but my linear-algebra solution is producing bad output.
From testing, I believe my kernel-matrix is correct; and, I know the last N rows don't get blurred, but that's fine for now. I'd appreciate any hints or solutions.
iterative-solution output (good), linear-algebra output (bad)
original image; and here is the failing linear-algebra code:
def blur(orig_img):
# turn image-mat into a vector
flattened_img = orig_img.flatten()
L = flattened_img.shape[0]
N = 3
# kernel
kernel = np.zeros((L, L))
for r, row in enumerate(kernel[0:-N]):
row[r:r+N] = [round(1/N, 3)]*N
print(kernel)
# blurr the img
print('starting blurring')
blurred_img = np.matmul(kernel, flattened_img)
blurred_img = blurred_img.reshape(orig_img.shape)
return blurred_img
The equation I'm modelling is this:
One option might be to just use a kernel and a convolution?
For example if we load a gray scale image like so:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from scipy import ndimage
# load a hackinsh grayscale image
image = np.asarray(Image.open('cup.jpg')).mean(axis=2)
plt.imshow(image)
plt.title('Gray scale image')
plt.show()
Now one can use a kernel and convolution. For example to create a filter that filters just one rows and compute the value of the center pixel as the difference between the pixels to the right and left one can do the following:
# Create a kernel that takes the difference between neighbors horizontal pixes
k = np.array([[-1,0,1]])
plt.subplot(121)
plt.title('Kernel')
plt.imshow(k)
plt.subplot(122)
plt.title('Output')
plt.imshow(ndimage.convolve(image, k, mode='constant', cval=0.0))
plt.show()
Therefore, one can blurr an image by mapping each pixel to the average of the N pixels to the right of it by creating the appropiate kernel.
# Create a kernel that takes the average of N pixels to the right
n=10
k = np.zeros(n*2);k[n:]=1/n
k = k[np.newaxis,...]
plt.subplot(121)
plt.title('Kernel')
plt.imshow(k)
plt.subplot(122)
plt.title('Output')
plt.imshow(ndimage.convolve(image, k, mode='constant', cval=0.0))
plt.show()
The issue was incorrect usage of cv2.imshow() in displaying the output image. It expects floating-point pixel values to be in [0, 1]; which, is done in the below code (near bottom):
def blur(orig_img):
flattened_img = orig_img.flatten()
L = flattened_img.shape[0]
N = int(round(0.1 * orig_img.shape[0], 0))
# mask (A)
mask = np.zeros((L, L))
for r, row in enumerate(mask[0:-N]):
row[r:r+N] = [round(1/N, 2)]*N
# blurred img = A * flattened_img
print('starting blurring')
blurred_img = np.matmul(mask, flattened_img)
blurred_img = blurred_img.reshape(orig_img.shape)
cv2.imwrite('blurred_img.png', blurred_img)
# normalize img to [0,1]
blurred_img = (
blurred_img - blurred_img.min()) / (blurred_img.max()-blurred_img.min())
return blurred_img
Ammended output
Thank you to #CrisLuengo for identifying the issue.
I'm facing an issue, and would like some inputs from the community on how to improve the disparity map. I'm following this tutorial for calculating the disparity map between 2 images. The code I have is as follows:
import cv2
import numpy as np
import sys
from matplotlib import pyplot as plt
num_disparities = 64 # number of disparities to check
block = 9 # block size to match
def preprocess_frame(path):
image = cv2.imread(path, 0)
image = cv2.equalizeHist(image)
image = cv2.GaussianBlur(image, (5, 5), 0)
return image
def calculate_disparity_matrix(args):
left_image = preprocess_frame(args[1])
right_image = preprocess_frame(args[2])
rows, cols = left_image.shape
kernel = np.ones([block, block]) / block
disparity_maps = np.zeros(
[left_image.shape[0], left_image.shape[1], num_disparities])
for d in range(0, num_disparities):
# shift image
translation_matrix = np.float32([[1, 0, d], [0, 1, 0]])
shifted_image = cv2.warpAffine(
right_image, translation_matrix,
(right_image.shape[1], right_image.shape[0]))
# calculate squared differences
SAD = abs(np.float32(left_image) - np.float32(shifted_image))
# convolve with kernel and find SAD at each point
filtered_image = cv2.filter2D(SAD, -1, kernel)
disparity_maps[:, :, d] = filtered_image
disparity = np.argmin(disparity_maps, axis=2)
disparity = np.uint8(disparity * 255 / num_disparities)
disparity = cv2.equalizeHist(disparity)
plt.imshow(disparity, cmap='gray', vmin=0, vmax=255)
plt.show()
def calculate_disparity_inbuilt(args):
left_image = preprocess_frame(args[1])
right_image = preprocess_frame(args[2])
rows, cols = left_image.shape
stereo = cv2.StereoBM_create(numDisparities=num_disparities,
blockSize=block)
disparity = stereo.compute(left_image, right_image)
plt.imshow(disparity, cmap='gray', vmin=0, vmax=255)
plt.show()
The problem is that the output that I get from the inbuilt function in OpenCV is hardly similar to the one I've implemented. I was expecting at least a slight similarity between the 2. Is this expected? or am I doing something wrong here?
Implemented Algorithm
OpenCV Algorithm
I am working on a project where I should apply and OCR on some documents.
The first step is to threshold the image and let only the writing (whiten the background).
Example of an input image: (For the GDPR and privacy reasons, this image is from the Internet)
Here is my code:
import cv2
import numpy as np
image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
h = image.shape[0]
w = image.shape[1]
for y in range(0, h):
for x in range(0, w):
if image[y, x] >= 120:
image[y, x] = 255
else:
image[y, x] = 0
cv2.imwrite('output.jpg', image)
Here is the result that I got:
When I applied pytesseract to the output image, the results were not satisfying (I know that an OCR is not perfect). Although I tried to adjust the threshold value (in this code it is equal to 120), the result was not as clear as I wanted.
Is there a way to make a better threshold in order to only keep the writing in black and whiten the rest?
After digging deep in StackOverflow questions, I found this answer which is about removing watermark using opencv.
I adapted the code to my needs and this is what I got:
import numpy as np
import cv2
image = cv2.imread('a.png')
img = image.copy()
alpha =2.75
beta = -160.0
denoised = alpha * img + beta
denoised = np.clip(denoised, 0, 255).astype(np.uint8)
#denoised = cv2.fastNlMeansDenoising(denoised, None, 31, 7, 21)
img = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)
h = img.shape[0]
w = img.shape[1]
for y in range(0, h):
for x in range(0, w):
if img[y, x] >= 220:
img[y, x] = 255
else:
img[y, x] = 0
cv2.imwrite('outpu.jpg', img)
Here is the output image:
The good thing about this code is that it gives good results not only with this image, but also with all the images that I tested.
I hope it helps anyone who had the same problem.
You can use adaptive thresholding. From documentation :
In this, the algorithm calculate the threshold for a small regions of the image. So we get different thresholds for different regions of the same image and it gives us better results for images with varying illumination.
import numpy as np
import cv2
image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.medianBlur(image ,5)
th1 = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
cv2.THRESH_BINARY,11,2)
th2 = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY,11,2)
cv2.imwrite('output1.jpg', th1 )
cv2.imwrite('output2.jpg', th2 )
from PIL import Image
fp="C:\\lena.jpg"
img=Image.open(fp)
w,h=img.size
pixels=img.load()
imgsharp=Image.new(img.mode,img.size,color=0)
sharp=[0,-1,0,-1,8,-1,0,-1,0]
for i in range(w):
for j in range(h):
for k in range(3):
for m in range(3):
l=pixels[i-k+1,j-m+1]*sharp[i]
if l>255:
l=255
elif l<0:
l=0
imgsharp.putpixel((i,j),l)
imgsharp.show()
I want to apply a high pass (sharpening) filter with 3x3 mask size to a grayscale image. But I am getting an error:
Traceback (most recent call last):
File "C:\sharp.py", line 16, in <module>
l=pixels[i-k+1,j-m+1]*sharp[i]
IndexError: image index out of range
How can I fix my mistake and how can I get the image sharpening to work in this code?
The specific error you mentioned is because you are not dealing with the borders of the image. A solution is to pad the image or deal with the width and height limits. For example: replace i-k+1 and j-m+1 by max(0, min(w, i-k+1)) and max(0, min(h, j-m+1))) respectively.
There are other issues with your code:
The element of the filter you are accessing is not right... you probably meant sharp[3*m+k] where you wrote sharp[i].
Are you using colored or greyscale image? For colored images, l has 3 dimensions and can't be directly compared to a single number (0 or 255).
Also, the clipping of l value and the putpixel call should be inside the innerest loop.
Your kernel looks a bit odd. Is that 8 supposed to be a 5? Or maybe a 9 and 0 become -1? Take a look at kernels and at this example.
This implementation with several nested loops is not very efficient.
I recommend the following solutions to your problem.
If you want to sharpen the image and that's all, you can use PIL.Image.filter:
from PIL import Image, ImageFilter
img = Image.open('lena.png')
img_sharp = img.filter(ImageFilter.SHARPEN)
img_sharp.show()
If you do want to specify the kernel, try the following with scipy. Be sure to take a look at convolve documentation.
from PIL import Image
from scipy import ndimage, misc
import numpy as np
img = misc.imread('lena.png').astype(np.float) # read as float
kernel = np.array([0, -1, 0, -1, 5, -1, 0, -1, 0]).reshape((3, 3, 1))
# here we do the convolution with the kernel
imgsharp = ndimage.convolve(img, kernel, mode='nearest')
# then we clip (0 to 255) and convert to unsigned int
imgsharp = np.clip(imgsharp, 0, 255).astype(np.uint8)
Image.fromarray(imgsharp).show() # display
Another approach is to use OpenCV. Take a look at this article. It will clearify things about many implementation details.
We can sharpen an RGB image with scipy.convolve2d as well. We have to apply the convolution separately for each image channel. The below code shows the same for the lena image
from scipy import misc, signal
import numpy as np
im = misc.imread('../images/lena.jpg')/255. # scale pixel values in [0,1] for each channel
print(np.max(im))
# 1.0
print(im.shape)
# (220, 220, 3)
sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
im_sharpened = np.ones(im.shape)
for i in range(3):
im_sharpened[...,i] = np.clip(signal.convolve2d(im[...,i], sharpen_kernel, mode='same', boundary="symm"),0,1)
fig, ax = plt.subplots(nrows=2, figsize=(10, 20))
ax[0].imshow(im)
ax[0].set_title('Original Image', size=20)
ax[1].imshow(im_sharpened)
ax[1].set_title('Sharpened Image', size=20)
plt.show()
We can use the gaussian kernel to first blur the image and subtract from the original image to get a sharpened image as well, as shown in the following code:
from scipy import misc, ndimage
im = misc.imread('../images/lena.jpg') / 255 # scale pixel values in [0,1] for each channel
# First a 1-D Gaussian
t = np.linspace(-10, 10, 30)
bump = np.exp(-0.1*t**2)
bump /= np.trapz(bump) # normalize the integral to 1
# make a 2-D kernel out of it
kernel = bump[:, np.newaxis] * bump[np.newaxis, :]
im_blur = ndimage.convolve(im, kernel.reshape(30,30,1))
im_sharp = np.clip(2*im - im_blur, 0, 1)
fig, ax = plt.subplots(nrows=2, figsize=(10, 20))
ax[0].imshow(im)
ax[0].set_title('Original Image', size=20)
ax[1].imshow(im_sharp)
ax[1].set_title('Sharpened Image', size=20)
plt.show()