Deskew MNIST images - python

I found on https://fsix.github.io/mnist/Deskewing.html how to deskew the images of the MNIST dataset. It seems to work. My problem is that before deskewing each pixel has a value between 0 and 1. But after deskewing the image the values are not between 0 and 1 any more. They can be negative and can be greater than 1. How can this be fixed?
Here is the code:
def moments(image):
    """Return the intensity-weighted centroid and covariance of a 2-D image.

    Pixel intensities are used as weights over the pixel grid.  The first
    coordinate runs along axis 0 (rows) and the second along axis 1
    (columns).

    Parameters
    ----------
    image : 2-D ndarray
        Grey-level image.

    Returns
    -------
    mu_vector : ndarray, shape (2,)
        Weighted mean position ``[mean_axis0, mean_axis1]``.
    covariance_matrix : ndarray, shape (2, 2)
        Weighted covariance matrix of the pixel coordinates.

    Notes
    -----
    An image whose total intensity is 0 has no defined moments.  Instead of
    dividing by zero and returning NaNs, the centre of the frame
    (``shape / 2``) and an all-zero covariance matrix are returned.
    """
    # Index grids: c0[i, j] == i (row index), c1[i, j] == j (column index).
    c0, c1 = np.mgrid[:image.shape[0], :image.shape[1]]
    total = np.sum(image)
    if total == 0:
        return np.array(image.shape[:2]) / 2.0, np.zeros((2, 2))

    m0 = np.sum(c0 * image) / total  # mean along axis 0
    m1 = np.sum(c1 * image) / total  # mean along axis 1
    d0 = c0 - m0
    d1 = c1 - m1
    m00 = np.sum(d0 ** 2 * image) / total  # variance along axis 0
    m11 = np.sum(d1 ** 2 * image) / total  # variance along axis 1
    m01 = np.sum(d0 * d1 * image) / total  # covariance between the axes

    mu_vector = np.array([m0, m1])
    covariance_matrix = np.array([[m00, m01], [m01, m11]])
    return mu_vector, covariance_matrix
def deskew(image):
    """Shear *image* using a skew factor estimated from its moments.

    The shear factor is the axis covariance divided by the axis-0 variance,
    both taken from ``moments(image)``.  The offset is chosen so that the
    centre of the output frame samples the intensity centroid of the input.

    The resampled array is returned exactly as produced by
    ``interpolation.affine_transform`` (presumably
    ``scipy.ndimage.interpolation`` -- confirm against the file's imports).
    With that function's default spline interpolation the output values are
    not guaranteed to stay inside the input's value range.
    """
    centroid, cov = moments(image)
    skew = cov[0, 1] / cov[0, 0]
    shear = np.array([[1, 0], [skew, 1]])
    frame_centre = np.array(image.shape) / 2.0
    shift = centroid - shear.dot(frame_centre)
    return interpolation.affine_transform(image, shear, offset=shift)

You can just normalize the image to a range between 0 and 1 after the skewing process.
# Deskew first, then min-max rescale so the smallest value maps to 0 and the
# largest to 1.  Assumes the deskewed image is not constant (max != min).
img = deskew(img)
img = (img - img.min()) / (img.max() - img.min())
See this question.
To incorporate this in the deskew function, you could rewrite it like this:
def deskew(image):
    """Deskew *image* and min-max rescale the result to the range [0, 1].

    The image is sheared with a factor estimated from its moments (axis
    covariance divided by the axis-0 variance), then rescaled so that the
    smallest output value maps to 0 and the largest to 1.

    Parameters
    ----------
    image : 2-D ndarray
        Grey-level image.

    Returns
    -------
    ndarray
        Deskewed image with values in [0, 1].  For degenerate inputs (a
        blank image, or an output with no contrast) an all-zero float array
        of the same shape is returned instead of dividing by zero.
    """
    # A blank image has no moments and nothing to deskew.
    if not np.any(image):
        return np.zeros(np.shape(image), dtype=float)

    c, v = moments(image)
    # Zero variance along axis 0 leaves the skew undefined: apply no shear.
    alpha = v[0, 1] / v[0, 0] if v[0, 0] > 0 else 0.0
    affine = np.array([[1, 0], [alpha, 1]])
    ocenter = np.array(image.shape) / 2.0
    offset = c - np.dot(affine, ocenter)
    img = interpolation.affine_transform(image, affine, offset=offset)

    lo = img.min()
    span = img.max() - lo
    # Written as `not span > 0` so that a NaN span is also caught.
    if not span > 0:
        return np.zeros(img.shape, dtype=float)
    return (img - lo) / span

Related

Fourier Transform shift property implementation in pytorch

I'm trying to implement the phase shift property of Fourier Transform with pytorch.
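What I mean by the shift property is this: a translation in the spatial domain corresponds to a linear phase ramp in the frequency domain, $f(x - x_0,\, y - y_0) \;\longleftrightarrow\; F(u, v)\, e^{-j 2\pi (u x_0 / M + v y_0 / N)}$ for an $M \times N$ image.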
I think that I've got most of the things correctly but somehow get a noisy image.
I'm having a hard time solving this issue. Would it be a numerical issue? Or maybe something due to odd or even pixel numbers? (My images are 1020 x 678 x 3)
These are the shifted image and the original image.
This is my implementation code:
def phase_shifters(y_alpha=0, x_alpha=0, shape=None):
    """Build an additive phase ramp for an ``H x W x C`` spectrum.

    For each spatial axis a ramp of ``length`` evenly spaced samples over
    ``[-length/2, length/2]`` is divided by the axis length and weighted by
    the matching alpha.  The x ramp varies along axis 1, the y ramp along
    axis 0, and both are constant along the channel axis.

    Note that each alpha is applied twice (once when the ramp is scaled and
    once more in the final sum), so the result scales with alpha squared.

    Parameters:
        y_alpha: weight of the ramp along axis 0.
        x_alpha: weight of the ramp along axis 1.
        shape:   indexable ``(H, W, C)`` size of the tensor to return.

    Returns:
        Tensor of size ``(H, W, C)``.
    """
    rows, cols, chans = shape[0], shape[1], shape[2]

    def scaled_ramp(length, alpha):
        # linspace over [-length/2, length/2], normalised by length, times alpha
        return alpha * (torch.linspace(-length / 2, length / 2, length) / length)

    x_term = scaled_ramp(cols, x_alpha).view(1, cols, 1).expand(rows, cols, chans)
    y_term = scaled_ramp(rows, y_alpha).view(rows, 1, 1).expand(rows, cols, chans)
    return x_alpha * x_term + y_alpha * y_term
# Load the image and take its 2-D FFT over the two spatial axes (0 and 1).
# NOTE(review): cv2.imread returns a NumPy array, while torch.fft.fft2 is
# documented for tensors -- confirm a conversion happens before this call.
img = cv2.imread("test.png")
img_fft = torch.fft.fft2(img, dim=(0,1))
# Split the spectrum into magnitude and phase.
mag = torch.abs(img_fft)
phase = torch.angle(img_fft)
# alpha means pixel shift amount in spatial domain!
p_shift = phase_shifters(y_alpha=0,x_alpha=50, shape=phase.shape)
# NOTE(review): `pi` is not defined in this snippet -- presumably imported
# elsewhere (e.g. from math); verify.
phase = (phase+p_shift) % (2*pi) # for wrapping
# Recombine magnitude and shifted phase, invert the FFT and keep the real part.
recon = torch.polar(mag,phase)
recon = torch.fft.ifft2(recon, dim=(0,1)).real
# Limit to the displayable 0..255 range before converting to uint8.
recon = torch.clamp(recon,0,255)
cv2.imshow("recon",np.array(recon, dtype=np.uint8))
cv2.waitKey(0)

How to demonstrate that the impulse response of the Gaussian Pyramid is Scale Invariant?

I built a Gaussian pyramid from a 512x512 image with a single Dirac pulse at the centre (256, 256), and then followed the procedure below to show that the pyramid is scale-invariant, i.e. that it has the same impulse response at each level. However, the results don't seem to be correct!
Can you please advise me how to do it?
Edit:
I edited the code to fix some bugs, thanks to @CrisLuengo for his notes.
Code:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import skimage.exposure as exposure
from math import sqrt, ceil
#=================
# Resize Function
#=================
def _resize(image, downscale=2, step=0.5, minSize=(7, 7)):
if(image.shape > minSize ):
# newSize = (image.shape[0]// downscale, image.shape[1]//downscale)
# newImage = cv2.resize(image, dsize=newSize, fx=step, fy=step)
newImage = cv2.resize(image, None, fx=step, fy=step)
return newImage
else:
return 0
#--------------------------------------------------------------
#===========================
# Gaussian Pyramid Function
#===========================
def pyramid(image, sigma_0=1):
    """Build a six-level Gaussian pyramid of *image*.

    The input is first resized to 512x512 and blurred with standard
    deviation ``sigma_0`` to form level 0.  Each of the five following
    levels is obtained by blurring the previous level with standard
    deviation ``4 * sigma_0`` and passing the result to ``_resize``.

    Parameters
    ----------
    image : ndarray
        The original image.
    sigma_0 : float
        Standard deviation of the initial Gaussian blur.

    Returns
    -------
    list
        The pyramid levels, from level 0 to level 5.
    """
    def odd_kernel(sigma):
        # Smallest odd side length strictly greater than ceil(6 * sigma).
        span = ceil(6 * sigma)
        side = span + 1 if span % 2 == 0 else span + 2
        return (side, side)

    def blur(img, sigma):
        return cv2.GaussianBlur(img, odd_kernel(sigma), sigmaX=sigma, sigmaY=sigma)

    # Bring every input to the same standard size before building level 0.
    base = cv2.resize(image, (512, 512))
    levels = [blur(base, sigma_0)]

    # Blur strength applied before every downscaling step.
    sigma_k = 4 * sigma_0
    for _ in range(5):
        levels.append(_resize(blur(levels[-1], sigma_k)))
    return levels
#====================
# Impulse Response
#====================
# Black 512x512 image with a single non-zero pixel (the Dirac pulse).
delta = np.zeros((512, 512), dtype=np.float32)
delta[255,255] = 255
# Initial blur strengths for the two pyramids being compared.
sigma1 = 1
sigma2 = sqrt(2)
# One pyramid per initial sigma.
deltaPyramid1 = pyramid(delta, sigma_0=sigma1)
deltaPyramid2 = pyramid(delta, sigma_0=sigma2)
# One row per pyramid level, holding a 13-sample profile of that level.
ImpResp1 = np.zeros((len(deltaPyramid1), 13),dtype=float)
ImpResp2 = np.zeros((len(deltaPyramid2), 13),dtype=float)
# sigma = 1: take 13 samples (columns centerx-7 .. centerx+5) from the middle
# row of each level, rescale them to 0..255 and truncate to uint8.
for idx, level in enumerate(deltaPyramid1):
    centery = level.shape[0]//2
    centerx = level.shape[1]//2
    ImpResp1[idx,:] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0,255), in_range='image').astype(np.uint8)
# sigma = sqrt(2): same profile extraction for the second pyramid.
for idx, level in enumerate(deltaPyramid2):
    centery = level.shape[0]//2
    centerx = level.shape[1]//2
    ImpResp2[idx,:] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0,255), in_range='image').astype(np.uint8)
#====================
# Visualize Results
#====================
# One tick label per sample of the 13-point impulse-response profile.
labels = ['C{}'.format(c + 1) for c in range(13)]
x = np.arange(len(labels))  # the label locations
width = 0.1  # the width of the bars

# Figure 1: one group of bars per pyramid level for sigma0 = 1.
fig, ax = plt.subplots()
rects1 = [
    ax.bar(x - 2*k*width, ImpResp1[k], width, label='K{}'.format(k))
    for k in range(ImpResp1.shape[0])
]
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('values')
ax.set_title('sigma0=1')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()

# Figure 2: same layout for sigma0 = sqrt(2).  The loop is driven by
# ImpResp2's own row count rather than ImpResp1's.
fig2, ax2 = plt.subplots()
rects2 = [
    ax2.bar(x + 2*k*width, ImpResp2[k], width, label='K{}'.format(k))
    for k in range(ImpResp2.shape[0])
]
# Add some text for labels, title and custom x-axis tick labels, etc.
ax2.set_ylabel('values')
ax2.set_title('sigma0=sqrt(2)')
ax2.set_xticks(x)
ax2.set_xticklabels(labels)
ax2.legend()
fig2.tight_layout()
plt.show()
First, let’s simplify to a situation that is simple enough to see the scaling property of the Gaussian. Convolving a delta image with a Gaussian yields that Gaussian. A Gaussian B twice the size of a Gaussian A, and then scaled spatially by half, is identical to A (up to intensity scaling of course, B is 1/4 as high as A in 2D).
delta = <all zeros except one pixel in the middle>
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
B = resize(B, 1/2)
A == B * 2**2
C = GaussianBlur(delta, sigma=7.489)
C = resize(C, 1/7.489)
A == C * 7.489**2
Now, if we’re chaining the blur operations, we obtain a stronger blur. The square of the output sigma is equal to the sum of squares of the sigmas applied:
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
C = GaussianBlur(A, sqrt(3))
B == C
That is, 1**2 + sqrt(3)**2 = 2**2.
So, at each step in the pyramid, we need to compute how much blurring we’ve already applied, and apply the right amount to get to the necessary level of blurring. Every time we blur, we increase the blur by a given amount, every time we rescale we reduce the blur by a given amount.
If sigma0 is the initial smoothing, and sigma1 is the smoothing applied before downscaling, and downscaling is by a factor k>1, then this relationship:
sqrt(sigma0**2 + sigma1**2) / k == sigma0
will ensure that the downscaled delta image is the same as the original smoothed delta image (up to intensity scaling). We obtain:
sigma1 = sqrt((sigma0 * k)**2 - sigma0**2)
(if I did this right, here on my phone screen).
Since we’re back to an image identical to the original, subsequent pyramid levels will use these same values.
An additional issue I noticed in your code is that you rescale the delta image “to a standard size” before starting to process. Don’t do this, the delta image will no longer be a delta image, and the relationships above will no longer hold. The input must have exactly one pixel set to 1, the rest being 0.

Color Transfer Between Two Image with One Image as Plain Color

I came across this particular color-transfer tutorial using OpenCV:
https://www.pyimagesearch.com/2014/06/30/super-fast-color-transfer-images/
and implemented it like this:
def color_transfer(source, target):
    """Transfer the colour statistics of *source* onto *target*.

    Both BGR images are converted to L*a*b*.  Each channel of *target* is
    centred on its own mean, scaled by the ratio of the source and target
    standard deviations, and shifted to the source mean.  The result is
    clipped to [0, 255] and converted back to BGR as ``uint8``.

    Parameters
    ----------
    source : ndarray
        BGR image that provides the colour statistics.
    target : ndarray
        BGR image that receives them.

    Returns
    -------
    ndarray
        The recoloured target image (BGR, ``uint8``).
    """
    source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
    target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")

    # Per-channel mean and standard deviation of both images.
    # NOTE(review): `self` is not a parameter of this function; presumably
    # the code was lifted from a method -- confirm where `image_stats` lives.
    (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = self.image_stats(source)
    (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = self.image_stats(target)

    # Centre every target channel on zero.
    (l, a, b) = cv2.split(target)
    l -= lMeanTar
    a -= aMeanTar
    b -= bMeanTar

    # Debug output: source stds, target stds, source means.
    print(lStdSrc, aStdSrc, bStdSrc)
    print(lStdTar, aStdTar, bStdTar)
    print(lMeanSrc, aMeanSrc, bMeanSrc)

    def _ratio(src_std, tar_std):
        # A target channel with zero spread is all zeros once centred, so
        # any finite factor gives the same result; 0.0 avoids inf/NaN.
        return src_std / tar_std if tar_std else 0.0

    # Match the spread of the source, then move to the source mean.
    l = _ratio(lStdSrc, lStdTar) * l + lMeanSrc
    a = _ratio(aStdSrc, aStdTar) * a + aMeanSrc
    b = _ratio(bStdSrc, bStdTar) * b + bMeanSrc

    # Keep every channel inside the 8-bit range before the uint8 cast.
    l = np.clip(l, 0, 255)
    a = np.clip(a, 0, 255)
    b = np.clip(b, 0, 255)

    transfer = cv2.merge([l, a, b])
    transfer = cv2.cvtColor(transfer.astype("uint8"), cv2.COLOR_LAB2BGR)
    return transfer
In this particular code:
# process lab computation
l = (lStdSrc / lStdTar) * l
a = (aStdSrc / aStdTar) * a
b = (bStdSrc / bStdTar) * b
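the target's channels are scaled by the standard deviation of the source. When the source is a plain (single-colour) image, its L, a and b standard deviations are all 0, so every channel of the result collapses to a constant and the output becomes a plain image as well.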
How can I fix this? It works when the source image is not a plain image with color.

imflatfield MATLAB for Python use

I am trying to find an equivalent Python function for MATLAB imflatfield function.
I have a section of code that modifies an image and I want to convert it to Python.
Here is the MATLAB code:
% Read the input image and apply flat-field correction with sigma = 30.
I = imread('lcs2.png');
out2 = imflatfield(I,30);
% Convert to L*a*b* and scale the first (luminosity) channel by max_luminosity.
shadow_lab = rgb2lab(out2);
max_luminosity = 100;
L = shadow_lab(:,:,1)/max_luminosity;
% Equalise the scaled luminosity with adapthisteq and write it back.
shadow_adapthisteq = shadow_lab;
shadow_adapthisteq(:,:,1) = adapthisteq(L)*max_luminosity;
% Convert back to RGB and save the result.
shadow_adapthisteq = lab2rgb(shadow_adapthisteq);
imwrite(shadow_adapthisteq,'lcs2_adap.jpg');
Original image
Final results from MATLAB
Since MATLAB releases the source code of imflatfield, it is not so difficult to implement it in Python using OpenCV.
Note: The implementation is specific to uint8 type and colored image (BGR format in Python).
Here is a MATLAB "manual" implementation of imflatfield:
function B = my_imflatfield(I, sigma)
% MY_IMFLATFIELD  Manual flat-field correction of an RGB image.
%   B = my_imflatfield(I, sigma) divides the HSV value channel of I by a
%   Gaussian-smoothed copy of itself (standard deviation sigma), rescales it
%   by the channel mean, and returns the result as a uint8 RGB image.
A = im2single(I);
Ihsv = rgb2hsv(A);
A = Ihsv(:,:,3); % work on the V (value) channel only
filterSize = 2*ceil(2*sigma)+1; % odd kernel width
shading = imgaussfilt(A, sigma, 'Padding', 'symmetric', 'FilterSize', filterSize); % Calculate shading
meanVal = mean(A(:),'omitnan');
% Limit minimum to 1e-6 instead of testing using isnan and isinf after division.
shading = max(shading, 1e-6);
B = A*meanVal./shading;
%B(isnan(B)) = 0; % sometimes instances of 0/0 happen, making NaN values.
%B(isinf(B)) = 0; % sometimes values are divided by 0, making Inf values.
% Put processed V channel back into HSV image, convert to RGB
Ihsv(:,:,3) = B;
B = hsv2rgb(Ihsv);
B = im2uint8(B);
end
Here is an equivalent Python implementation (using OpenCV):
import cv2
import numpy as np
def imflatfield(I, sigma):
    """Flat-field correct a BGR ``uint8`` image (Python port of ``imflatfield``).

    The HSV value channel is divided by a Gaussian-smoothed copy of itself
    (the estimated shading) and multiplied by its mean; hue and saturation
    are left untouched.

    Args:
        I: input image; must be in BGR format and of type uint8.
        sigma: standard deviation of the Gaussian used to estimate shading.

    Returns:
        The corrected image as a uint8 BGR array.
    """
    # Work in float32 in [0, 1], as im2single would.
    hsv = cv2.cvtColor(I.astype(np.float32) / 255, cv2.COLOR_BGR2HSV)
    value = hsv[:, :, 2]

    # Odd kernel width, same formula as the MATLAB implementation.
    ksize = int(2 * np.ceil(2 * sigma) + 1)
    shading = cv2.GaussianBlur(
        value, (ksize, ksize), sigma, borderType=cv2.BORDER_REFLECT
    )
    channel_mean = np.mean(value)
    # Floor the shading so the division below never hits zero.
    shading = np.maximum(shading, 1e-6)

    # Put the corrected value channel back and return to BGR.
    hsv[:, :, 2] = value * channel_mean / shading
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return np.round(np.clip(bgr * 255, 0, 255)).astype(np.uint8)
# Read the input image, flat-field correct it with sigma = 30 and save the result.
I = cv2.imread('destroyer.jpg')
sigma = 30
out2 = imflatfield(I, sigma)
cv2.imwrite('imflatfield_py_destroyer.png', out2)
The above implementation reads the input image, and write the result to image file.
Comparing results using MATLAB (for testing):
% Run the built-in imflatfield and the manual implementation on the same image.
I = imread('destroyer.jpg');
out1 = imflatfield(I, 30);
out2 = my_imflatfield(I, 30);
% Compare results of imflatfield and my_imflatfield
% (largest absolute per-pixel difference over the whole image):
max(max(max(imabsdiff(out1, out2))))
% figure;imshow(out2)
imwrite(out2, 'imflatfield_destroyer.png');
% Read Python result
out3 = imread('imflatfield_py_destroyer.png');
% Compare results of imflatfield and Python imflatfield:
max(max(max(imabsdiff(out1, out3))))
The maximum absolute difference between MATLAB imflatfield and my_imflatfield is 0.
The maximum absolute difference between MATLAB imflatfield and Python imflatfield is 1.
Converting the complete MATLAB code to Python:
# Flat-field correct the image first.
sigma = 30
out2 = imflatfield(I, sigma)
# Convert out2 to float32 before converting to LAB
out2 = out2.astype(np.float32) / 255 # out2 = im2single(out2);
shadow_lab = cv2.cvtColor(out2, cv2.COLOR_BGR2Lab) # shadow_lab = rgb2lab(out2);
max_luminosity = 100
L = shadow_lab[:, :, 0] / max_luminosity # L = shadow_lab(:,:,1)/max_luminosity;
shadow_adapthisteq = shadow_lab.copy() # shadow_adapthisteq = shadow_lab;
# shadow_adapthisteq(:,:,1) = adapthisteq(L)*max_luminosity;
clahe = cv2.createCLAHE(clipLimit=20, tileGridSize=(8,8))
cl1 = clahe.apply((L*(2**16-1)).astype(np.uint16)) # CLAHE in OpenCV does not support float32 (convert to uint16 and back).
# Scale the equalised channel from the uint16 range back to 0..max_luminosity.
shadow_adapthisteq[:, :, 0] = cl1.astype(np.float32) * max_luminosity / (2**16-1)
shadow_adapthisteq = cv2.cvtColor(shadow_adapthisteq, cv2.COLOR_Lab2BGR) # shadow_adapthisteq = lab2rgb(shadow_adapthisteq);
# Convert shadow_adapthisteq to uint8
shadow_adapthisteq = np.round(np.clip(shadow_adapthisteq*255, 0, 255)).astype(np.uint8) # B = im2uint8(B);
cv2.imwrite('shadow_adapthisteq.jpg', shadow_adapthisteq) # imwrite(shadow_adapthisteq,'lcs2_adap.jpg');
Result is not going to be identical to MATLAB, because adapthisteq in MATLAB is not identical to CLAHE in OpenCV.
Result:

Independent component analysis to separate objects from frame

I have 200 frames of (100*100 ) each in a numpy array of (200, 100, 100).
The frames have circles at random positions, mixed with noise. I want to extract the circles from these frames using Independent Component analysis.
I tried working it with sklearn FastICA, it gives random mixed frame instead of separated out circles.
I was expecting each linear combination(lc_test) to give circles (separated out) from each frame but it is giving random frames.
input: output code is giving (incorrect):
import numpy as np
import random
from sklearn.decomposition import FastICA, PCA
#Creating Data
def createCircle(width, height, rad):
    """Return a boolean mask of a filled circle at a random position.

    Parameters
    ----------
    width, height : int
        Size of the frame; the mask has shape ``(height, width)``.
    rad : number
        Radius of the circle in pixels.

    Returns
    -------
    ndarray of bool, shape (height, width)
        ``True`` for pixels whose distance from the centre is at most *rad*.

    Notes
    -----
    The centre is drawn uniformly from the pixel grid: x from
    ``0 .. width-1`` and y from ``0 .. height-1``, using the ``random``
    module.
    """
    # x is bounded by the width and y by the height, so non-square frames
    # are covered completely and the centre always lies on the grid.
    center_x = random.randrange(width)
    center_y = random.randrange(height)
    Y, X = np.ogrid[:height, :width]
    dist_from_center = np.sqrt((X - center_x) ** 2 + (Y - center_y) ** 2)
    return dist_from_center <= rad
def addCircle(test_image):
    """Return a copy of *test_image* with one random circle set to 0.

    The circle has radius 8 and its mask is sized from the image itself
    (rows x columns), so images other than 100x100 work as well.  The input
    array is not modified.
    """
    rows, cols = test_image.shape[:2]
    m = createCircle(width=cols, height=rows, rad=8)
    masked_img = test_image.copy()
    masked_img[m] = 0
    return masked_img
def noise(image):
    """Return *image* with Gaussian noise and a constant offset added.

    The noise is drawn from ``np.random.normal`` with mean 0 and variance
    0.1, one sample per pixel of the 2-D input.  The constant ``sin(8)`` is
    then added to every pixel.  The input array is left unchanged.
    """
    n_rows, n_cols = image.shape
    std_dev = 0.1 ** 0.5
    perturbation = np.random.normal(0, std_dev, (n_rows, n_cols))
    return image + perturbation + np.sin(8)
# Uniform 100x100 background frame with every pixel set to 20.
img = np.zeros([100,100],dtype=np.uint8)
img.fill(20)
img_test = img
#Making 200 frames
ims = np.zeros((200, 100, 100)) # initialize your array
# range(1, 200) fills frames 1..199; frame 0 keeps its initial zeros.
for i in range(1,200):
    # range(1, 5) runs four times: four circles per frame.
    for j in range(1,5):
        img_test = addCircle(test_image=img_test)
    im1 = noise(img_test)
    # Start the next frame from the clean background again.
    img_test = img
    ims[i, ...] = im1
print(ims.shape) #(200,100,100)
#Apply Independent Component Analysis on ims
# One row per frame, one column per pixel.
ims_flat = ims.reshape(200,10000)
ica_test = FastICA(max_iter = 3000, tol = 0.3)
# The transposed data is fitted, so pixels are the samples and frames the features.
s_test= ica_test.fit_transform(ims_flat.T) #source matrix
print(s_test.shape) #(10000, 200)
a_test = ica_test.mixing_ #mixing matrix
print(a_test.shape) #(200, 200)
#unmixing matrix : inverse of mixing matrix
w_test = np.linalg.inv(a_test) #unmixing matrix
print(w_test.shape) #(200, 200)
#Taking Linear combination of unmixing matrix and ims_flat(original) to extract Independent components
# NOTE(review): solve(w_test, ims_flat) returns inv(w_test) @ ims_flat, i.e.
# a_test @ ims_flat (mixing, not unmixing) -- confirm this is intended.
lc_test = np.linalg.solve(w_test, ims_flat) #Linear combinations
print(lc_test.shape) #(200, 10000)
#plotting Linear combinations
# NOTE(review): `plt` is not imported in this snippet -- presumably
# matplotlib.pyplot; verify.
for i in range(1,200): #Does not extract circles
    plt.imshow(lc_test[i,:].reshape(100, 100))
    plt.show()
The goal is to extract/separate circles from all 200 frames.

Categories