Fourier Transform shift property implementation in pytorch - python

I'm trying to implement the phase shift property of Fourier Transform with pytorch.
What I mean by the shift property is this:
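For reference, the standard 2D DFT shift theorem says that shifting an image f(x, y) by (x0, y0) pixels multiplies its transform by a linear phase: F{f(x - x0, y - y0)}(u, v) = F(u, v) * exp(-j*2*pi*(u*x0/W + v*y0/H)), where W and H are the image width and height. The code below tries to add that linear phase ramp to the phase of the FFT and then reconstruct the image.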
I think I've got most of it right, but somehow I get a noisy image.
I'm having a hard time solving this issue. Could it be a numerical issue, or maybe something due to odd or even pixel counts? (My images are 1020 x 678 x 3.)
These are the shifted image and the original image.
This is my implementation code:
import cv2
import numpy as np
import torch
from math import pi

def phase_shifters(y_alpha=0, x_alpha=0, shape=None):
    # shape is HxWxC
    line = torch.zeros(shape)
    # x shift
    line_x = torch.linspace(-shape[1]/2, shape[1]/2, shape[1])
    line_x = line_x.expand(shape[0], shape[2], shape[1]).transpose(1, 2)
    line_x = line_x / shape[1]
    line_x = x_alpha * line_x
    # y shift
    line_y = torch.linspace(-shape[0]/2, shape[0]/2, shape[0])
    line_y = line_y.expand(shape[2], shape[1], shape[0]).transpose(0, 2)
    line_y = line_y / shape[0]
    line_y = y_alpha * line_y
    return x_alpha*line_x + y_alpha*line_y

img = torch.from_numpy(cv2.imread("test.png")).float()  # HxWxC
img_fft = torch.fft.fft2(img, dim=(0, 1))
mag = torch.abs(img_fft)
phase = torch.angle(img_fft)

# alpha means pixel shift amount in the spatial domain!
p_shift = phase_shifters(y_alpha=0, x_alpha=50, shape=phase.shape)
phase = (phase + p_shift) % (2*pi)  # for wrapping

recon = torch.polar(mag, phase)
recon = torch.fft.ifft2(recon, dim=(0, 1)).real
recon = torch.clamp(recon, 0, 255)

cv2.imshow("recon", np.array(recon, dtype=np.uint8))
cv2.waitKey(0)
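For comparison, here is a minimal sketch of the same shift theorem built directly from torch.fft.fftfreq frequencies; this is an illustrative reference, not the asker's code, and img_t is assumed to be an HxWxC float tensor:

import math
import torch

def shift_via_fft(img_t, dy=0.0, dx=0.0):
    H, W = img_t.shape[0], img_t.shape[1]
    fy = torch.fft.fftfreq(H).reshape(H, 1, 1)   # cycles/pixel along y
    fx = torch.fft.fftfreq(W).reshape(1, W, 1)   # cycles/pixel along x
    # Linear phase ramp exp(-j*2*pi*(fy*dy + fx*dx)), built with torch.polar
    angle = -2 * math.pi * (fy * dy + fx * dx)
    ramp = torch.polar(torch.ones_like(angle), angle)
    spec = torch.fft.fft2(img_t, dim=(0, 1))
    return torch.fft.ifft2(spec * ramp, dim=(0, 1)).real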

Related

How to reverse Fourier Transform on an image with np.fft.ifft2 - Python

I am new to Fourier Transform in Python.
I want to isolate a field on an image using the Fourier transform. Here is my picture:
And here is what I am supposed to obtain:
Here is my code so far:
import numpy as np
import matplotlib.pyplot as plt
import cv2

path = "C:/Users/cecil/OneDrive/Bureau/00_TP_M2TSI/TP Traitement Image/BE1 PDF/BE1 PDF/champs.png"
img = plt.imread(path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # converting to grayscale
plt.set_cmap("gray")
plt.subplot(131)
plt.imshow(img)
plt.axis("off")

# Calculate the Fourier transform of the grating
ft = np.fft.ifftshift(img)
ft = np.fft.fft2(ft)
ft = np.fft.fftshift(ft)
plt.subplot(132)
plt.imshow(np.log(abs(ft)))
plt.axis("off")

# Isolating the band we want to keep (we define 2 affine functions to border
# the band and keep only the values that are in between)
for x in range(0, 512):
    y1 = 0.77*x + 55
    y2 = 0.77*x + 65
    for y in range(0, 512):
        if not (y > y1 and y < y2):
            ft[y, x] = 0

# Calculate the inverse Fourier transform of the Fourier transform
ift = np.fft.ifftshift(ft)
ift = np.fft.ifft2(ift)
ift = np.fft.fftshift(ift)
ift = ift.real  # Take only the real part
plt.subplot(133)
plt.imshow(ift)
plt.axis("off")
plt.show()
My problem is that I obtain this image instead of the one I want (see above):
Can anyone help me, please?
So apparently I fixed the problem. It seems I actually needed to do the shift and FFT, and the inverse shift and inverse FFT, in separate lines, like this:
from numpy.fft import fft2, fftshift, ifft2, ifftshift

# Calculate the Fourier transform of the grating (imgris is the grayscale image from above)
spectre = fft2(imgris)
spectre = fftshift(spectre)
plt.imshow(np.log(abs(spectre)))
plt.show()

bande = np.zeros((512, 512), dtype=complex)
for x in range(0, 512):
    y1 = 0.77*x + 40
    y2 = 0.77*x + 80
    for y in range(0, 512):
        if y > y1 and y < y2:
            bande[y, x] = spectre[y, x]
        if x > (512/2 - 20) and x < (512/2 + 20):
            bande[y, x] = 0
plt.imshow(abs(bande))
plt.show()

real = bande.real
phases = bande.imag
bande_mod = np.empty(real.shape, dtype=complex)
bande_mod.real = real
bande_mod.imag = phases

champisolé = ifftshift(bande_mod)
champisolé = np.abs(ifft2(champisolé))
plt.imshow(champisolé)
plt.show()
The obtained image (first picture) is somewhat blurred and there are quite a lot of unwanted pixels around the field I wanted to isolate, but with a few filters I obtain a perfectly isolated field (even better than the teacher's!) (second picture).

What are the inaccuracies of this 'inverse map' function in OpenCV?

I am trying to horizontally stretch an image in a very specific way. Each x prime coordinate should follow a tangent path with respect to the original x coordinate. I believe there are two ways to do this:
1. Invert the tangent function and map it normally
2. Map the tangent function and then invert the mapping
Using this answer for map inversion, I'm trying to figure out why the two images are not the same. I know that the first method gives me the correct image that I'm looking for, so why doesn't the second method work? Is it because of the "limited precision" that @ChristophRackwitz commented about on the answer?
import cv2
import glob
import numpy as np
import math

A = -1010
B = -3.931
C = 5.258
D = 978.3
M = -193.8
N = 1740

def get_tan_func_value(x):
    return A * math.tan((((x-N)/M) + B) / C) + D

def get_inverse_tan_func_value(x):
    return M * (C*math.atan((x-D)/A) - B) + N

# answer from linked post
def invert_map(F, shape):
    I = np.zeros_like(F)
    I[:,:,1], I[:,:,0] = np.indices(shape)
    P = np.copy(I)
    for i in range(10):
        P += I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
    return P

# import image
images = glob.glob('*.jpg')
img = cv2.imread(images[0])
h, w = img.shape[:2]
map_x_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
map_x_inverse_tan = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)
map_y = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)

# x tan function map
for i in range(map_x_tan.shape[0]):
    map_x_tan[i,:] = [get_tan_func_value(x) for x in range(map_x_tan.shape[1])]

# x inverse tan function map
for i in range(map_x_inverse_tan.shape[0]):
    map_x_inverse_tan[i,:] = [get_inverse_tan_func_value(x) for x in range(map_x_inverse_tan.shape[1])]

# default y map
for j in range(map_y.shape[1]):
    map_y[:,j] = [y for y in range(map_y.shape[0])]

# convert x tan map to 2 channel (x,y) map
(xymap_tan, _) = cv2.convertMaps(map1=map_x_tan, map2=map_y, dstmap1type=cv2.CV_32FC2)

# invert the 2 channel x tan map
xymap_inverted = invert_map(xymap_tan, (h, w))

# remap and write the target image (inverse tan function with normal map)
target = cv2.remap(img, map_x_inverse_tan, map_y, cv2.INTER_LINEAR)
cv2.imwrite("target.jpg", target)

# remap and write the attempted image (normal tan function with inverted map)
attempt = cv2.remap(img, xymap_inverted, None, cv2.INTER_LINEAR)
cv2.imwrite("attempt.jpg", attempt)
Method 1: Target Image
Method 2: Attempt Image
The results show that the attempt (normal tan function with inverted map) has less stretching near the edges of the image than expected. Almost everywhere else the two images are identical; only the edges differ. I did not post the original picture to save space.
I've played around with that invert_map procedure. It seems slightly susceptible to oscillation.
Use this instead:
def invert_map(F):
    (h, w) = F.shape[:2]  # F has shape (h, w, 2), an "xymap"
    I = np.zeros_like(F)
    I[:,:,1], I[:,:,0] = np.indices((h, w))  # identity map
    P = np.copy(I)
    for i in range(10):
        correction = I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
        P += correction * 0.5
    return P
I simply damped the correction by 0.5, which makes the fixed point iteration tamer, converging a lot faster too.
In my experiments with your tan map, I've found that 5-10 iterations are already good enough; further iterations bring no further progress.
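If you want to see the damping at work, here is a small diagnostic sketch (hypothetical, not from the notebook) that prints the largest per-pixel correction at each iteration:

import numpy as np
import cv2

def invert_map_verbose(F, damping=0.5, iters=10):
    # Same damped fixed-point iteration as above, but reporting how quickly it settles.
    (h, w) = F.shape[:2]
    I = np.zeros_like(F)
    I[:,:,1], I[:,:,0] = np.indices((h, w))
    P = np.copy(I)
    for i in range(iters):
        correction = I - cv2.remap(F, P, None, interpolation=cv2.INTER_LINEAR)
        P += correction * damping
        print(f"iteration {i}: max |correction| = {np.abs(correction).max():.4f}")
    return P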
Entire notebook of my explorations: https://gist.github.com/crackwitz/67f76f8a9eff21476b080c06d20660d0
Feature request: https://github.com/opencv/opencv/issues/22120

Padding scipy affine_transform output to show non-overlapping regions of transformed images

I have source (src) image(s) I wish to align to a destination (dst) image using an Affine Transformation whilst retaining the full extent of both images during alignment (even the non-overlapping areas).
I am already able to calculate the Affine Transformation rotation and offset matrix, which I feed to scipy.ndimage.interpolate.affine_transform to recover the dst-aligned src image.
The problem is that, when the images are not fully overlapping, the resultant image is cropped to only the common footprint of the two images. What I need is the full extent of both images, placed on the same pixel coordinate system. This question is almost a duplicate of this one - and the excellent answer and repository there provide this functionality for OpenCV transformations. I unfortunately need this for scipy's implementation.
Much too late, after repeatedly hitting a brick wall trying to translate the above question's answer to scipy, I came across this issue and subsequently followed it to this question. The latter question did give some insight into the wonderful world of scipy's affine transformations, but I have so far been unable to crack my particular problem.
The transformations from src to dst can have translations and rotation. I can get translation-only working (an example is shown below) and I can get rotation-only working (largely hacking around the below and taking inspiration from the use of the reshape argument in scipy.ndimage.interpolation.rotate). However, I am getting thoroughly lost combining the two. I have tried to calculate what should be the correct offset (see this question's answers again), but I can't get it working in all scenarios.
Translation-only working example of a padded affine transformation, which largely follows this repo, as explained in this answer:
from scipy.ndimage import rotate, affine_transform
import numpy as np
import matplotlib.pyplot as plt

nblob = 50
shape = (200, 100)
buffered_shape = (300, 200)  # buffer for rotation and translation

def affine_test(angle=0, translate=(0, 0)):
    np.random.seed(42)
    # Maximum translation allowed is half the difference between shape and buffered_shape
    # Generate a buffered_shape-sized base image with random blobs
    base = np.zeros(buffered_shape, dtype=np.float32)
    random_locs = np.random.choice(np.arange(2, buffered_shape[0] - 2), nblob * 2, replace=False)
    i = random_locs[:nblob]
    j = random_locs[nblob:]
    for k, (_i, _j) in enumerate(zip(i, j)):
        # Use different values, just to make it easier to distinguish blobs
        base[_i - 2 : _i + 2, _j - 2 : _j + 2] = k + 10

    # Impose a rotation and translation on source
    src = rotate(base, angle, reshape=False, order=1, mode="constant")
    bsc = (np.array(buffered_shape) / 2).astype(int)
    sc = (np.array(shape) / 2).astype(int)
    src = src[
        bsc[0] - sc[0] + translate[0] : bsc[0] + sc[0] + translate[0],
        bsc[1] - sc[1] + translate[1] : bsc[1] + sc[1] + translate[1],
    ]
    # Cut-out destination from the centre of the base image
    dst = base[bsc[0] - sc[0] : bsc[0] + sc[0], bsc[1] - sc[1] : bsc[1] + sc[1]]
    src_y, src_x = src.shape

    def get_matrix_offset(centre, angle, scale):
        """Follows OpenCV.getRotationMatrix2D"""
        angle = angle * np.pi / 180
        alpha = scale * np.cos(angle)
        beta = scale * np.sin(angle)
        return (
            np.array([[alpha, beta], [-beta, alpha]]),
            np.array(
                [
                    (1 - alpha) * centre[0] - beta * centre[1],
                    beta * centre[0] + (1 - alpha) * centre[1],
                ]
            ),
        )

    # Obtain the rotation matrix and offset that describes the transformation
    # between src and dst
    matrix, offset = get_matrix_offset(np.array([src_y / 2, src_x / 2]), angle, 1)
    offset = offset - translate

    # Determine the outer bounds of the new image
    lin_pts = np.array([[0, src_x, src_x, 0], [0, 0, src_y, src_y]])
    transf_lin_pts = np.dot(matrix.T, lin_pts) - offset[::-1].reshape(2, 1)

    # Find min and max bounds of the transformed image
    min_x = np.floor(np.min(transf_lin_pts[0])).astype(int)
    min_y = np.floor(np.min(transf_lin_pts[1])).astype(int)
    max_x = np.ceil(np.max(transf_lin_pts[0])).astype(int)
    max_y = np.ceil(np.max(transf_lin_pts[1])).astype(int)

    # Add translation to the transformation matrix to shift to positive values
    anchor_x, anchor_y = 0, 0
    if min_x < 0:
        anchor_x = -min_x
    if min_y < 0:
        anchor_y = -min_y
    shifted_offset = offset - np.dot(matrix, [anchor_y, anchor_x])

    # Create padded destination image
    dst_h, dst_w = dst.shape[:2]
    pad_widths = [anchor_y, max(max_y, dst_h) - dst_h, anchor_x, max(max_x, dst_w) - dst_w]
    dst_padded = np.pad(
        dst,
        ((pad_widths[0], pad_widths[1]), (pad_widths[2], pad_widths[3])),
        "constant",
        constant_values=-1,
    )
    dst_pad_h, dst_pad_w = dst_padded.shape

    # Create the aligned and padded source image
    source_aligned = affine_transform(
        src,
        matrix.T,
        offset=shifted_offset,
        output_shape=(dst_pad_h, dst_pad_w),
        order=3,
        mode="constant",
        cval=-1,
    )

    # Plot the images
    fig, axes = plt.subplots(1, 4, figsize=(10, 5), sharex=True, sharey=True)
    axes[0].imshow(src, cmap="viridis", vmin=-1, vmax=nblob)
    axes[0].set_title("Source")
    axes[1].imshow(dst, cmap="viridis", vmin=-1, vmax=nblob)
    axes[1].set_title("Dest")
    axes[2].imshow(source_aligned, cmap="viridis", vmin=-1, vmax=nblob)
    axes[2].set_title("Source aligned to Dest padded")
    axes[3].imshow(dst_padded, cmap="viridis", vmin=-1, vmax=nblob)
    axes[3].set_title("Dest padded")
    plt.show()
e.g.:
affine_test(0, (-20, 40))
gives:
Here is a zoom-in showing the alignment in the padded images:
I require the full extent of the src and dst images aligned on the same pixel coordinates, with both rotations and translations.
Any help is greatly appreciated!
Complexity analysis
The problem is to determine three parameters: the rotation angle and the x and y displacements.
Let's suppose that you have a grid of candidate angles and x and y displacements, each of size O(n), and that your images are of size O(n x n). Rotating, translating, and comparing the images each take O(n^2), and since there are O(n^3) candidate transforms to try, a brute-force search has complexity O(n^5), which is probably why you are asking the question.
However, the displacement part can be computed much more efficiently by finding the maximum of the cross-correlation using Fourier transforms. A 2D FFT of an n x n image costs O(n^2 log n), so the complete correlation matrix can be computed in O(n^2 log n); finding its maximum costs O(n^2), so determining the best displacement is O(n^2 log n) overall. You still have to search for the best angle, and with O(n) candidate angles the complexity of the whole search becomes O(n^3 log n). As a rough example, for n = 512 that is on the order of 10^9 operations, versus roughly 3.5 x 10^13 for the brute-force search. Remember that we are using Python and may have significant overhead, so this complexity only gives an idea of how difficult the problem will be; I have handled problems like this before, so I start out confident.
Preparing some example
I will start by loading an image, applying a rotation, and centering the result, padding with zeros.
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage

def centralized(a, width, height):
    '''
    Image centralized to the given width and height
    by padding with zeros (black)
    '''
    assert width >= a.shape[0] and height >= a.shape[1]
    ap = np.zeros((width, height) + a.shape[2:], a.dtype)
    ccx = (width - a.shape[0])//2
    ccy = (height - a.shape[1])//2
    ap[ccx:ccx+a.shape[0], ccy:ccy+a.shape[1], ...] = a
    return ap

def image_pair(im, width, height, displacement=(0,0), angle=0):
    '''
    This builds a pair of images as numpy arrays
    from the input image.
    Both images will be padded with zeros (black),
    roughly centralized, and will have the specified shape.
    Make sure that the width and height chosen are enough
    to fit the rotated image.
    '''
    a = np.array(im)
    a1 = centralized(a, width, height)
    a2 = centralized(ndimage.rotate(a, angle), width, height)
    a2 = np.roll(a2, displacement, axis=(0,1))
    return a1, a2

def random_transform():
    angle = np.random.rand() * 360
    displacement = np.random.randint(-100, 100, 2)
    return displacement, angle

im = plt.imread('sunflower.jpg')  # hypothetical path; the answer uses a sunflower photo
a1, a2 = image_pair(im, 512, 512, *random_transform())
plt.subplot(121)
plt.imshow(a1)
plt.subplot(122)
plt.imshow(a2)
The displacement search
The first thing is to compute the correlation of the two images:
def compute_correlation(a1, a2):
    A1 = np.fft.rfftn(a1, axes=(0,1))
    A2 = np.fft.rfftn(a2, axes=(0,1))
    C = np.fft.irfftn(np.sum(A1 * np.conj(A2), axis=2))
    return C
Then, let's create an example without rotation and confirm that, from the index of the maximum correlation, we can find the displacement that fits one image to the other.
displacement, _ = random_transform()
a1, a2 = image_pair(im, 521, 512, displacement, angle=0)
C = compute_correlation(a1, a2)
np.unravel_index(np.argmax(C), C.shape), displacement
a3 = np.roll(a2, np.unravel_index(np.argmax(C), C.shape), axis=(0,1))
assert np.all(a3 == a1)
With rotation or interpolation this result may not be exact, but it gives the displacement that yields the closest possible alignment.
Let's put this in a function for future use:
def get_aligned(a1, a2, angle):
    a1_rotated = ndimage.rotate(a1, angle, reshape=False)
    C = compute_correlation(a2, a1_rotated)
    found_displacement = np.unravel_index(np.argmax(C), C.shape)
    a1_aligned = np.roll(a1_rotated, found_displacement, axis=(0,1))
    return a1_aligned
Searching for the angle
Now we can do this in two steps: first we compute the correlation for each candidate angle, then we take the angle that gives the maximum correlation and find the alignment.
displacement, angle = random_transform()
a1, a2 = image_pair(im, 521, 512, displacement, angle)
C_max = []
C_argmax = []
angle_guesses = np.arange(0, 360, 5)
for angle_guess in angle_guesses:
    a1_rotated = ndimage.rotate(a1, angle_guess, reshape=False)
    C = compute_correlation(a1_rotated, a2)
    i = np.argmax(C)
    v = C.reshape(-1)[i]
    C_max.append(v)
    C_argmax.append(i)
Let's see what the correlation looks like:
plt.plot(angle_guesses, C_max);
Looking at this curve we have a clear winner, even though a sunflower has some rotational symmetry.
Let's apply the transformation to the original image and see how it looks:
a1_aligned = get_aligned(a1, a2, angle_guesses[np.argmax(C_max)])
plt.subplot(121)
plt.imshow(a2)
plt.subplot(122)
plt.imshow(a1_aligned)
Great, I wouldn't have done better than this manually.
I am using a sunflower image for aesthetic reasons, but the procedure is the same for any type of image. I use RGB to show that the image may have one additional dimension, i.e. that each pixel carries a feature vector instead of a scalar; if your feature is a scalar, you can reshape your data to (width, height, 1).
Working code below, in case anyone else needs this for scipy's affine transformations:
def affine_test(angle=0, translate=(0, 0), shape=(200, 100), buffered_shape=(300, 200), nblob=50):
    # Maximum translation allowed is half the difference between shape and buffered_shape
    np.random.seed(42)
    # Generate a buffered_shape-sized base image
    base = np.zeros(buffered_shape, dtype=np.float32)
    random_locs = np.random.choice(np.arange(2, buffered_shape[0] - 2), nblob * 2, replace=False)
    i = random_locs[:nblob]
    j = random_locs[nblob:]
    for k, (_i, _j) in enumerate(zip(i, j)):
        base[_i - 2 : _i + 2, _j - 2 : _j + 2] = k + 10

    # Impose a rotation and translation on source
    src = rotate(base, angle, reshape=False, order=1, mode="constant")
    bsc = (np.array(buffered_shape) / 2).astype(int)
    sc = (np.array(shape) / 2).astype(int)
    src = src[
        bsc[0] - sc[0] + translate[0] : bsc[0] + sc[0] + translate[0],
        bsc[1] - sc[1] + translate[1] : bsc[1] + sc[1] + translate[1],
    ]
    # Cut-out destination from the centre of the base image
    dst = base[bsc[0] - sc[0] : bsc[0] + sc[0], bsc[1] - sc[1] : bsc[1] + sc[1]]
    src_y, src_x = src.shape

    def get_matrix_offset(centre, angle, scale):
        """Follows OpenCV.getRotationMatrix2D"""
        angle_rad = angle * np.pi / 180
        alpha = np.round(scale * np.cos(angle_rad), 8)
        beta = np.round(scale * np.sin(angle_rad), 8)
        return (
            np.array([[alpha, beta], [-beta, alpha]]),
            np.array(
                [
                    (1 - alpha) * centre[0] - beta * centre[1],
                    beta * centre[0] + (1 - alpha) * centre[1],
                ]
            ),
        )

    matrix, offset = get_matrix_offset(
        np.array([((src_y - 1) / 2) - translate[0], ((src_x - 1) / 2) - translate[1]]), angle, 1
    )
    offset += np.array(translate)
    M = np.column_stack((matrix, offset))
    M = np.vstack((M, [0, 0, 1]))
    iM = np.linalg.inv(M)
    imatrix = iM[:2, :2]
    ioffset = iM[:2, 2]

    # Determine the outer bounds of the new image
    lin_pts = np.array([[0, src_y-1, src_y-1, 0], [0, 0, src_x-1, src_x-1]])
    transf_lin_pts = np.dot(matrix, lin_pts) + offset.reshape(2, 1)  # - np.array(translate).reshape(2, 1) # both?

    # Find min and max bounds of the transformed image
    min_x = np.floor(np.min(transf_lin_pts[1])).astype(int)
    min_y = np.floor(np.min(transf_lin_pts[0])).astype(int)
    max_x = np.ceil(np.max(transf_lin_pts[1])).astype(int)
    max_y = np.ceil(np.max(transf_lin_pts[0])).astype(int)

    # Add translation to the transformation matrix to shift to positive values
    anchor_x, anchor_y = 0, 0
    if min_x < 0:
        anchor_x = -min_x
    if min_y < 0:
        anchor_y = -min_y
    dot_anchor = np.dot(imatrix, [anchor_y, anchor_x])
    shifted_offset = ioffset - dot_anchor

    # Create padded destination image
    dst_y, dst_x = dst.shape[:2]
    pad_widths = [anchor_y, max(max_y, dst_y) - dst_y, anchor_x, max(max_x, dst_x) - dst_x]
    dst_padded = np.pad(
        dst,
        ((pad_widths[0], pad_widths[1]), (pad_widths[2], pad_widths[3])),
        "constant",
        constant_values=-10,
    )
    dst_pad_y, dst_pad_x = dst_padded.shape

    # Create the aligned and padded source image
    source_aligned = affine_transform(
        src,
        imatrix,
        offset=shifted_offset,
        output_shape=(dst_pad_y, dst_pad_x),
        order=3,
        mode="constant",
        cval=-10,
    )
E.g. running:
affine_test(angle=-25, translate=(10, -40))
will show:
and zoomed in:
Apologies that the code is not nicely written as is.
Note that, running this in the wild, I notice it cannot handle any change in scale between the images, but I am not certain this isn't something to do with how I calculate the transformation; so it's a caveat worth noting, and checking out, if you are aligning images with different scales.
If you have two images that are similar (or the same) and you want to align them, you can do it using the functions rotate and shift:
from scipy.ndimage import rotate, shift
First you need to find the angular difference between the two images, angle_to_rotate; having that, you apply a rotation to src:
angle_to_rotate = 25
rotated_src = rotate(src, angle_to_rotate , reshape=True, order=1, mode="constant")
With reshape=True you avoid losing information from your original src matrix, and it pads the result so the image can be translated around the (0, 0) indices. You can calculate this translation as (x*cos(angle), y*sin(angle)), where x and y are the dimensions of the image, but it probably won't matter.
Now you will need to translate the image so that it lines up with the destination; for that you can use the shift function:
rot_translated_src = shift(rotated_src , [distance_x, distance_y])
In this case there is no reshape (because otherwise you wouldn't have any real translation), so if the image was not previously padded, some information will be lost.
But you can do some padding with
np.pad(src, number, mode='constant')
To calculate distance_x and distance_y you will need to find a point that serves as a reference between rotated_src and the destination, and then just measure the distance along the x and y axes.
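If a visual reference is awkward, one programmatic option (my own suggestion, not part of the original answer) is phase correlation, e.g. scikit-image's phase_cross_correlation, assuming src and dst have already been padded to the same shape:

from scipy.ndimage import rotate, shift
from skimage.registration import phase_cross_correlation  # scikit-image >= 0.17 assumed

def align_with_phase_correlation(src, dst, angle_to_rotate):
    # reshape=False keeps rotated_src the same shape as dst, which phase correlation requires
    rotated_src = rotate(src, angle_to_rotate, reshape=False, order=1, mode="constant")
    # Estimated (row, col) shift that registers rotated_src onto dst
    estimated_shift, error, _ = phase_cross_correlation(dst, rotated_src)
    return shift(rotated_src, estimated_shift)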
Summary
1. Make some padding in src and dst.
2. Find the angular distance between them.
3. Rotate src with scipy.ndimage.rotate using reshape=True.
4. Find the horizontal and vertical distances distance_x, distance_y between the rotated image and dst.
5. Translate your rotated_src with scipy.ndimage.shift.
Code
from scipy.ndimage import rotate, shift
import matplotlib.pyplot as plt
import numpy as np
First we make the destination image:
# make and plot dest
dst = np.ones([40,20])
dst = np.pad(dst,10)
dst[17,[14,24]]=4
dst[27,14:25]=4
dst[26,[14,25]]=4
rotated_dst = rotate(dst, 20, order=1)
plt.imshow(dst) # plot it
plt.imshow(rotated_dst)
plt.show()
We make the Source image:
# make_src image and plot it
src = np.zeros([40,20])
src = np.pad(src,10)
src[0:20,0:20]=1
src[7,[4,14]]=4
src[17,4:15]=4
src[16,[4,15]]=4
plt.imshow(src)
plt.show()
Then we align the src to the destination:
rotated_src = rotate(src, 20, order=1) # the angle is 20; reshape=True is the default
plt.imshow(rotated_src)
plt.show()
distance_y = 8 # find these distances from rotated_src and dst
distance_x = 12 # use any visual reference or even the corners
translated_src = shift(rotated_src, [distance_y,distance_x])
plt.imshow(translated_src)
plt.show()
PS: If you have problems finding the angle and the distances programmatically, please leave a comment providing a bit more insight into what can be used as a reference (for example, the frame of the image or some image features/data).

How to demonstrate that the impulse response of the Gaussian Pyramid is Scale Invariant?

I built a Gaussian pyramid from a 512x512 image with one Dirac pulse at the centre (256,256), then tried to follow the procedure below to show that the pyramid is scale-invariant, i.e. that it has the same impulse response at each level, but the results don't seem to be correct!
Can you please advise me how to do it?
Edit:
I edited the code to fix some bugs, thanks to @CrisLuengo for his notes.
Code:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import skimage.exposure as exposure
from math import sqrt, ceil

#=================
# Resize Function
#=================
def _resize(image, downscale=2, step=0.5, minSize=(7, 7)):
    if image.shape > minSize:
        # newSize = (image.shape[0]//downscale, image.shape[1]//downscale)
        # newImage = cv2.resize(image, dsize=newSize, fx=step, fy=step)
        newImage = cv2.resize(image, None, fx=step, fy=step)
        return newImage
    else:
        return 0
#--------------------------------------------------------------

#===========================
# Gaussian Pyramid Function
#===========================
def pyramid(image, sigma_0=1):
    '''
    Function to create a Gaussian pyramid from an image for a given standard deviation sigma_0

    Parameters:
    -----------
    #param: image: nd-array.
        The original image.
    #param: sigma_0: float.
        standard deviation of the Gaussian distribution.

    returns:
        List of images with different scales, the pyramid
    '''
    # Resize all input images to a standard size
    image = cv2.resize(image, (512, 512))
    # level 0
    if ceil(6*sigma_0) % 2 == 0:
        Gimage = cv2.GaussianBlur(image, (ceil(6*sigma_0)+1, ceil(6*sigma_0)+1), sigmaX=sigma_0, sigmaY=sigma_0)
    else:
        Gimage = cv2.GaussianBlur(image, (ceil(6*sigma_0)+2, ceil(6*sigma_0)+2), sigmaX=sigma_0, sigmaY=sigma_0)
    # sigma_k
    sigma_k = 4*sigma_0
    # sigma_k = sqrt(2)*sigma_0
    # Pyramid as list
    GaussPyr = [Gimage]
    # Loop over the other levels of the pyramid
    for k in range(1, 6):
        if ceil(6*sigma_k) % 2 == 0:
            # smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+1, ceil(6*sigma_k)+1), sigmaX=sigma_k, sigmaY=sigma_0)
            smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+1, ceil(6*sigma_k)+1), sigmaX=sigma_k, sigmaY=sigma_k)
        else:
            # smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+2, ceil(6*sigma_k)+2), sigmaX=sigma_k, sigmaY=sigma_0)
            smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+2, ceil(6*sigma_k)+2), sigmaX=sigma_k, sigmaY=sigma_k)
        # Downscaled image
        resized = _resize(smoothed)  # , step=0.25*sigma_k
        GaussPyr.append(resized)
    return GaussPyr

#====================
# Impulse Response
#====================
# Zeros 512x512 black image
delta = np.zeros((512, 512), dtype=np.float32)
# Dirac
delta[255, 255] = 255
# sigmas
sigma1 = 1
sigma2 = sqrt(2)
# Pyramids
deltaPyramid1 = pyramid(delta, sigma_0=sigma1)
deltaPyramid2 = pyramid(delta, sigma_0=sigma2)
# Impulse response for each level
ImpResp1 = np.zeros((len(deltaPyramid1), 13), dtype=float)
ImpResp2 = np.zeros((len(deltaPyramid2), 13), dtype=float)
# sigma = 1
for idx, level in enumerate(deltaPyramid1):
    # # 1
    # level = cv2.resize(level, (512, 512))  # , interpolation=cv2.INTER_AREA
    # ImpResp1[idx,:] = exposure.rescale_intensity(level[255, 249:262], in_range='image', out_range=(0,255)).astype(np.uint8)
    # ImpResp1[idx,:] = level[255, 249:262]
    # # 2
    centery = level.shape[0]//2
    centerx = level.shape[1]//2
    ImpResp1[idx, :] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0, 255), in_range='image').astype(np.uint8)
    # ImpResp1[idx,:] = level[centery, (centerx-7):(centerx+6)]
# sigma = sqrt(2)
for idx, level in enumerate(deltaPyramid2):
    # # 1
    # level = cv2.resize(level, (512, 512))  # , interpolation=cv2.INTER_AREA
    # ImpResp2[idx,:] = exposure.rescale_intensity(level[255, 249:262], in_range='image', out_range=(0,255)).astype(np.uint8)
    # ImpResp2[idx,:] = level[255, 249:262]
    # # 2
    centery = level.shape[0]//2
    centerx = level.shape[1]//2
    ImpResp2[idx, :] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0, 255), in_range='image').astype(np.uint8)
    # ImpResp2[idx,:] = level[centery, (centerx-7):(centerx+6)]

#====================
# Visualize Results
#====================
labels = []
for c in range(13):
    label = 'C{}'.format(c+1)
    labels.append(label)
x = np.arange(len(labels))  # the label locations
width = 0.1  # the width of the bars

fig, ax = plt.subplots()
rects1 = []
for k in range(ImpResp1.shape[0]):
    rects1.append(ax.bar(x - 2*k*width, ImpResp1[k], width, label='K{}'.format(k)))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('values')
ax.set_title('sigma0=1')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()

fig2, ax2 = plt.subplots()
rects2 = []
for k in range(ImpResp1.shape[0]):
    rects2.append(ax2.bar(x + 2*k*width, ImpResp2[k], width, label='K{}'.format(k)))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax2.set_ylabel('values')
ax2.set_title('sigma0=sqrt(2)')
ax2.set_xticks(x)
ax2.set_xticklabels(labels)
ax2.legend()
fig2.tight_layout()
plt.show()
First, let’s simplify to a situation that is simple enough to see the scaling property of the Gaussian. Convolving a delta image with a Gaussian yields that Gaussian. A Gaussian B with twice the sigma of a Gaussian A, when scaled spatially by half, is identical to A (up to intensity scaling of course; in 2D, B is 1/4 as high as A).
delta = <all zeros except one pixel in the middle>
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
B = resize(B, 1/2)
A == B * 2**2
C = GaussianBlur(delta, sigma=7.489)
C = resize(C, 1/7.489)
A == C * 7.489**2
Now, if we’re chaining the blur operations, we obtain a stronger blur. The square of the output sigma is equal to the sum of squares of the sigmas applied:
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
C = GaussianBlur(A, sqrt(3))
B == C
That is, 1**2 + sqrt(3)**2 = 2**2.
So, at each step in the pyramid, we need to compute how much blurring we’ve already applied, and apply the right amount to get to the necessary level of blurring. Every time we blur, we increase the blur by a given amount; every time we rescale, we reduce the blur by a given amount.
If sigma0 is the initial smoothing, and sigma1 is the smoothing applied before downscaling, and downscaling is by a factor k>1, then this relationship:
sqrt(sigma0**2 + sigma1**2) / k == sigma0
will ensure that the downscaled delta image is the same as the original smoothed delta image (up to intensity scaling). We obtain:
sigma1 = sqrt((sigma0 * k)**2 - sigma0**2)
(if I did the algebra right, here on my phone screen).
Since we’re back to an image identical to the original, subsequent pyramid levels will use these same values.
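A quick numerical sketch of that relationship (my own illustration, not from the original answer; it assumes OpenCV and uses sigma0 = 1 with a downscale factor k = 2):

import numpy as np
import cv2

sigma0, k = 1.0, 2.0
sigma1 = np.sqrt((sigma0 * k)**2 - sigma0**2)   # blur to apply before downscaling by k

delta = np.zeros((512, 512), dtype=np.float32)
delta[256, 256] = 1.0

A = cv2.GaussianBlur(delta, (0, 0), sigma0)      # impulse response at level 0
B = cv2.GaussianBlur(A, (0, 0), sigma1)          # extra blur, so the total sigma is sigma0 * k
B = cv2.resize(B, None, fx=1/k, fy=1/k, interpolation=cv2.INTER_AREA)

# The downscaled peak, times k**2 to undo the 2D intensity scaling, should be
# close to the peak of A (small interpolation errors are expected).
print(A.max(), B.max() * k**2)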
An additional issue I noticed in your code is that you rescale the delta image “to a standard size” before starting to process. Don’t do this, the delta image will no longer be a delta image, and the relationships above will no longer hold. The input must have exactly one pixel set to 1, the rest being 0.

Deskew MNIST images

I found on https://fsix.github.io/mnist/Deskewing.html how to deskew the images of the MNIST dataset. It seems to work. My problem is that before deskewing each pixel has a value between 0 and 1. But after deskewing the image the values are not between 0 and 1 any more. They can be negative and can be greater than 1. How can this be fixed?
Here is the code:
import numpy as np
from scipy import ndimage as interpolation  # affine_transform is available at the ndimage top level

def moments(image):
    c0, c1 = np.mgrid[:image.shape[0], :image.shape[1]]  # A trick in numpy to create a mesh grid
    totalImage = np.sum(image)  # sum of pixels
    m0 = np.sum(c0*image)/totalImage  # mu_x
    m1 = np.sum(c1*image)/totalImage  # mu_y
    m00 = np.sum((c0-m0)**2*image)/totalImage  # var(x)
    m11 = np.sum((c1-m1)**2*image)/totalImage  # var(y)
    m01 = np.sum((c0-m0)*(c1-m1)*image)/totalImage  # covariance(x,y)
    mu_vector = np.array([m0, m1])  # Notice that these are \mu_x, \mu_y respectively
    covariance_matrix = np.array([[m00, m01], [m01, m11]])  # Do you see a similarity between the covariance matrix
    return mu_vector, covariance_matrix

def deskew(image):
    c, v = moments(image)
    alpha = v[0,1]/v[0,0]
    affine = np.array([[1, 0], [alpha, 1]])
    ocenter = np.array(image.shape)/2.0
    offset = c - np.dot(affine, ocenter)
    return interpolation.affine_transform(image, affine, offset=offset)
You can just normalize the image to a range between 0 and 1 after the deskewing process.
img = deskew(img)
img = (img - img.min()) / (img.max() - img.min())
See this question.
To incorporate this in the deskew function, you could rewrite it like this:
def deskew(image):
    c, v = moments(image)
    alpha = v[0,1]/v[0,0]
    affine = np.array([[1, 0], [alpha, 1]])
    ocenter = np.array(image.shape)/2.0
    offset = c - np.dot(affine, ocenter)
    img = interpolation.affine_transform(image, affine, offset=offset)
    return (img - img.min()) / (img.max() - img.min())
