I have source (src) image(s) I wish to align to a destination (dst) image using an Affine Transformation whilst retaining the full extent of both images during alignment (even the non-overlapping areas).
I am already able to calculate the Affine Transformation rotation and offset matrix, which I feed to scipy.ndimage.interpolation.affine_transform to recover the dst-aligned src image.
The problem is that, when the images are not fully overlapping, the resultant image is cropped to only the common footprint of the two images. What I need is the full extent of both images, placed on the same pixel coordinate system. This question is almost a duplicate of this one - and the excellent answer and repository there provide this functionality for OpenCV transformations. I unfortunately need this for scipy's implementation.
Much too late, after repeatedly hitting a brick wall trying to translate the above question's answer to scipy, I came across this issue and subsequently followed it to this question. The latter question did give some insight into the wonderful world of scipy's affine transformations, but I have so far been unable to crack my particular problem.
The transformations from src to dst can include translation and rotation. I can get a translation-only version working (an example is shown below) and a rotation-only version working (largely by hacking around the code below and taking inspiration from the use of the reshape argument in scipy.ndimage.interpolation.rotate). However, I get thoroughly lost combining the two. I have tried to calculate what should be the correct offset (see this question's answers again), but I can't get it working in all scenarios.
Translation-only working example of a padded affine transformation, which largely follows this repo, explained in this answer:
from scipy.ndimage import rotate, affine_transform
import numpy as np
import matplotlib.pyplot as plt
nblob = 50
shape = (200, 100)
buffered_shape = (300, 200) # buffer for rotation and translation
def affine_test(angle=0, translate=(0, 0)):
np.random.seed(42)
# Maximum translation allowed is half the difference between shape and buffered_shape
# Generate a buffered_shape-sized base image with random blobs
base = np.zeros(buffered_shape, dtype=np.float32)
random_locs = np.random.choice(np.arange(2, buffered_shape[0] - 2), nblob * 2, replace=False)
i = random_locs[:nblob]
j = random_locs[nblob:]
for k, (_i, _j) in enumerate(zip(i, j)):
# Use different values, just to make it easier to distinguish blobs
base[_i - 2 : _i + 2, _j - 2 : _j + 2] = k + 10
# Impose a rotation and translation on source
src = rotate(base, angle, reshape=False, order=1, mode="constant")
bsc = (np.array(buffered_shape) / 2).astype(int)
sc = (np.array(shape) / 2).astype(int)
src = src[
bsc[0] - sc[0] + translate[0] : bsc[0] + sc[0] + translate[0],
bsc[1] - sc[1] + translate[1] : bsc[1] + sc[1] + translate[1],
]
# Cut-out destination from the centre of the base image
dst = base[bsc[0] - sc[0] : bsc[0] + sc[0], bsc[1] - sc[1] : bsc[1] + sc[1]]
src_y, src_x = src.shape
def get_matrix_offset(centre, angle, scale):
"""Follows OpenCV.getRotationMatrix2D"""
angle = angle * np.pi / 180
alpha = scale * np.cos(angle)
beta = scale * np.sin(angle)
return (
np.array([[alpha, beta], [-beta, alpha]]),
np.array(
[
(1 - alpha) * centre[0] - beta * centre[1],
beta * centre[0] + (1 - alpha) * centre[1],
]
),
)
# Obtain the rotation matrix and offset that describes the transformation
# between src and dst
matrix, offset = get_matrix_offset(np.array([src_y / 2, src_x / 2]), angle, 1)
offset = offset - translate
# Determine the outer bounds of the new image
lin_pts = np.array([[0, src_x, src_x, 0], [0, 0, src_y, src_y]])
transf_lin_pts = np.dot(matrix.T, lin_pts) - offset[::-1].reshape(2, 1)
# Find min and max bounds of the transformed image
min_x = np.floor(np.min(transf_lin_pts[0])).astype(int)
min_y = np.floor(np.min(transf_lin_pts[1])).astype(int)
max_x = np.ceil(np.max(transf_lin_pts[0])).astype(int)
max_y = np.ceil(np.max(transf_lin_pts[1])).astype(int)
# Add translation to the transformation matrix to shift to positive values
anchor_x, anchor_y = 0, 0
if min_x < 0:
anchor_x = -min_x
if min_y < 0:
anchor_y = -min_y
shifted_offset = offset - np.dot(matrix, [anchor_y, anchor_x])
# Create padded destination image
dst_h, dst_w = dst.shape[:2]
pad_widths = [anchor_y, max(max_y, dst_h) - dst_h, anchor_x, max(max_x, dst_w) - dst_w]
dst_padded = np.pad(
dst,
((pad_widths[0], pad_widths[1]), (pad_widths[2], pad_widths[3])),
"constant",
constant_values=-1,
)
dst_pad_h, dst_pad_w = dst_padded.shape
# Create the aligned and padded source image
source_aligned = affine_transform(
src,
matrix.T,
offset=shifted_offset,
output_shape=(dst_pad_h, dst_pad_w),
order=3,
mode="constant",
cval=-1,
)
# Plot the images
fig, axes = plt.subplots(1, 4, figsize=(10, 5), sharex=True, sharey=True)
axes[0].imshow(src, cmap="viridis", vmin=-1, vmax=nblob)
axes[0].set_title("Source")
axes[1].imshow(dst, cmap="viridis", vmin=-1, vmax=nblob)
axes[1].set_title("Dest")
axes[2].imshow(source_aligned, cmap="viridis", vmin=-1, vmax=nblob)
axes[2].set_title("Source aligned to Dest padded")
axes[3].imshow(dst_padded, cmap="viridis", vmin=-1, vmax=nblob)
axes[3].set_title("Dest padded")
plt.show()
e.g.:
affine_test(0, (-20, 40))
gives:
With a zoom-in showing the aligned images within the padded frames:
I require the full extent of the src and dst images aligned on the same pixel coordinates, with both rotations and translations.
Any help is greatly appreciated!
Complexity analysis
The problem is to determine three parameters: the rotation angle and the x and y displacements.
Let's suppose that you have a grid for the angle and for the x and y displacements, each of size O(n), and that your images are of size O(n x n). Rotation, translation, and comparison of the images then all take O(n^2); since you have O(n^3) candidate transforms to try, you end up with complexity O(n^5), and that's probably why you are asking the question.
However, the displacement part can be computed more efficiently by finding the maximum of the correlation using Fourier transforms. A Fourier transform can be performed with complexity O(n log n) along each axis, and we have to perform it over the two spatial dimensions, so the complete correlation matrix can be computed in O(n^2 log^2 n); we then find its maximum with complexity O(n^2), so the overall time complexity of determining the best alignment is O(n^2 log^2 n). However, you still want to search for the best angle; since we have O(n) candidate angles, the overall complexity of this search will be O(n^3 log^2 n). Remember we are using Python and may have some significant overhead, so this complexity only gives us an idea of how difficult the problem will be. I have handled problems like this before, so I start out confident.
Preparing an example
I will start by downloading an image, applying a rotation, and centering the image by padding it with zeros.
def centralized(a, width, height):
'''
Image centralized to the given width and height
by padding with zeros (black)
'''
assert width >= a.shape[0] and height >= a.shape[1]
ap = np.zeros((width, height) + a.shape[2:], a.dtype)
ccx = (width - a.shape[0])//2
ccy = (height - a.shape[1])//2
ap[ccx:ccx+a.shape[0], ccy:ccy+a.shape[1], ...] = a
return ap
def image_pair(im, width, height, displacement=(0,0), angle=0):
'''
This builds a pair of images as numpy arrays
from the input image.
Both images will be padded with zeros (black),
roughly centralized,
and will have the specified shape.
Make sure that the width and height chosen are enough
to fit the rotated image.
'''
a = np.array(im)
a1 = centralized(a, width, height)
a2 = centralized(ndimage.rotate(a, angle), width, height)
a2 = np.roll(a2, displacement, axis=(0,1))
return a1, a2
def random_transform():
angle = np.random.rand() * 360
displacement = np.random.randint(-100, 100, 2)
return displacement, angle
a1, a2 = image_pair(im, 512, 512, *random_transform())
plt.subplot(121)
plt.imshow(a1)
plt.subplot(122)
plt.imshow(a2)
The displacement search
The first thing is to compute the correlation of the images:
def compute_correlation(a1, a2):
A1 = np.fft.rfftn(a1, axes=(0,1))
A2 = np.fft.rfftn(a2, axes=(0,1))
C = np.fft.irfftn(np.sum(A1 * np.conj(A2), axis=2))
return C
Then, let's create an example without rotation and confirm that, with the index of the maximum correlation, we can find the displacement that fits one image to the other.
displacement, _ = random_transform()
a1, a2 = image_pair(im, 521, 512, displacement, angle=0)
C = compute_correlation(a1, a2)
np.unravel_index(np.argmax(C), C.shape), displacement
a3 = np.roll(a2, np.unravel_index(np.argmax(C), C.shape), axis=(0,1))
assert np.all(a3 == a1)
With rotation or interpolation this result may not be exact, but it gives the displacement that will produce the closest possible alignment.
Let's put this in a function for future use
def get_aligned(a1, a2, angle):
a1_rotated = ndimage.rotate(a1, angle, reshape=False)
C = compute_correlation(a2, a1_rotated)
found_displacement = np.unravel_index(np.argmax(C), C.shape)
a1_aligned = np.roll(a1_rotated, found_displacement, axis=(0,1))
return a1_aligned
Searching for the angle
Now we can proceed in two steps:
first we compute the correlation for each candidate angle, then we take the angle that gives the maximum correlation and find the alignment.
displacement, angle = random_transform()
a1, a2 = image_pair(im, 521, 512, displacement, angle)
C_max = []
C_argmax = []
angle_guesses = np.arange(0, 360, 5)
for angle_guess in angle_guesses:
a1_rotated = ndimage.rotate(a1, angle_guess, reshape=False)
C = compute_correlation(a1_rotated, a2)
i = np.argmax(C)
v = C.reshape(-1)[i]
C_max.append(v)
C_argmax.append(i)
Let's see what the correlation looks like
plt.plot(angle_guesses, C_max);
Looking at this curve, we have a clear winner, even though a sunflower has some rotational symmetry.
Let's apply the transformation to the original image and see how it looks
a1_aligned = get_aligned(a1, a2, angle_guesses[np.argmax(C_max)])
plt.subplot(121)
plt.imshow(a2)
plt.subplot(122)
plt.imshow(a1_aligned)
Great, I wouldn't have done better than this manually.
I am using a sunflower image for aesthetic reasons, but the procedure is the same for any type of image. I use RGB to show that the image may have one additional dimension, i.e. a feature vector per pixel instead of a scalar feature; if your feature is a scalar, you can reshape your data to (width, height, 1).
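For example, a minimal sketch of adapting a scalar-feature (grayscale) image, here assumed to be a 2-D numpy array named gray, so that it works with compute_correlation above:
gray3d = gray.reshape(gray.shape + (1,))  # (width, height) -> (width, height, 1)
C = compute_correlation(gray3d, gray3d)   # the channel-wise sum in compute_correlation now works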
Working code below, in case anyone else needs this functionality from scipy's affine transformations:
def affine_test(angle=0, translate=(0, 0), shape=(200, 100), buffered_shape=(300, 200), nblob=50):
# Maximum translation allowed is half the difference between shape and buffered_shape
np.random.seed(42)
# Generate a buffered_shape-sized base image
base = np.zeros(buffered_shape, dtype=np.float32)
random_locs = np.random.choice(np.arange(2, buffered_shape[0] - 2), nblob * 2, replace=False)
i = random_locs[:nblob]
j = random_locs[nblob:]
for k, (_i, _j) in enumerate(zip(i, j)):
base[_i - 2 : _i + 2, _j - 2 : _j + 2] = k + 10
# Impose a rotation and translation on source
src = rotate(base, angle, reshape=False, order=1, mode="constant")
bsc = (np.array(buffered_shape) / 2).astype(int)
sc = (np.array(shape) / 2).astype(int)
src = src[
bsc[0] - sc[0] + translate[0] : bsc[0] + sc[0] + translate[0],
bsc[1] - sc[1] + translate[1] : bsc[1] + sc[1] + translate[1],
]
# Cut-out destination from the centre of the base image
dst = base[bsc[0] - sc[0] : bsc[0] + sc[0], bsc[1] - sc[1] : bsc[1] + sc[1]]
src_y, src_x = src.shape
def get_matrix_offset(centre, angle, scale):
"""Follows OpenCV.getRotationMatrix2D"""
angle_rad = angle * np.pi / 180
alpha = np.round(scale * np.cos(angle_rad), 8)
beta = np.round(scale * np.sin(angle_rad), 8)
return (
np.array([[alpha, beta], [-beta, alpha]]),
np.array(
[
(1 - alpha) * centre[0] - beta * centre[1],
beta * centre[0] + (1 - alpha) * centre[1],
]
),
)
matrix, offset = get_matrix_offset(np.array([((src_y - 1) / 2) - translate[0], ((src_x - 1) / 2) - translate[1]]), angle, 1)
offset += np.array(translate)
M = np.column_stack((matrix, offset))
M = np.vstack((M, [0, 0, 1]))
iM = np.linalg.inv(M)
imatrix = iM[:2, :2]
ioffset = iM[:2, 2]
# Determine the outer bounds of the new image
lin_pts = np.array([[0, src_y-1, src_y-1, 0], [0, 0, src_x-1, src_x-1]])
transf_lin_pts = np.dot(matrix, lin_pts) + offset.reshape(2, 1) # - np.array(translate).reshape(2, 1) # both?
# Find min and max bounds of the transformed image
min_x = np.floor(np.min(transf_lin_pts[1])).astype(int)
min_y = np.floor(np.min(transf_lin_pts[0])).astype(int)
max_x = np.ceil(np.max(transf_lin_pts[1])).astype(int)
max_y = np.ceil(np.max(transf_lin_pts[0])).astype(int)
# Add translation to the transformation matrix to shift to positive values
anchor_x, anchor_y = 0, 0
if min_x < 0:
anchor_x = -min_x
if min_y < 0:
anchor_y = -min_y
dot_anchor = np.dot(imatrix, [anchor_y, anchor_x])
shifted_offset = ioffset - dot_anchor
# Create padded destination image
dst_y, dst_x = dst.shape[:2]
pad_widths = [anchor_y, max(max_y, dst_y) - dst_y, anchor_x, max(max_x, dst_x) - dst_x]
dst_padded = np.pad(
dst,
((pad_widths[0], pad_widths[1]), (pad_widths[2], pad_widths[3])),
"constant",
constant_values=-10,
)
dst_pad_y, dst_pad_x = dst_padded.shape
# Create the aligned and padded source image
source_aligned = affine_transform(
src,
imatrix,
offset=shifted_offset,
output_shape=(dst_pad_y, dst_pad_x),
order=3,
mode="constant",
cval=-10,
)
E.g. running:
affine_test(angle=-25, translate=(10, -40))
will show:
and zoomed in:
Apologies that the code is not nicely written as it stands.
Note that, running this in the wild, I notice it cannot handle any change in the scale of the images, though I am not certain this isn't something to do with how I calculate the transformation - so a caveat worth noting, and checking, if you are aligning images with different scales.
If you have two images that are similar (or the same) and you want to align them, you can do it using both the rotate and shift functions:
from scipy.ndimage import rotate, shift
First you need to find the difference in angle between the two images, angle_to_rotate; having that, you apply the rotation to src:
angle_to_rotate = 25
rotated_src = rotate(src, angle_to_rotate , reshape=True, order=1, mode="constant")
With reshape=True you avoid losing information from your original src matrix, and the result is padded so that the image can be translated around the (0, 0) index. You can calculate this translation as (x*cos(angle), y*sin(angle)), where x and y are the dimensions of the image, but it probably won't matter.
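As a rough illustration (a sketch only; rotated_bounds is a hypothetical helper, not part of scipy), the padded size produced by reshape=True can be estimated from the bounding box of the rotated image:
import numpy as np

def rotated_bounds(height, width, angle_deg):
    """Approximate output shape of scipy.ndimage.rotate(..., reshape=True)."""
    a = np.deg2rad(angle_deg)
    new_h = abs(height * np.cos(a)) + abs(width * np.sin(a))
    new_w = abs(height * np.sin(a)) + abs(width * np.cos(a))
    return int(np.ceil(new_h)), int(np.ceil(new_w))

# e.g. a 200x100 image rotated by 25 degrees needs roughly a 224x176 canvas
print(rotated_bounds(200, 100, 25))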
Now you will need to translate the image into alignment with the destination; to do that you can use the shift function:
rot_translated_src = shift(rotated_src , [distance_x, distance_y])
In this case there is no reshape (because otherwise you wouldn't have any real translation), so if the image was not previously padded some information will be lost.
But you can do some padding with
np.pad(src, number, mode='constant')
To calculate distance_x and distance_y you will need to find a point that serves as a reference between rotated_src and the destination, then just calculate the distance along the x and y axes.
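If you would rather find distance_x and distance_y programmatically, one option (a sketch using skimage, not part of this answer; it assumes rotated_src and dst have been padded to the same shape) is phase correlation:
from skimage.registration import phase_cross_correlation
from scipy.ndimage import shift

# Estimate the (row, col) shift that best aligns rotated_src onto dst
shift_yx = phase_cross_correlation(dst, rotated_src)[0]
rot_translated_src = shift(rotated_src, shift_yx)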
Summary
Make some padding in src, and dst
Find the angular distance between them.
Rotate src with scipy.ndimage.rotate using reshape=True
Find the horizontal and vertical distance distance_x, distance_y between the rotated image and dst
Translate your 'rotated_src' with scipy.ndimage.shift
Code
from scipy.ndimage import rotate, shift
import matplotlib.pyplot as plt
import numpy as np
First we make the destination image:
# make and plot dest
dst = np.ones([40,20])
dst = np.pad(dst,10)
dst[17,[14,24]]=4
dst[27,14:25]=4
dst[26,[14,25]]=4
rotated_dst = rotate(dst, 20, order=1)
plt.imshow(dst) # plot it
plt.imshow(rotated_dst)
plt.show()
We make the Source image:
# make_src image and plot it
src = np.zeros([40,20])
src = np.pad(src,10)
src[0:20,0:20]=1
src[7,[4,14]]=4
src[17,4:15]=4
src[16,[4,15]]=4
plt.imshow(src)
plt.show()
Then we align the src to the destination:
rotated_src = rotate(src, 20, order=1) # find the angle 20, reshape true is by default
plt.imshow(rotated_src)
plt.show()
distance_y = 8 # find this distances from rotated_src and dst
distance_x = 12 # use any visual reference or even the corners
translated_src = shift(rotated_src, [distance_y,distance_x])
plt.imshow(translated_src)
plt.show()
PS: If you have problems finding the angle and the distances programmatically, please leave a comment providing a bit more insight into what could be used as a reference (for example the frame of the image, or some image features/data).
I'm attempting to extend the 'tail' of an arrow. So far I've been able to draw a line through the center of the arrow, but this line extends 'both' ways, rather than in just one direction. The script below shows my progress. Ideally I would be able to extend the tail of the arrow regardless of the orientation of the arrow image. Any suggestions on how to accomplish this? Image examples below, L:R start, progress, goal.
import cv2
# read image and convert to grayscale
image = cv2.imread("image path")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow("original",image)
# inverts black and white
gray = 255 - image
cv2.imshow("Inverted", gray)
# Extend the borders for the line
extended = cv2.copyMakeBorder(gray, 20, 20, 10, 10, cv2.BORDER_CONSTANT)
cv2.imshow("extended borders", extended)
# contour finding
contours, hierarchy = cv2.findContours(extended, 1, 2)
cont = contours[0]
rows,cols = extended.shape[:2]
[vx,vy,x,y] = cv2.fitLine(cont, cv2.DIST_L2,0,0.01,0.01)
leftish = int((-x*vy/vx) + y)
rightish = int(((cols-x)*vy/vx)+y)
line = cv2.line(extended,(cols-1,rightish),(0,leftish),(255,255,255), 6)
cv2.imshow("drawn line", line)
"Moments" can be strange things. They're building blocks and show up most often in statistics.
It helps to have a little background in statistics, and see the application of those calculations to image data, which can be considered a set of points. If you've ever calculated the weighted average or "centroid" of something, you'll recognize some of the sums that show up in "moments".
Higher order moments can be building blocks to higher statistical measures such as covariance and skewness.
Using covariance, you can calculate the major axis of your set of points, or your arrow in this case.
Using skewness, you can figure out which side of a distribution is heavier than the other... i.e. which side is the arrow's tip and which is its tail.
This should give you a very precise angle. The scale/radius, however, is best estimated in other ways. You'll notice that the radius estimated from the area of the arrow fluctuates a little. You could find the points belonging to the arrow that are furthest away from the center, and take that as a somewhat stable length.
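As a compact sketch of those two ideas (the full program below does the same thing more carefully; mask here is assumed to be a binary image of the arrow):
import numpy as np
import cv2 as cv

# `mask` is assumed: a binary image with the arrow as foreground (values 0/1)
m = cv.moments(mask.astype(np.uint8), binaryImage=True)
cx, cy = m['m10'] / m['m00'], m['m01'] / m['m00']               # centroid (weighted average)
theta = 0.5 * np.arctan2(2 * m['mu11'], m['mu20'] - m['mu02'])  # orientation of the major axis
skew = np.array([m['nu30'], m['nu03']])                         # third-order moments -> tip vs tail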
Here's a longish program that implements the two ideas above and shows the direction of an arrow:
#!/usr/bin/env python3
import os
import sys
import numpy as np
import cv2 as cv
# utilities to convert between 2D vectors and complex numbers
# complex numbers are handy for rotating stuff
def to_complex(vec):
assert vec.shape[-1] == 2
if vec.dtype == np.float32:
return vec.view(np.complex64)
elif vec.dtype == np.float64:
return vec.view(np.complex128)
else:
assert False, vec.dtype
def from_complex(cplx):
if cplx.dtype == np.complex64:
return cplx.view(np.float32)
elif cplx.dtype == np.complex128:
return cplx.view(np.float64)
else:
assert False, cplx.dtype
# utilities for drawing with fractional bits of position
# just to make a pretty picture
def iround(val):
return int(round(val))
def ipt(vec, shift=0):
if isinstance(vec, (int, float)):
return iround(vec * 2**shift)
elif isinstance(vec, (tuple, list, np.ndarray)):
return tuple(iround(el * 2**shift) for el in vec)
else:
assert False, type(vec)
# utilities for affine transformation
# just to make a pretty picture
def rotate(degrees=0):
# we want positive rotation
# meaning move +x towards +y
# getRotationMatrix2D does it differently
result = np.eye(3).astype(np.float32)
result[0:2, 0:3] = cv.getRotationMatrix2D(center=(0,0), angle=-degrees, scale=1.0)
return result
def translate(dx=0, dy=0):
result = np.eye(3).astype(np.float32)
result[0:2,2] = [dx, dy]
return result
# main logic
def calculate_direction(im):
# using "nonzero" (default behavior) is a little noisy
mask = (im >= 128)
m = cv.moments(mask.astype(np.uint8), binaryImage=True)
# easier access... see below for details
m00 = m['m00']
m10 = m['m10']
m01 = m['m01']
mu00 = m00
mu20 = m['mu20']
mu11 = m['mu11']
mu02 = m['mu02']
nu30 = m['nu30']
nu03 = m['nu03']
# that's just the centroid
cx = m10 / m00
cy = m01 / m00
centroid = np.array([cx, cy]) # as a vector
# and that's the size in pixels:
size = m00
# and that's an approximate "radius", if it were a circle which it isn't
radius = (size / np.pi) ** 0.5
# (since the "size" in pixels can fluctuate due to resampling, so will the "radius")
# wikipedia helpfully mentions "image orientation" as an example:
# https://en.wikipedia.org/wiki/Image_moment#Examples_2
# we'll use that for the major axis
mup20 = mu20 / mu00
mup02 = mu02 / mu00
mup11 = mu11 / mu00
theta = 0.5 * np.arctan2(2 * mup11, mup20 - mup02)
#print(f"angle: {theta / np.pi * 180:+6.1f} degrees")
# we only have the axis, not yet the direction
# we will assess "skewness" now
# https://en.wikipedia.org/wiki/Skewness#Definition
# note how "positive" skewness appears in a distribution:
# it points away from the heavy side, towards the light side
# fortunately, cv.moments() also calculates those "standardized moments"
# https://en.wikipedia.org/wiki/Standardized_moment#Standard_normalization
skew = np.array([nu30, nu03])
#print("skew:", skew)
# we'll have to *rotate* that so it *roughly* lies along the x axis
# then assess which end is the heavy/light end
# then use that information to maybe flip the axis,
# so it points in the direction of the arrow
skew_complex = to_complex(skew) # reinterpret two reals as one complex number
rotated_skew_complex = skew_complex * np.exp(1j * -theta) # rotation
rotated_skew = from_complex(rotated_skew_complex)
#print("rotated skew:", rotated_skew)
if rotated_skew[0] > 0: # pointing towards tail
theta = (theta + np.pi) % (2*np.pi) # flip direction 180 degrees
else: # pointing towards head
pass
print(f"angle: {theta / np.pi * 180:+6.1f} degrees")
# construct a vector that points like the arrow in the picture
direction = np.exp([1j * theta])
direction = from_complex(direction)
return (radius, centroid, direction)
def draw_a_picture(im, radius, centroid, direction):
height, width = im.shape[:2]
# take the source at half brightness
canvas = cv.cvtColor(im // 2, cv.COLOR_GRAY2BGR)
shift = 4 # prettier drawing
cv.circle(canvas,
center=ipt(centroid, shift),
radius=ipt(radius, shift),
thickness=iround(radius * 0.1),
color=(0,0,255),
lineType=cv.LINE_AA,
shift=shift)
# (-direction) meaning point the *opposite* of the arrow's direction, i.e. towards tail
cv.line(canvas,
pt1=ipt(centroid + direction * radius * -3.0, shift),
pt2=ipt(centroid + direction * radius * +3.0, shift),
thickness=iround(radius * 0.05),
color=(0,255,255),
lineType=cv.LINE_AA,
shift=shift)
cv.line(canvas,
pt1=ipt(centroid + (-direction) * radius * 3.5, shift),
pt2=ipt(centroid + (-direction) * radius * 4.5, shift),
thickness=iround(radius * 0.15),
color=(0,255,255),
lineType=cv.LINE_AA,
shift=shift)
return canvas
if __name__ == '__main__':
imfile = sys.argv[1] if len(sys.argv) >= 2 else "p7cmR.png"
src = cv.imread(imfile, cv.IMREAD_GRAYSCALE)
src = 255 - src # invert (white arrow on black background)
height, width = src.shape[:2]
diagonal = np.hypot(height, width)
outsize = int(np.ceil(diagonal * 1.3)) # fudge factor
cv.namedWindow("arrow", cv.WINDOW_NORMAL)
cv.resizeWindow("arrow", 5*outsize, 5*outsize)
angle = 0 # degrees
increment = +1
do_spin = True
while True:
print(f"{angle:+.0f} degrees")
M = translate(dx=+outsize/2, dy=+outsize/2) @ rotate(degrees=angle) @ translate(dx=-width/2, dy=-height/2)
im = cv.warpAffine(src, M=M[:2], dsize=(outsize, outsize), flags=cv.INTER_CUBIC, borderMode=cv.BORDER_REPLICATE)
# resampling introduces blur... except when it's an even number like 0 degrees, 90 degrees, ...
# so at even rotations, things will jump a little.
# this rotation is only for demo purposes
(radius, centroid, direction) = calculate_direction(im)
canvas = draw_a_picture(im, radius, centroid, direction)
cv.imshow("arrow", canvas)
if do_spin:
angle = (angle + increment) % 360
print()
key = cv.waitKeyEx(30 if do_spin else -1)
if key == -1:
continue
elif key in (0x0D, 0x20): # ENTER (CR), SPACE
do_spin = not do_spin # toggle spinning
elif key == 27: # ESC
break # end program
elif key == 0x250000: # VK_LEFT
increment = -abs(increment)
angle += increment
elif key == 0x270000: # VK_RIGHT
increment = +abs(increment)
angle += increment
else:
print(f"key 0x{key:02x}")
cv.destroyAllWindows()
I built a Gaussian pyramid from a 512x512 image with one Dirac pulse at the centre (256, 256), then tried to follow the following procedure to prove that this pyramid is scale-invariant and has the same impulse response at each level, but the results don't seem to be correct!
Can you please advise me how to do it?
Edit:
I edited the code to fix some bugs, thanks to @CrisLuengo for his notes.
Code:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import skimage.exposure as exposure
from math import sqrt, ceil
#=================
# Resize Function
#=================
def _resize(image, downscale=2, step=0.5, minSize=(7, 7)):
if(image.shape > minSize ):
# newSize = (image.shape[0]// downscale, image.shape[1]//downscale)
# newImage = cv2.resize(image, dsize=newSize, fx=step, fy=step)
newImage = cv2.resize(image, None, fx=step, fy=step)
return newImage
else:
return 0
#--------------------------------------------------------------
#===========================
# Gaussian Pyramid Function
#===========================
def pyramid(image, sigma_0=1):
'''
Function to create a Gaussian pyramid from an image for given standard deviation sigma_0
Parameters:
-----------
#param: image: nd-array.
The original image.
#param: sigma_0: float.
standard deviation of the Gaussian distribution.
returns:
List of images with different scales, the pyramid
'''
# Resize All input images into a standard size
image = cv2.resize(image,(512,512))
# level 0
if ceil(6*sigma_0)%2 ==0 :
Gimage = cv2.GaussianBlur(image, (ceil(6*sigma_0)+1, ceil(6*sigma_0)+1), sigmaX=sigma_0, sigmaY=sigma_0)
else:
Gimage = cv2.GaussianBlur(image, (ceil(6*sigma_0)+2, ceil(6*sigma_0)+2), sigmaX=sigma_0, sigmaY=sigma_0)
# sigma_k
sigma_k = 4*sigma_0
# sigma_k = sqrt(2)*sigma_0
# Pyramid as list
GaussPyr = [Gimage]
# Loop of other levels of the pyramid
for k in range(1,6):
if ceil(6*sigma_k)%2 ==0 :
# smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+1, ceil(6*sigma_k)+1), sigmaX=sigma_k, sigmaY=sigma_0)
smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+1, ceil(6*sigma_k)+1), sigmaX=sigma_k, sigmaY=sigma_k)
else:
# smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+2, ceil(6*sigma_k)+2), sigmaX=sigma_k, sigmaY=sigma_0)
smoothed = cv2.GaussianBlur(GaussPyr[k-1], (ceil(6*sigma_k)+2, ceil(6*sigma_k)+2), sigmaX=sigma_k, sigmaY=sigma_k)
# Downscaled Image
resized = _resize(smoothed ) # ,step=0.25*sigma_k
GaussPyr.append(resized)
return GaussPyr
#====================
# Impulse Response
#====================
# Zeros 512x512 Black Image
delta = np.zeros((512, 512), dtype=np.float32)
# Dirac
delta[255,255] = 255
# sigmas
sigma1 = 1
sigma2 = sqrt(2)
# Pyramids
deltaPyramid1 = pyramid(delta, sigma_0=sigma1)
deltaPyramid2 = pyramid(delta, sigma_0=sigma2)
# Impulse Response for each level
ImpResp1 = np.zeros((len(deltaPyramid1), 13),dtype=float)
ImpResp2 = np.zeros((len(deltaPyramid2), 13),dtype=float)
# sigma = 1
for idx, level in enumerate(deltaPyramid1):
# # 1
# level = cv2.resize(level, (512, 512))# , interpolation=cv2.INTER_AREA
# ImpResp1[idx,:] = exposure.rescale_intensity(level[255, 249:262], in_range='image', out_range=(0,255)).astype(np.uint8)
# ImpResp1[idx,:] = level[255, 249:262]
# # 2
centery = level.shape[0]//2
centerx = level.shape[1]//2
ImpResp1[idx,:] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0,255), in_range='image').astype(np.uint8)
# ImpResp1[idx,:] = level[centery, (centerx-7):(centerx+6)]
# sigma = sqrt(2)
for idx, level in enumerate(deltaPyramid2):
# # 1
# level = cv2.resize(level, (512, 512))# , interpolation=cv2.INTER_AREA
# ImpResp2[idx,:] = exposure.rescale_intensity(level[255, 249:262], in_range='image', out_range=(0,255)).astype(np.uint8)
# ImpResp2[idx,:] = level[255, 249:262]
# # 2
centery = level.shape[0]//2
centerx = level.shape[1]//2
ImpResp2[idx,:] = exposure.rescale_intensity(level[centery, (centerx-7):(centerx+6)], out_range=(0,255), in_range='image').astype(np.uint8)
# ImpResp2[idx,:] = level[centery, (centerx-7):(centerx+6)]
#====================
# Visualize Results
#====================
labels = []
for c in range(13):
label = 'C{}'.format(c+1)
labels.append(label)
x = np.arange(len(labels)) # the label locations
width = 0.1 # the width of the bars
fig, ax = plt.subplots()
rects1 = []
for k in range(ImpResp1.shape[0]):
rects1.append(ax.bar(x - 2*k*width, ImpResp1[k], width, label='K{}'.format(k)))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('values')
ax.set_title('sigma0=1')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()
fig2, ax2 = plt.subplots()
rects2 = []
for k in range(ImpResp1.shape[0]):
rects2.append(ax2.bar(x + 2*k*width, ImpResp2[k], width, label='K{}'.format(k)))
# Add some text for labels, title and custom x-axis tick labels, etc.
ax2.set_ylabel('values')
ax2.set_title('sigma0=sqrt(2)')
ax2.set_xticks(x)
ax2.set_xticklabels(labels)
ax2.legend()
fig2.tight_layout()
plt.show()
First, let’s simplify to a situation that is simple enough to see the scaling property of the Gaussian. Convolving a delta image with a Gaussian yields that Gaussian. A Gaussian B twice the size of a Gaussian A, and then scaled spatially by half, is identical to A (up to intensity scaling of course, B is 1/4 as high as A in 2D).
delta = <all zeros except one pixel in the middle>
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
B = resize(B, 1/2)
A == B * 2**2
C = GaussianBlur(delta, sigma=7.489)
C = resize(C, 1/7.489)
A == C * 7.489**2
Now, if we’re chaining the blur operations, we obtain a stronger blur. The square of the output sigma is equal to the sum of squares of the sigmas applied:
A = GaussianBlur(delta, 1)
B = GaussianBlur(delta, 2)
C = GaussianBlur(A, sqrt(3))
B == C
That is, 1**2 + sqrt(3)**2 = 2**2.
So, at each step in the pyramid, we need to compute how much blurring we’ve already applied, and apply the right amount to get to the necessary level of blurring. Every time we blur, we increase the blur by a given amount, every time we rescale we reduce the blur by a given amount.
If sigma0 is the initial smoothing, and sigma1 is the smoothing applied before downscaling, and downscaling is by a factor k>1, then this relationship:
sqrt(sigma0**2 + sigma1**2) / k == sigma0
will ensure that the downscaled delta image is the same as the original smoothed delta image (up to intensity scaling). We obtain:
sigma1 = sqrt((sigma0 * k)**2 - sigma0**2)
(if I did that right, here on my phone screen).
Since we’re back to an image identical to the original, subsequent pyramid levels will use these same values.
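A quick numerical check of that relationship (a sketch, not part of the original answer; it uses scipy.ndimage.gaussian_filter with sigma0 = 1 and downscale factor k = 2):
import numpy as np
from scipy.ndimage import gaussian_filter

delta = np.zeros((129, 129))
delta[64, 64] = 1.0                                # exactly one pixel set to 1

sigma0, k = 1.0, 2.0
sigma1 = np.sqrt((sigma0 * k) ** 2 - sigma0 ** 2)  # extra blur to apply before downscaling

A = gaussian_filter(delta, sigma0)                 # reference level, total blur sigma0
B = gaussian_filter(A, sigma1)                     # total blur is now sigma0 * k
B_down = B[::2, ::2]                               # downscale by k = 2 (simple decimation)

# Up to the intensity factor k**2, the downscaled level matches the reference level
diff = np.abs(k**2 * B_down[32-5:32+6, 32-5:32+6] - A[64-5:64+6, 64-5:64+6]).max()
print(diff)  # a very small number (discretization error of the sampled Gaussian)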
An additional issue I noticed in your code is that you rescale the delta image “to a standard size” before starting to process. Don’t do this, the delta image will no longer be a delta image, and the relationships above will no longer hold. The input must have exactly one pixel set to 1, the rest being 0.
A picture is worth a thousand words (sorry for the shoddy work):
If the solution preserves the value and the slope at both ends, it is better.
If, in addition, the position and sharpness of the transition can be adjusted, it is perfect.
But I have not found any solution yet...
Thank you very much for your help.
Here is a piece of code to get started:
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
import numpy as np
def round_up_to_odd(f):
return int(np.ceil(f / 2.) * 2 + 1)
def generateRandomSignal(n=1000, seed=None):
"""
Parameters
----------
n : integer, optional
Number of points in the signal. The default is 1000.
Returns
-------
sig : numpy array
"""
np.random.seed(seed)
print("Seed was:", seed)
steps = np.random.choice(a=[-1, 0, 1], size=(n-1))
roughSig = np.concatenate([np.array([0]), steps]).cumsum(0)
sig = savgol_filter(roughSig, round_up_to_odd(n/20), 6)
return sig
n = 1000
t = np.linspace(0,10,n)
seed = np.random.randint(0,high=100000)
#seed = 45136
sig = generateRandomSignal(seed=seed)
###############################
# ????
# sigFilt = adaptiveFilter(sig)
###############################
# Plot
plt.figure()
plt.plot(t, sig, label="Signal")
# plt.plot(t, sigFilt, label="Signal filtered")
plt.legend()
Simple convolution does smoothing. However, as mentioned, here we need strong smoothing first and no smoothing towards the end. I used a moving-average approach with a dynamically sized window. In the example below, the window size changes linearly.
def dynamic_smoothing(x, start_window_length=None, end_window_length=1):
    if start_window_length is None:
        start_window_length = len(x) // 2
    smoothed = list()
    for i in range(len(x)):
        # compute the window half-width: large at the start, small towards the end
        a = i / len(x)
        w = int(np.round((1.0 - a) * start_window_length + a * end_window_length))
        # get the window; it must stay inside the array
        w0 = max(0, i - w)
        w1 = min(len(x), i + w + 1)
        smoothed.append(sum(x[w0:w1]) / (w1 - w0))
    return np.array(smoothed)
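A minimal usage sketch, assuming the sig and t arrays generated by the snippet in the question:
sigFilt = dynamic_smoothing(sig, start_window_length=len(sig) // 2, end_window_length=1)
plt.figure()
plt.plot(t, sig, label="Signal")
plt.plot(t, sigFilt, label="Signal filtered")
plt.legend()
plt.show()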
I am trying to implement a texture image as described in this tutorial using Python and skimage.
The issue is to move a 7x7 window over a large raster and replace the center pixel of each window with the texture calculated from that 7x7 window. I manage to do this with the code below, but I see no other way than looping through each individual pixel, which is very slow.
One software package does that in a few seconds, so there must be some other way ... is there?
Here is the code, which works but is very slow ...
import matplotlib.pyplot as plt
import gdal, gdalconst
import numpy as np
from skimage.feature import greycomatrix, greycoprops
filename = "//mnt//glaciology//RS2_20140101.jpg"
outfilename = "//home//max//Documents//GLCM_contrast.tif"
sarfile = gdal.Open(filename, gdalconst.GA_ReadOnly)
sarraster = sarfile.ReadAsArray()
#sarraster is satellite image, testraster will receive texture
testraster = np.copy(sarraster)
testraster[:] = 0
for i in range(testraster.shape[0] ):
print i,
for j in range(testraster.shape[1] ):
#windows needs to fit completely in image
if i <3 or j <3:
continue
if i > (testraster.shape[0] - 4) or j > (testraster.shape[1] - 4):
continue
#Calculate GLCM on a 7x7 window
glcm_window = sarraster[i-3: i+4, j-3 : j+4]
glcm = greycomatrix(glcm_window, [1], [0], symmetric = True, normed = True )
#Calculate contrast and replace center pixel
contrast = greycoprops(glcm, 'contrast')
testraster[i,j]= contrast
sarplot = plt.imshow(testraster, cmap = 'gray')
Results:
I had the same problem, different data. Here is a script I wrote that uses parallel processing and a sliding window approach:
import gdal, osr
import numpy as np
from scipy.interpolate import RectBivariateSpline
from numpy.lib.stride_tricks import as_strided as ast
import dask.array as da
from joblib import Parallel, delayed, cpu_count
import os
from skimage.feature import greycomatrix, greycoprops
def im_resize(im,Nx,Ny):
'''
resize array by bivariate spline interpolation
'''
ny, nx = np.shape(im)
xx = np.linspace(0,nx,Nx)
yy = np.linspace(0,ny,Ny)
try:
im = da.from_array(im, chunks=1000) #dask implementation
except:
pass
newKernel = RectBivariateSpline(np.r_[:ny],np.r_[:nx],im)
return newKernel(yy,xx)
def p_me(Z, win):
'''
loop to calculate greycoprops
'''
try:
glcm = greycomatrix(Z, [5], [0], 256, symmetric=True, normed=True)
cont = greycoprops(glcm, 'contrast')
diss = greycoprops(glcm, 'dissimilarity')
homo = greycoprops(glcm, 'homogeneity')
eng = greycoprops(glcm, 'energy')
corr = greycoprops(glcm, 'correlation')
ASM = greycoprops(glcm, 'ASM')
return (cont, diss, homo, eng, corr, ASM)
except:
return (0,0,0,0,0,0)
def read_raster(in_raster):
in_raster=in_raster
ds = gdal.Open(in_raster)
data = ds.GetRasterBand(1).ReadAsArray()
data[data<=0] = np.nan
gt = ds.GetGeoTransform()
xres = gt[1]
yres = gt[5]
# get the edge coordinates and add half the resolution
# to go to center coordinates
xmin = gt[0] + xres * 0.5
xmax = gt[0] + (xres * ds.RasterXSize) - xres * 0.5
ymin = gt[3] + (yres * ds.RasterYSize) + yres * 0.5
ymax = gt[3] - yres * 0.5
del ds
# create a grid of xy coordinates in the original projection
xx, yy = np.mgrid[xmin:xmax+xres:xres, ymax+yres:ymin:yres]
return data, xx, yy, gt
def norm_shape(shap):
'''
Normalize numpy array shapes so they're always expressed as a tuple,
even for one-dimensional shapes.
'''
try:
i = int(shap)
return (i,)
except TypeError:
# shape was not a number
pass
try:
t = tuple(shap)
return t
except TypeError:
# shape was not iterable
pass
raise TypeError('shape must be an int, or a tuple of ints')
def sliding_window(a, ws, ss = None, flatten = True):
'''
Source: http://www.johnvinyard.com/blog/?p=268#more-268
Parameters:
a - an n-dimensional numpy array
ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size
of each dimension of the window
ss - an int (a is 1D) or tuple (a is 2D or greater) representing the
amount to slide the window in each dimension. If not specified, it
defaults to ws.
flatten - if True, all slices are flattened, otherwise, there is an
extra dimension for each dimension of the input.
Returns
an array containing each n-dimensional window from a
'''
if None is ss:
# ss was not provided. the windows will not overlap in any direction.
ss = ws
ws = norm_shape(ws)
ss = norm_shape(ss)
# convert ws, ss, and a.shape to numpy arrays
ws = np.array(ws)
ss = np.array(ss)
shap = np.array(a.shape)
# ensure that ws, ss, and a.shape all have the same number of dimensions
ls = [len(shap),len(ws),len(ss)]
if 1 != len(set(ls)):
raise ValueError(\
'a.shape, ws and ss must all have the same length. They were %s' % str(ls))
# ensure that ws is smaller than a in every dimension
if np.any(ws > shap):
raise ValueError(\
'ws cannot be larger than a in any dimension.\
a.shape was %s and ws was %s' % (str(a.shape),str(ws)))
# how many slices will there be in each dimension?
newshape = norm_shape(((shap - ws) // ss) + 1)
# the shape of the strided array will be the number of slices in each dimension
# plus the shape of the window (tuple addition)
newshape += norm_shape(ws)
# the strides tuple will be the array's strides multiplied by step size, plus
# the array's strides (tuple addition)
newstrides = norm_shape(np.array(a.strides) * ss) + a.strides
a = ast(a,shape = newshape,strides = newstrides)
if not flatten:
return a
# Collapse strided so that it has one more dimension than the window. I.e.,
# the new array is a flat list of slices.
meat = len(ws) if ws.shape else 0
firstdim = (np.product(newshape[:-meat]),) if ws.shape else ()
dim = firstdim + (newshape[-meat:])
# remove any dimensions with size 1
dim = filter(lambda i : i != 1,dim)
return a.reshape(dim), newshape
def CreateRaster(xx,yy,std,gt,proj,driverName,outFile):
'''
Exports data to GTiff Raster
'''
std = np.squeeze(std)
std[np.isinf(std)] = -99
driver = gdal.GetDriverByName(driverName)
rows,cols = np.shape(std)
ds = driver.Create( outFile, cols, rows, 1, gdal.GDT_Float32)
if proj is not None:
ds.SetProjection(proj.ExportToWkt())
ds.SetGeoTransform(gt)
ss_band = ds.GetRasterBand(1)
ss_band.WriteArray(std)
ss_band.SetNoDataValue(-99)
ss_band.FlushCache()
ss_band.ComputeStatistics(False)
del ds
#Stuff to change
if __name__ == '__main__':
win_sizes = [7]
for win_size in win_sizes[:]:
in_raster = #Path to input raster
win = win_size
meter = str(win/4)
#Define output file names
contFile =
dissFile =
homoFile =
energyFile =
corrFile =
ASMFile =
merge, xx, yy, gt = read_raster(in_raster)
merge[np.isnan(merge)] = 0
Z,ind = sliding_window(merge,(win,win),(win,win))
Ny, Nx = np.shape(merge)
w = Parallel(n_jobs = cpu_count(), verbose=0)(delayed(p_me)(Z[k], win) for k in xrange(len(Z)))
cont = [a[0] for a in w]
diss = [a[1] for a in w]
homo = [a[2] for a in w]
eng = [a[3] for a in w]
corr = [a[4] for a in w]
ASM = [a[5] for a in w]
#Reshape to match number of windows
plt_cont = np.reshape(cont , ( ind[0], ind[1] ) )
plt_diss = np.reshape(diss , ( ind[0], ind[1] ) )
plt_homo = np.reshape(homo , ( ind[0], ind[1] ) )
plt_eng = np.reshape(eng , ( ind[0], ind[1] ) )
plt_corr = np.reshape(corr , ( ind[0], ind[1] ) )
plt_ASM = np.reshape(ASM , ( ind[0], ind[1] ) )
del cont, diss, homo, eng, corr, ASM
#Resize Images to receive texture and define filenames
contrast = im_resize(plt_cont,Nx,Ny)
contrast[merge==0]=np.nan
dissimilarity = im_resize(plt_diss,Nx,Ny)
dissimilarity[merge==0]=np.nan
homogeneity = im_resize(plt_homo,Nx,Ny)
homogeneity[merge==0]=np.nan
energy = im_resize(plt_eng,Nx,Ny)
energy[merge==0]=np.nan
correlation = im_resize(plt_corr,Nx,Ny)
correlation[merge==0]=np.nan
ASM = im_resize(plt_ASM,Nx,Ny)
ASM[merge==0]=np.nan
del plt_cont, plt_diss, plt_homo, plt_eng, plt_corr, plt_ASM
del w,Z,ind,Ny,Nx
driverName= 'GTiff'
epsg_code=26949
proj = osr.SpatialReference()
proj.ImportFromEPSG(epsg_code)
CreateRaster(xx, yy, contrast, gt, proj,driverName,contFile)
CreateRaster(xx, yy, dissimilarity, gt, proj,driverName,dissFile)
CreateRaster(xx, yy, homogeneity, gt, proj,driverName,homoFile)
CreateRaster(xx, yy, energy, gt, proj,driverName,energyFile)
CreateRaster(xx, yy, correlation, gt, proj,driverName,corrFile)
CreateRaster(xx, yy, ASM, gt, proj,driverName,ASMFile)
del contrast, merge, xx, yy, gt, meter, dissimilarity, homogeneity, energy, correlation, ASM
This script calculates GLCM properties for a defined window size, with no overlap between adjacent windows.
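As a side note, the same non-overlapping windowing can be sketched with skimage.util.view_as_windows (a sketch only, not part of the original script; it assumes the merge array defined above), which may be easier to read; the patches can then be fed to p_me exactly as before:
import numpy as np
from skimage.util import view_as_windows

win = 7
# Non-overlapping win x win patches (step equal to the window size)
patches = view_as_windows(merge, (win, win), step=win)
n_rows, n_cols = patches.shape[:2]
Z = patches.reshape(-1, win, win)  # flat list of patches, like the Z returned by sliding_window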