I am trying to perform a 2d convolution in python using numpy
I have a 2d array as follows with kernel H_r for the rows and H_c for the columns
data = np.zeros((nr, nc), dtype=np.float32)
#fill array with some data here then convolve
for r in range(nr):
data[r,:] = np.convolve(data[r,:], H_r, 'same')
for c in range(nc):
data[:,c] = np.convolve(data[:,c], H_c, 'same')
data = data.astype(np.uint8);
It does not produce the output that I was expecting, does this code look OK, I think the problem is with the casting from float32 to 8bit. Whats the best way to do this
Thanks
Maybe it is not the most optimized solution, but this is an implementation I used before with numpy library for Python:
def convolution2d(image, kernel, bias):
m, n = kernel.shape
if (m == n):
y, x = image.shape
y = y - m + 1
x = x - m + 1
new_image = np.zeros((y,x))
for i in range(y):
for j in range(x):
new_image[i][j] = np.sum(image[i:i+m, j:j+m]*kernel) + bias
return new_image
I hope this code helps other guys with the same doubt.
Regards.
Edit [Jan 2019]
#Tashus comment bellow is correct, and #dudemeister's answer is thus probably more on the mark. The function he suggested is also more efficient, by avoiding a direct 2D convolution and the number of operations that would entail.
Possible Problem
I believe you are doing two 1d convolutions, the first per columns and the second per rows, and replacing the results from the first with the results of the second.
Notice that numpy.convolve with the 'same' argument returns an array of equal shape to the largest one provided, so when you make the first convolution you already populated the entire data array.
One good way to visualize your arrays during these steps is to use Hinton diagrams, so you can check which elements already have a value.
Possible Solution
You can try to add the results of the two convolutions (use data[:,c] += .. instead of data[:,c] = on the second for loop), if your convolution matrix is the result of using the one dimensional H_r and H_c matrices like so:
Another way to do that would be to use scipy.signal.convolve2d with a 2d convolution array, which is probably what you wanted to do in the first place.
Since you already have your kernel separated you should simply use the sepfir2d function from scipy:
from scipy.signal import sepfir2d
convolved = sepfir2d(data, H_r, H_c)
On the other hand, the code you have there looks all right ...
I checked out many implementations and found none for my purpose, which should be really simple. So here is a dead-simple implementation with for loop
def convolution2d(image, kernel, stride, padding):
image = np.pad(image, [(padding, padding), (padding, padding)], mode='constant', constant_values=0)
kernel_height, kernel_width = kernel.shape
padded_height, padded_width = image.shape
output_height = (padded_height - kernel_height) // stride + 1
output_width = (padded_width - kernel_width) // stride + 1
new_image = np.zeros((output_height, output_width)).astype(np.float32)
for y in range(0, output_height):
for x in range(0, output_width):
new_image[y][x] = np.sum(image[y * stride:y * stride + kernel_height, x * stride:x * stride + kernel_width] * kernel).astype(np.float32)
return new_image
It might not be the most optimized solution either, but it is approximately ten times faster than the one proposed by #omotto and it only uses basic numpy function (as reshape, expand_dims, tile...) and no 'for' loops:
def gen_idx_conv1d(in_size, ker_size):
"""
Generates a list of indices. This indices correspond to the indices
of a 1D input tensor on which we would like to apply a 1D convolution.
For instance, with a 1D input array of size 5 and a kernel of size 3, the
1D convolution product will successively looks at elements of indices [0,1,2],
[1,2,3] and [2,3,4] in the input array. In this case, the function idx_conv1d(5,3)
outputs the following array: array([0,1,2,1,2,3,2,3,4]).
args:
in_size: (type: int) size of the input 1d array.
ker_size: (type: int) kernel size.
return:
idx_list: (type: np.array) list of the successive indices of the 1D input array
access to the 1D convolution algorithm.
example:
>>> gen_idx_conv1d(in_size=5, ker_size=3)
array([0, 1, 2, 1, 2, 3, 2, 3, 4])
"""
f = lambda dim1, dim2, axis: np.reshape(np.tile(np.expand_dims(np.arange(dim1),axis),dim2),-1)
out_size = in_size-ker_size+1
return f(ker_size, out_size, 0)+f(out_size, ker_size, 1)
def repeat_idx_2d(idx_list, nbof_rep, axis):
"""
Repeats an array of indices (idx_list) a number of time (nbof_rep) "along" an axis
(axis). This function helps to browse through a 2d array of size
(len(idx_list),nbof_rep).
args:
idx_list: (type: np.array or list) a 1D array of indices.
nbof_rep: (type: int) number of repetition.
axis: (type: int) axis "along" which the repetition will be applied.
return
idx_list: (type: np.array) a 1D array of indices of size len(idx_list)*nbof_rep.
example:
>>> a = np.array([0, 1, 2])
>>> repeat_idx_2d(a, 3, 0) # repeats array 'a' 3 times along 'axis' 0
array([0, 0, 0, 1, 1, 1, 2, 2, 2])
>>> repeat_idx_2d(a, 3, 1) # repeats array 'a' 3 times along 'axis' 1
array([0, 1, 2, 0, 1, 2, 0, 1, 2])
>>> b = np.reshape(np.arange(3*4), (3,4))
>>> b[repeat_idx_2d(np.arange(3), 4, 0), repeat_idx_2d(np.arange(4), 3, 1)]
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
"""
assert axis in [0,1], "Axis should be equal to 0 or 1."
tile_axis = (nbof_rep,1) if axis else (1,nbof_rep)
return np.reshape(np.tile(np.expand_dims(idx_list, 1),tile_axis),-1)
def conv2d(im, ker):
"""
Performs a 'valid' 2D convolution on an image. The input image may be
a 2D or a 3D array.
The output image first two dimensions will be reduced depending on the
convolution size.
The kernel may be a 2D or 3D array. If 2D, it will be applied on every
channel of the input image. If 3D, its last dimension must match the
image one.
args:
im: (type: np.array) image (2D or 3D).
ker: (type: np.array) convolution kernel (2D or 3D).
returns:
im: (type: np.array) convolved image.
example:
>>> im = np.reshape(np.arange(10*10*3),(10,10,3))/(10*10*3) # 3D image
>>> ker = np.array([[0,1,0],[-1,0,1],[0,-1,0]]) # 2D kernel
>>> conv2d(im, ker) # 3D array of shape (8,8,3)
"""
if len(im.shape)==2: # if the image is a 2D array, it is reshaped by expanding the last dimension
im = np.expand_dims(im,-1)
im_x, im_y, im_w = im.shape
if len(ker.shape)==2: # if the kernel is a 2D array, it is reshaped so it will be applied to all of the image channels
ker = np.tile(np.expand_dims(ker,-1),[1,1,im_w]) # the same kernel will be applied to all of the channels
assert ker.shape[-1]==im.shape[-1], "Kernel and image last dimension must match."
ker_x = ker.shape[0]
ker_y = ker.shape[1]
# shape of the output image
out_x = im_x - ker_x + 1
out_y = im_y - ker_y + 1
# reshapes the image to (out_x, ker_x, out_y, ker_y, im_w)
idx_list_x = gen_idx_conv1d(im_x, ker_x) # computes the indices of a 1D conv (cf. idx_conv1d doc)
idx_list_y = gen_idx_conv1d(im_y, ker_y)
idx_reshaped_x = repeat_idx_2d(idx_list_x, len(idx_list_y), 0) # repeats the previous indices to be used in 2D (cf. repeat_idx_2d doc)
idx_reshaped_y = repeat_idx_2d(idx_list_y, len(idx_list_x), 1)
im_reshaped = np.reshape(im[idx_reshaped_x, idx_reshaped_y, :], [out_x, ker_x, out_y, ker_y, im_w]) # reshapes
# reshapes the 2D kernel
ker = np.reshape(ker,[1, ker_x, 1, ker_y, im_w])
# applies the kernel to the image and reduces the dimension back to the one of original input image
return np.squeeze(np.sum(im_reshaped*ker, axis=(1,3)))
I tried to add a lot of comments to explain the method but the global idea is to reshape the 3D input image to a 5D one of shape (output_image_height, kernel_height, output_image_width, kernel_width, output_image_channel) and then to apply the kernel directly using the basic array multiplication. Of course, this methods is then using more memory (during the execution the size of the image is thus multiply by kernel_height*kernel_width) but it is faster.
To do this reshape step, I 'over-used' the indexing methods of numpy arrays, especially, the possibility of giving a numpy array as indices into a numpy array.
This methods could also be used to re-code the 2D convolution product in Pytorch or Tensorflow using the base math functions but I have no doubt in saying that it will be slower than the existing nn.conv2d operator...
I really enjoyed coding this method by only using the numpy basic tools.
One of the most obvious is to hard code the kernel.
img = img.convert('L')
a = np.array(img)
out = np.zeros([a.shape[0]-2, a.shape[1]-2], dtype='float')
out += a[:-2, :-2]
out += a[1:-1, :-2]
out += a[2:, :-2]
out += a[:-2, 1:-1]
out += a[1:-1,1:-1]
out += a[2:, 1:-1]
out += a[:-2, 2:]
out += a[1:-1, 2:]
out += a[2:, 2:]
out /= 9.0
out = out.astype('uint8')
img = Image.fromarray(out)
This example does a box blur 3x3 completely unrolled. You can multiply the values where you have a different value and divide them by a different amount. But, if you honestly want the quickest and dirtiest method this is it. I think it beats Guillaume Mougeot's method by a factor of like 5. His method beating the others by a factor of 10.
It may lose a few steps if you're doing something like a gaussian blur. and need to multiply some stuff.
Try to first round and then cast to uint8:
data = data.round().astype(np.uint8);
I wrote this convolve_stride which uses numpy.lib.stride_tricks.as_strided. Moreover it supports both strides and dilation. It is also compatible to tensor with order > 2.
import numpy as np
from numpy.lib.stride_tricks import as_strided
from im2col import im2col
def conv_view(X, F_s, dr, std):
X_s = np.array(X.shape)
F_s = np.array(F_s)
dr = np.array(dr)
Fd_s = (F_s - 1) * dr + 1
if np.any(Fd_s > X_s):
raise ValueError('(Dilated) filter size must be smaller than X')
std = np.array(std)
X_ss = np.array(X.strides)
Xn_s = (X_s - Fd_s) // std + 1
Xv_s = np.append(Xn_s, F_s)
Xv_ss = np.tile(X_ss, 2) * np.append(std, dr)
return as_strided(X, Xv_s, Xv_ss, writeable=False)
def convolve_stride(X, F, dr=None, std=None):
if dr is None:
dr = np.ones(X.ndim, dtype=int)
if std is None:
std = np.ones(X.ndim, dtype=int)
if not (X.ndim == F.ndim == len(dr) == len(std)):
raise ValueError('X.ndim, F.ndim, len(dr), len(std) must be the same')
Xv = conv_view(X, F.shape, dr, std)
return np.tensordot(Xv, F, axes=X.ndim)
%timeit -n 100 -r 10 convolve_stride(A, F)
#31.2 ms ± 1.31 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
Super simple and fast convolution using only basic numpy:
import numpy as np
def conv2d(image, kernel):
# apply kernel to image, return image of the same shape
# assume both image and kernel are 2D arrays
# kernel = np.flipud(np.fliplr(kernel)) # optionally flip the kernel
k = kernel.shape[0]
width = k//2
# place the image inside a frame to compensate for the kernel overlap
a = framed(image, width)
b = np.zeros(image.shape) # fill the output array with zeros; do not use np.empty()
# shift the image around each pixel, multiply by the corresponding kernel value and accumulate the results
for p, dp, r, dr in [(i, i + image.shape[0], j, j + image.shape[1]) for i in range(k) for j in range(k)]:
b += a[p:dp, r:dr] * kernel[p, r]
# or just write two nested for loops if you prefer
# np.clip(b, 0, 255, out=b) # optionally clip values exceeding the limits
return b
def framed(image, width):
a = np.zeros((image.shape[0]+2*width, image.shape[1]+2*width))
a[width:-width, width:-width] = image
# alternatively fill the frame with ones or copy border pixels
return a
Run it:
Image.fromarray(conv2d(image, kernel).astype('uint8'))
Instead of sliding the kernel along the image and computing the transformation pixel by pixel, create a series of shifted versions of the image corresponding to each element in the kernel and apply the corresponding kernel value to each of the shifted image versions.
This is probably the fastest you can get using just basic numpy; the speed is already comparable to C implementation of scipy convolve2d and better than fftconvolve. The idea is similar to #Tatarize. This example works only for one color component; for RGB just repeat for each (or modify the algorithm accordingly).
Typically, Convolution 2D is a misnomer. Ideally, under the hood,
whats being done is a correlation of 2 matrices.
pad == same
returns the output as the same as input dimension
It can also take asymmetric images. In order to perform correlation(convolution in deep learning lingo) on a batch of 2d matrices, one can iterate over all the channels, calculate the correlation for each of the channel slices with the respective filter slice.
For example: If image is (28,28,3) and filter size is (5,5,3) then take each of the 3 slices from the image channel and perform the cross correlation using the custom function above and stack the resulting matrix in the respective dimension of the output.
def get_cross_corr_2d(W, X, pad = 'valid'):
if(pad == 'same'):
pr = int((W.shape[0] - 1)/2)
pc = int((W.shape[1] - 1)/2)
conv_2d = np.zeros((X.shape[0], X.shape[1]))
X_pad = np.zeros((X.shape[0] + 2*pr, X.shape[1] + 2*pc))
X_pad[pr:pr+X.shape[0], pc:pc+X.shape[1]] = X
for r in range(conv_2d.shape[0]):
for c in range(conv_2d.shape[1]):
conv_2d[r,c] = np.sum(np.inner(W, X_pad[r:r+W.shape[0], c:c+W.shape[1]]))
return conv_2d
else:
pr = W.shape[0] - 1
pc = W.shape[1] - 1
conv_2d = np.zeros((X.shape[0] - W.shape[0] + 2*pr + 1,
X.shape[1] - W.shape[1] + 2*pc + 1))
X_pad = np.zeros((X.shape[0] + 2*pr, X.shape[1] + 2*pc))
X_pad[pr:pr+X.shape[0], pc:pc+X.shape[1]] = X
for r in range(conv_2d.shape[0]):
for c in range(conv_2d.shape[1]):
conv_2d[r,c] = np.sum(np.multiply(W, X_pad[r:r+W.shape[0], c:c+W.shape[1]]))
return conv_2d
This code incorrect:
for r in range(nr):
data[r,:] = np.convolve(data[r,:], H_r, 'same')
for c in range(nc):
data[:,c] = np.convolve(data[:,c], H_c, 'same')
See Nussbaumer transformation from multidimentional convolution to one dimentional.
Related
I am trying to perform a 2d convolution in python using numpy
I have a 2d array as follows with kernel H_r for the rows and H_c for the columns
data = np.zeros((nr, nc), dtype=np.float32)
#fill array with some data here then convolve
for r in range(nr):
data[r,:] = np.convolve(data[r,:], H_r, 'same')
for c in range(nc):
data[:,c] = np.convolve(data[:,c], H_c, 'same')
data = data.astype(np.uint8);
It does not produce the output that I was expecting, does this code look OK, I think the problem is with the casting from float32 to 8bit. Whats the best way to do this
Thanks
Maybe it is not the most optimized solution, but this is an implementation I used before with numpy library for Python:
def convolution2d(image, kernel, bias):
m, n = kernel.shape
if (m == n):
y, x = image.shape
y = y - m + 1
x = x - m + 1
new_image = np.zeros((y,x))
for i in range(y):
for j in range(x):
new_image[i][j] = np.sum(image[i:i+m, j:j+m]*kernel) + bias
return new_image
I hope this code helps other guys with the same doubt.
Regards.
Edit [Jan 2019]
#Tashus comment bellow is correct, and #dudemeister's answer is thus probably more on the mark. The function he suggested is also more efficient, by avoiding a direct 2D convolution and the number of operations that would entail.
Possible Problem
I believe you are doing two 1d convolutions, the first per columns and the second per rows, and replacing the results from the first with the results of the second.
Notice that numpy.convolve with the 'same' argument returns an array of equal shape to the largest one provided, so when you make the first convolution you already populated the entire data array.
One good way to visualize your arrays during these steps is to use Hinton diagrams, so you can check which elements already have a value.
Possible Solution
You can try to add the results of the two convolutions (use data[:,c] += .. instead of data[:,c] = on the second for loop), if your convolution matrix is the result of using the one dimensional H_r and H_c matrices like so:
Another way to do that would be to use scipy.signal.convolve2d with a 2d convolution array, which is probably what you wanted to do in the first place.
Since you already have your kernel separated you should simply use the sepfir2d function from scipy:
from scipy.signal import sepfir2d
convolved = sepfir2d(data, H_r, H_c)
On the other hand, the code you have there looks all right ...
I checked out many implementations and found none for my purpose, which should be really simple. So here is a dead-simple implementation with for loop
def convolution2d(image, kernel, stride, padding):
image = np.pad(image, [(padding, padding), (padding, padding)], mode='constant', constant_values=0)
kernel_height, kernel_width = kernel.shape
padded_height, padded_width = image.shape
output_height = (padded_height - kernel_height) // stride + 1
output_width = (padded_width - kernel_width) // stride + 1
new_image = np.zeros((output_height, output_width)).astype(np.float32)
for y in range(0, output_height):
for x in range(0, output_width):
new_image[y][x] = np.sum(image[y * stride:y * stride + kernel_height, x * stride:x * stride + kernel_width] * kernel).astype(np.float32)
return new_image
It might not be the most optimized solution either, but it is approximately ten times faster than the one proposed by #omotto and it only uses basic numpy function (as reshape, expand_dims, tile...) and no 'for' loops:
def gen_idx_conv1d(in_size, ker_size):
"""
Generates a list of indices. This indices correspond to the indices
of a 1D input tensor on which we would like to apply a 1D convolution.
For instance, with a 1D input array of size 5 and a kernel of size 3, the
1D convolution product will successively looks at elements of indices [0,1,2],
[1,2,3] and [2,3,4] in the input array. In this case, the function idx_conv1d(5,3)
outputs the following array: array([0,1,2,1,2,3,2,3,4]).
args:
in_size: (type: int) size of the input 1d array.
ker_size: (type: int) kernel size.
return:
idx_list: (type: np.array) list of the successive indices of the 1D input array
access to the 1D convolution algorithm.
example:
>>> gen_idx_conv1d(in_size=5, ker_size=3)
array([0, 1, 2, 1, 2, 3, 2, 3, 4])
"""
f = lambda dim1, dim2, axis: np.reshape(np.tile(np.expand_dims(np.arange(dim1),axis),dim2),-1)
out_size = in_size-ker_size+1
return f(ker_size, out_size, 0)+f(out_size, ker_size, 1)
def repeat_idx_2d(idx_list, nbof_rep, axis):
"""
Repeats an array of indices (idx_list) a number of time (nbof_rep) "along" an axis
(axis). This function helps to browse through a 2d array of size
(len(idx_list),nbof_rep).
args:
idx_list: (type: np.array or list) a 1D array of indices.
nbof_rep: (type: int) number of repetition.
axis: (type: int) axis "along" which the repetition will be applied.
return
idx_list: (type: np.array) a 1D array of indices of size len(idx_list)*nbof_rep.
example:
>>> a = np.array([0, 1, 2])
>>> repeat_idx_2d(a, 3, 0) # repeats array 'a' 3 times along 'axis' 0
array([0, 0, 0, 1, 1, 1, 2, 2, 2])
>>> repeat_idx_2d(a, 3, 1) # repeats array 'a' 3 times along 'axis' 1
array([0, 1, 2, 0, 1, 2, 0, 1, 2])
>>> b = np.reshape(np.arange(3*4), (3,4))
>>> b[repeat_idx_2d(np.arange(3), 4, 0), repeat_idx_2d(np.arange(4), 3, 1)]
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
"""
assert axis in [0,1], "Axis should be equal to 0 or 1."
tile_axis = (nbof_rep,1) if axis else (1,nbof_rep)
return np.reshape(np.tile(np.expand_dims(idx_list, 1),tile_axis),-1)
def conv2d(im, ker):
"""
Performs a 'valid' 2D convolution on an image. The input image may be
a 2D or a 3D array.
The output image first two dimensions will be reduced depending on the
convolution size.
The kernel may be a 2D or 3D array. If 2D, it will be applied on every
channel of the input image. If 3D, its last dimension must match the
image one.
args:
im: (type: np.array) image (2D or 3D).
ker: (type: np.array) convolution kernel (2D or 3D).
returns:
im: (type: np.array) convolved image.
example:
>>> im = np.reshape(np.arange(10*10*3),(10,10,3))/(10*10*3) # 3D image
>>> ker = np.array([[0,1,0],[-1,0,1],[0,-1,0]]) # 2D kernel
>>> conv2d(im, ker) # 3D array of shape (8,8,3)
"""
if len(im.shape)==2: # if the image is a 2D array, it is reshaped by expanding the last dimension
im = np.expand_dims(im,-1)
im_x, im_y, im_w = im.shape
if len(ker.shape)==2: # if the kernel is a 2D array, it is reshaped so it will be applied to all of the image channels
ker = np.tile(np.expand_dims(ker,-1),[1,1,im_w]) # the same kernel will be applied to all of the channels
assert ker.shape[-1]==im.shape[-1], "Kernel and image last dimension must match."
ker_x = ker.shape[0]
ker_y = ker.shape[1]
# shape of the output image
out_x = im_x - ker_x + 1
out_y = im_y - ker_y + 1
# reshapes the image to (out_x, ker_x, out_y, ker_y, im_w)
idx_list_x = gen_idx_conv1d(im_x, ker_x) # computes the indices of a 1D conv (cf. idx_conv1d doc)
idx_list_y = gen_idx_conv1d(im_y, ker_y)
idx_reshaped_x = repeat_idx_2d(idx_list_x, len(idx_list_y), 0) # repeats the previous indices to be used in 2D (cf. repeat_idx_2d doc)
idx_reshaped_y = repeat_idx_2d(idx_list_y, len(idx_list_x), 1)
im_reshaped = np.reshape(im[idx_reshaped_x, idx_reshaped_y, :], [out_x, ker_x, out_y, ker_y, im_w]) # reshapes
# reshapes the 2D kernel
ker = np.reshape(ker,[1, ker_x, 1, ker_y, im_w])
# applies the kernel to the image and reduces the dimension back to the one of original input image
return np.squeeze(np.sum(im_reshaped*ker, axis=(1,3)))
I tried to add a lot of comments to explain the method but the global idea is to reshape the 3D input image to a 5D one of shape (output_image_height, kernel_height, output_image_width, kernel_width, output_image_channel) and then to apply the kernel directly using the basic array multiplication. Of course, this methods is then using more memory (during the execution the size of the image is thus multiply by kernel_height*kernel_width) but it is faster.
To do this reshape step, I 'over-used' the indexing methods of numpy arrays, especially, the possibility of giving a numpy array as indices into a numpy array.
This methods could also be used to re-code the 2D convolution product in Pytorch or Tensorflow using the base math functions but I have no doubt in saying that it will be slower than the existing nn.conv2d operator...
I really enjoyed coding this method by only using the numpy basic tools.
One of the most obvious is to hard code the kernel.
img = img.convert('L')
a = np.array(img)
out = np.zeros([a.shape[0]-2, a.shape[1]-2], dtype='float')
out += a[:-2, :-2]
out += a[1:-1, :-2]
out += a[2:, :-2]
out += a[:-2, 1:-1]
out += a[1:-1,1:-1]
out += a[2:, 1:-1]
out += a[:-2, 2:]
out += a[1:-1, 2:]
out += a[2:, 2:]
out /= 9.0
out = out.astype('uint8')
img = Image.fromarray(out)
This example does a box blur 3x3 completely unrolled. You can multiply the values where you have a different value and divide them by a different amount. But, if you honestly want the quickest and dirtiest method this is it. I think it beats Guillaume Mougeot's method by a factor of like 5. His method beating the others by a factor of 10.
It may lose a few steps if you're doing something like a gaussian blur. and need to multiply some stuff.
Try to first round and then cast to uint8:
data = data.round().astype(np.uint8);
I wrote this convolve_stride which uses numpy.lib.stride_tricks.as_strided. Moreover it supports both strides and dilation. It is also compatible to tensor with order > 2.
import numpy as np
from numpy.lib.stride_tricks import as_strided
from im2col import im2col
def conv_view(X, F_s, dr, std):
X_s = np.array(X.shape)
F_s = np.array(F_s)
dr = np.array(dr)
Fd_s = (F_s - 1) * dr + 1
if np.any(Fd_s > X_s):
raise ValueError('(Dilated) filter size must be smaller than X')
std = np.array(std)
X_ss = np.array(X.strides)
Xn_s = (X_s - Fd_s) // std + 1
Xv_s = np.append(Xn_s, F_s)
Xv_ss = np.tile(X_ss, 2) * np.append(std, dr)
return as_strided(X, Xv_s, Xv_ss, writeable=False)
def convolve_stride(X, F, dr=None, std=None):
if dr is None:
dr = np.ones(X.ndim, dtype=int)
if std is None:
std = np.ones(X.ndim, dtype=int)
if not (X.ndim == F.ndim == len(dr) == len(std)):
raise ValueError('X.ndim, F.ndim, len(dr), len(std) must be the same')
Xv = conv_view(X, F.shape, dr, std)
return np.tensordot(Xv, F, axes=X.ndim)
%timeit -n 100 -r 10 convolve_stride(A, F)
#31.2 ms ± 1.31 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)
Super simple and fast convolution using only basic numpy:
import numpy as np
def conv2d(image, kernel):
# apply kernel to image, return image of the same shape
# assume both image and kernel are 2D arrays
# kernel = np.flipud(np.fliplr(kernel)) # optionally flip the kernel
k = kernel.shape[0]
width = k//2
# place the image inside a frame to compensate for the kernel overlap
a = framed(image, width)
b = np.zeros(image.shape) # fill the output array with zeros; do not use np.empty()
# shift the image around each pixel, multiply by the corresponding kernel value and accumulate the results
for p, dp, r, dr in [(i, i + image.shape[0], j, j + image.shape[1]) for i in range(k) for j in range(k)]:
b += a[p:dp, r:dr] * kernel[p, r]
# or just write two nested for loops if you prefer
# np.clip(b, 0, 255, out=b) # optionally clip values exceeding the limits
return b
def framed(image, width):
a = np.zeros((image.shape[0]+2*width, image.shape[1]+2*width))
a[width:-width, width:-width] = image
# alternatively fill the frame with ones or copy border pixels
return a
Run it:
Image.fromarray(conv2d(image, kernel).astype('uint8'))
Instead of sliding the kernel along the image and computing the transformation pixel by pixel, create a series of shifted versions of the image corresponding to each element in the kernel and apply the corresponding kernel value to each of the shifted image versions.
This is probably the fastest you can get using just basic numpy; the speed is already comparable to C implementation of scipy convolve2d and better than fftconvolve. The idea is similar to #Tatarize. This example works only for one color component; for RGB just repeat for each (or modify the algorithm accordingly).
Typically, Convolution 2D is a misnomer. Ideally, under the hood,
whats being done is a correlation of 2 matrices.
pad == same
returns the output as the same as input dimension
It can also take asymmetric images. In order to perform correlation(convolution in deep learning lingo) on a batch of 2d matrices, one can iterate over all the channels, calculate the correlation for each of the channel slices with the respective filter slice.
For example: If image is (28,28,3) and filter size is (5,5,3) then take each of the 3 slices from the image channel and perform the cross correlation using the custom function above and stack the resulting matrix in the respective dimension of the output.
def get_cross_corr_2d(W, X, pad = 'valid'):
if(pad == 'same'):
pr = int((W.shape[0] - 1)/2)
pc = int((W.shape[1] - 1)/2)
conv_2d = np.zeros((X.shape[0], X.shape[1]))
X_pad = np.zeros((X.shape[0] + 2*pr, X.shape[1] + 2*pc))
X_pad[pr:pr+X.shape[0], pc:pc+X.shape[1]] = X
for r in range(conv_2d.shape[0]):
for c in range(conv_2d.shape[1]):
conv_2d[r,c] = np.sum(np.inner(W, X_pad[r:r+W.shape[0], c:c+W.shape[1]]))
return conv_2d
else:
pr = W.shape[0] - 1
pc = W.shape[1] - 1
conv_2d = np.zeros((X.shape[0] - W.shape[0] + 2*pr + 1,
X.shape[1] - W.shape[1] + 2*pc + 1))
X_pad = np.zeros((X.shape[0] + 2*pr, X.shape[1] + 2*pc))
X_pad[pr:pr+X.shape[0], pc:pc+X.shape[1]] = X
for r in range(conv_2d.shape[0]):
for c in range(conv_2d.shape[1]):
conv_2d[r,c] = np.sum(np.multiply(W, X_pad[r:r+W.shape[0], c:c+W.shape[1]]))
return conv_2d
This code incorrect:
for r in range(nr):
data[r,:] = np.convolve(data[r,:], H_r, 'same')
for c in range(nc):
data[:,c] = np.convolve(data[:,c], H_c, 'same')
See Nussbaumer transformation from multidimentional convolution to one dimentional.
I am translating code from MATLAB to python but cannot perfectly replicate the results of MATLAB's imresize3. My input is a 101x101x101 array. First four inputs ([0,0:3,0] or (1,1:4,1)) are: 0.3819 0.4033 0.4336 0.2767. The data input for both languages is identical.
sampleQDNormSmall = imresize3(sampleQDNorm,0.5);
This results in a 51x51x51 array where the first four values (1,1:4,1) for example are: 0.3443 0.2646 0.2700 0.2835
Now I've tried two different pieces of code in python to replicate these results:
from skimage.transform import resize
from skimage.transform import rescale
sampleQDNormSmall = resize(sampleQDNorm,(0.5*sampleQDNorm.shape[0],0.5*sampleQDNorm.shape[1],0.5*sampleQDNorm.shape[2]),order=3,anti_aliasing=True);
sampleQDNormSmall1=rescale(sampleQDNorm,0.5,order=3,anti_aliasing=True)
The first one gives a 51x51x51 array that has the first four values [0,0:3,0] of: 0.3452 0.2669 0.2774 0.3099. Which is very close but not exactly the same numerical outputs. I don't know enough about the optional arguments to know might get me a better result.
The second one gives a 50x50x50 array that has the first four values [0,0:3,0] of: 0.3422 0.2623 0.2810 0.3006. This is a different output array size and also doesn't reproduce the same numerical outputs as the MATLAB code or the other python function
I don't know enough about the optional arguments to know might get me a better result. I know for this type of array, MATLAB's default is cubic interpolation which is why I am using order 3 in python. The default for anti-aliasing in both is true. I have a two bigger arrays that I am having the same issues with: a (873x873x873) array and a bool (873x873x873) array.
The MATLAB code I'm using is considered the "correct answer" for the work I am doing so I am trying to replicate the results as accurately as possible into python. Please let me know what I can try in python to reproduce the correct data.
sampleQDNorm is roughly random decimals between 0 and 1 for [0:100,0:100,0:100] and is padded with zeros on sides [:,:,101],[:,101,:],[101,:,:]
Getting the exact same result as MATLAB imresize3 is challenging.
One reason is that MATLAB enables Antialiasing filter by default, and I can't seem to find the equivalent Python implementation.
The closet existing Python alternatives are described in this post.
scipy.ndimage.zoom supports 3D resizing.
It could be that skimage.transform.resize gives closer result, but none are identical to MATLAB result.
Reimplementing imresize3:
Looking at the MATLAB implementation of imresize3 (MATLAB source code), it is apparent that MATLAB implementation "simply" uses resize along each axis:
Resize (by half) along the vertical axis.
Resize the above result (by half) along the horizontal axis.
Resize the above result (by half) along the depth axis.
Here is a MATLAB codes sample that demonstrates the implementation (using cubic interpolation):
I1 = imread('peppers.png');
I2 = imresize(imread('autumn.tif'), [size(I1, 1), size(I1, 2)]);
I3 = imresize(imread('football.jpg'), [size(I1, 1), size(I1, 2)]);
I4 = imresize(imread('cameraman.tif'), [size(I1, 1), size(I1, 2)]);
I = cat(3, I1, I2, I3, I4);
J = imresize3(I, 0.5, 'cubic', 'Antialiasing', false);
imwrite(I1, '/Tmp/I1.png');
imwrite(I2, '/Tmp/I2.png');
imwrite(I3, '/Tmp/I3.png');
imwrite(I4, '/Tmp/I4.png');
imwrite(J(:,:,1), '/Tmp/J1.png');
imwrite(J(:,:,2), '/Tmp/J2.png');
imwrite(J(:,:,3), '/Tmp/J3.png');
imwrite(J(:,:,4), '/Tmp/J4.png');
imwrite(J(:,:,5), '/Tmp/J5.png');
K = cubicResize3(I, 0.5);
max_abs_diff = max(abs(double(J(:)) - double(K(:))));
disp(['max_abs_diff = ', num2str(max_abs_diff)])
function B = cubicResize3(A, scale)
order = [1 2 3];
B = A;
for k = 1:numel(order)
dim = order(k);
B = cubicResizeAlongDim(B, dim, scale);
end
end
function out = cubicResizeAlongDim(in, dim, scale)
% If dim is 3, permute the input matrix so that the third dimension
% becomes the first dimension. This way, we can resize along the
% third dimensions as though we were resizing along the first dimension.
isThirdDimResize = (dim == 3);
if isThirdDimResize
in = permute(in, [3 2 1]);
dim = 1;
end
if dim == 1
out_rows = round(size(in, 1)*scale);
out_cols = size(in, 2);
else % dim == 2
out_rows = size(in, 1);
out_cols = round(size(in,2)*scale);
end
out = zeros(out_rows, out_cols, size(in, 3), class(in)); % Allocate array for storing the output.
for i = 1:size(in, 3)
% Resize each color plane separately
out(:, :, i) = imresize(in(:, :, i), [out_rows, out_cols], 'bicubic', 'Antialiasing', false);
end
% Permute back so that the original dimensions are restored if we were
% resizing along the third dimension.
if isThirdDimResize
out = permute(out, [3 2 1]);
end
end
The result is max_abs_diff = 0, meaning that cubicResize3 and imresize3 gave the same output.
Note:
The above implementation stores images in Tmp folder to be used a input for testing Python implementation.
Here is a Python implementation using OpenCV:
import numpy as np
import cv2
#from scipy.ndimage import zoom
def cubic_resize_along_dim(inp, dim, scale):
""" Implementation is based on MATLAB source code of resizeAlongDim function """
# If dim is 3, permute the input matrix so that the third dimension
# becomes the first dimension. This way, we can resize along the
# third dimensions as though we were resizing along the first dimension.
is_third_dim_resize = (dim == 2)
if is_third_dim_resize:
inp = np.swapaxes(inp, 2, 0).copy() # in = permute(in, [3 2 1])
dim = 0
if dim == 0:
out_rows = int(np.round(inp.shape[0]*scale)) # out_rows = round(size(in, 1)*scale);
out_cols = inp.shape[1] # out_cols = size(in, 2);
else: # dim == 1
out_rows = inp.shape[0] # out_rows = size(in, 1);
out_cols = int(np.round(inp.shape[1]*scale)) # out_cols = round(size(in,2)*scale);
out = np.zeros((out_rows, out_cols, inp.shape[2]), inp.dtype) # out = zeros(out_rows, out_cols, size(in, 3), class(in)); % Allocate array for storing the output.
for i in range(inp.shape[2]):
# Resize each color plane separately
out[:, :, i] = cv2.resize(inp[:, :, i], (out_cols, out_rows), interpolation=cv2.INTER_CUBIC) # out(:, :, i) = imresize(inp(:, :, i), [out_rows, out_cols], 'bicubic', 'Antialiasing', false);
# Permute back so that the original dimensions are restored if we were
# resizing along the third dimension.
if is_third_dim_resize:
out = np.swapaxes(out, 2, 0) # out = permute(out, [3 2 1]);
return out
def cubic_resize3(a, scale):
b = a.copy()
for k in range(3):
b = cubic_resize_along_dim(b, k, scale)
return b
# Build 3D input image (10 channels with resolution 512x384).
i1 = cv2.cvtColor(cv2.imread('/Tmp/I1.png', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
i2 = cv2.cvtColor(cv2.imread('/Tmp/I2.png', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
i3 = cv2.cvtColor(cv2.imread('/Tmp/I3.png', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB)
i4 = cv2.imread('/Tmp/I4.png', cv2.IMREAD_UNCHANGED)
im = np.dstack((i1, i2, i3, i4)) # Stack arrays along the third axis
# Read and adjust MATLAB output (out_mat is used as reference for testing).
# out_mat is the result of J = imresize3(I, 0.5, 'cubic', 'Antialiasing', false);
j1 = cv2.imread('/Tmp/J1.png', cv2.IMREAD_UNCHANGED)
j2 = cv2.imread('/Tmp/J2.png', cv2.IMREAD_UNCHANGED)
j3 = cv2.imread('/Tmp/J3.png', cv2.IMREAD_UNCHANGED)
j4 = cv2.imread('/Tmp/J4.png', cv2.IMREAD_UNCHANGED)
j5 = cv2.imread('/Tmp/J5.png', cv2.IMREAD_UNCHANGED)
out_mat = np.dstack((j1, j2, j3, j4, j5)) # Stack arrays along the third axis
#out_py = zoom(im, 0.5, order=3, mode='reflect')
# Execute 3D resize in Python
out_py = cubic_resize3(im, 0.5)
abs_diff = np.absolute(out_mat.astype(np.int16) - out_py.astype(np.int16))
print(f'max_abs_diff = {abs_diff.max()}')
The Python implementation reads the input files stored by MATLAB (and convert from BGR to RGB when required).
The implementation compares the result of cubic_resize3 with the MATLAB output of imresize3.
The maximum difference is 12 (not zero).
Apparently cv2.resize and MATLAB imresize gives slightly different results.
Update:
Replacing:
out[:, :, i] = cv2.resize(inp[:, :, i], (out_cols, out_rows), interpolation=cv2.INTER_CUBIC)
with:
out[:, :, i] = transform.resize(inp[:, :, i], (out_rows, out_cols), order=3, mode='edge', anti_aliasing=False, preserve_range=True)
Reduces the maximum difference to 4.
I have a numpy array of 300x300 where I want to keep all elements periodically. Specifically, for both axes I want to keep the first 5 elements, then discard 15, keep 5, discard 15, etc. This should result in an array of 75x75 elements. How can this be done?
You can created a 1D mask, that carries out the keep/discard function, and then repeat the mask and apply the mask to the array. Here is an example.
import numpy as np
size = 300
array = np.arange(size).reshape((size, 1)) * np.arange(size).reshape((1, size))
mask = np.concatenate((np.ones(5), np.zeros(15))).astype(bool)
period = len(mask)
mask = np.repeat(mask.reshape((1, period)), repeats=size // period, axis=0)
mask = np.concatenate(mask, axis=0)
result = array[mask][:, mask]
print(result.shape)
You can view the array as series of 20x20 blocks, of which you want to keep the upper-left 5x5 portion. Let's say you have
keep = 5
discard = 15
This only works if
assert all(s % (keep + discard) == 0 for s in arr.shape)
First compute the shape of the view and use it:
block = keep + discard
shape1 = (arr.shape[0] // block, block, arr.shape[1] // block, block)
view = arr.reshape(shape1)[:, :keep, :, :keep]
The following operation will create a copy of the data because the view creates a non-contiguous buffer:
shape2 = (shape1[0] * keep, shape1[2] * keep)
result = view.reshape(shape2)
You can compute shape1 and shape2 in a more general manner with something like
shape1 = tuple(
np.stack((np.array(arr.shape) // block,
np.full(arr.ndim, block)), -1).ravel())
shape2 = tuple(np.array(shape1[::2]) * keep)
I would recommend packaging this into a function.
Here is my first thought of a solution. Will update later if I think of one with fewer lines. This should work even if the input is not square:
output = []
for i in range(len(arr)):
tmp = []
if i % (15+5) < 5: # keep first 5, then discard next 15
for j in range(len(arr[i])):
if j % (15+5) < 5: # keep first 5, then discard next 15
tmp.append(arr[i,j])
output.append(tmp)
Update:
Building off of Yang's answer, here is another way which uses np.tile, which repeats an array a given number of times along each axis. This relies on the input array being square in dimension.
import numpy as np
# Define one instance of the keep/discard box
keep, discard = 5, 15
mask = np.concatenate([np.ones(keep), np.zeros(discard)])
mask_2d = mask.reshape((keep+discard,1)) * mask.reshape((1,keep+discard))
# Tile it out -- overshoot, then trim to match size
count = len(arr)//len(mask_2d) + 1
tiled = np.tile(mask_2d, [count,count]).astype('bool')
tiled = tiled[:len(arr), :len(arr)]
# Apply the mask to the input array
dim = sum(tiled[0])
output = arr[tiled].reshape((dim,dim))
Another option using meshgrid and a modulo:
# MyArray = 300x300 numpy array
r = np.r_[0:300] # A slide from 0->300
xv, yv = np.meshgrid(r, r) # x and y grid
mask = ((xv%20)<5) & ((yv%20)<5) # We create the boolean mask
result = MyArray[mask].reshape((75,75)) # We apply the mask and reshape the final output
So I need a ND convolutional layer that also supports complex numbers. So I decided to code it myself.
I tested this code on numpy alone and it worked. Tested with several channels, 2D and 1D and complex. However, I have problems when I do it on TF.
This is my code so far:
def call(self, inputs):
with tf.name_scope("ComplexConvolution_" + str(self.layer_number)) as scope:
inputs = self._verify_inputs(inputs) # Check inputs are of expected shape and format
inputs = self.apply_padding(inputs) # Add zeros if needed
output_np = np.zeros( # I use np because tf does not support the assigment
(inputs.shape[0],) + # Per each image
self.output_size, # Image out size
dtype=self.input_dtype # To support complex numbers
)
img_index = 0
for image in inputs:
for filter_index in range(self.filters):
for i in range(int(np.prod(self.output_size[:-1]))): # for each element in the output
index = np.unravel_index(i, self.output_size[:-1])
start_index = tuple([a * b for a, b in zip(index, self.stride_shape)])
end_index = tuple([a+b for a, b in zip(start_index, self.kernel_shape)])
# set_trace()
sector_slice = tuple(
[slice(start_index[ind], end_index[ind]) for ind in range(len(start_index))]
)
sector = image[sector_slice]
new_value = tf.reduce_sum(sector * self.kernels[filter_index]) + self.bias[filter_index]
# I use Tied Bias https://datascience.stackexchange.com/a/37748/75968
output_np[img_index][index][filter_index] = new_value # The complicated line
img_index += 1
output = apply_activation(self.activation, output_np)
return output
input_size is a tuple of shape (dim1, dim2, ..., dim3, channels). An 2D rgb conv for example will be (32, 32, 3) and inputs will have shape (None, 32, 32, 3).
The output size is calculated from an equation I found in this paper: A guide to convolution arithmetic for deep learning
out_list = []
for i in range(len(self.input_size) - 1): # -1 because the number of input channels is irrelevant
out_list.append(int(np.floor((self.input_size[i] + 2 * self.padding_shape[i] - self.kernel_shape[i]) / self.stride_shape[i]) + 1))
out_list.append(self.filters)
Basically, I use np.zeros because if I use tf.zeros I cannot assign the new_value and I get:
TypeError: 'Tensor' object does not support item assignment
However, in this current state I am getting:
NotImplementedError: Cannot convert a symbolic Tensor (placeholder_1:0) to a numpy array.
On that same assignment. I don't see an easy fix, I think I should change the strategy of the code completely.
In the end, I did it in a very inefficient way based in this comment, also commented here but at least it works:
new_value = tf.reduce_sum(sector * self.kernels[filter_index]) + self.bias[filter_index]
indices = (img_index,) + index + (filter_index,)
mask = tf.Variable(tf.fill(output_np.shape, 1))
mask = mask[indices].assign(0)
mask = tf.cast(mask, dtype=self.input_dtype)
output_np = array * mask + (1 - mask) * new_value
I say inefficient because I create a whole new array for each assignment. My code is taking ages to compute for the moment so I will keep looking for improvements and post here if I get something better.
Basic idea: I have an array of images images=np.array([10, 28, 28, 3]). So 10 images 28x28 pixels with 3 colour channels. I want to stitch them together in one long line: single_image.shape # [280, 28, 3]. What would be the best numpy based function for that?
More generally: is there a function along the lines of stitch(array, source_axis=0, target_axis=1) that would transform an array A.shape # [a0, a1, source_axis, a4, target_axis, a6] into a shape B.shape # [a0, a1, a4, target_axis*source_axis, a6] by concatenating subarrays A[:,:,i,:,:,:] along axis=target_axis
You can set it up with a single moveaxis + reshape combo -
def merge_axis(array, source_axis=0, target_axis=1):
shp = a.shape
L = shp[source_axis]*shp[target_axis] # merged axis len
out_shp = np.insert(np.delete(shp,(source_axis,target_axis)),target_axis-1,L)
return np.moveaxis(a,source_axis,target_axis-1).reshape(out_shp)
Alternatively, out_shp could be setup with array manipulations and might be easier to follow, like so -
shp = np.array(a.shape)
shp[target_axis] *= shp[source_axis]
out_shp = np.delete(shp,source_axis)
If source and target axes are adjacent ones, we can skip moveaxis and simply reshape and the additional benefit would be that the output would be a view into the input and hence virtually free on runtime. So, we will introduce a If-conditional to check and modify our implementations to something like these -
def merge_axis_v1(array, source_axis=0, target_axis=1):
shp = a.shape
L = shp[source_axis]*shp[target_axis] # merged_axis_len
out_shp = np.insert(np.delete(shp,(source_axis,target_axis)),target_axis-1,L)
if target_axis==source_axis+1:
return a.reshape(out_shp)
else:
return np.moveaxis(a,source_axis,target_axis-1).reshape(out_shp)
def merge_axis_v2(array, source_axis=0, target_axis=1):
shp = np.array(a.shape)
shp[target_axis] *= shp[source_axis]
out_shp = np.delete(shp,source_axis)
if target_axis==source_axis+1:
return a.reshape(out_shp)
else:
return np.moveaxis(a,source_axis,target_axis-1).reshape(out_shp)
Verify views -
In [156]: a = np.random.rand(10,10,10,10,10)
In [157]: np.shares_memory(merge_axis_v1(a, source_axis=0, target_axis=1),a)
Out[157]: True
Here is my take:
def merge_axis(array, source_axis=0, target_axis=1):
array = np.moveaxis(array, source_axis, 0)
array = np.moveaxis(array, target_axis, 1)
array = np.concatenate(array)
array = np.moveaxis(array, 0, target_axis-1)
return array