Following up on this answer by jorgeca:
def patchify(img, patch_shape):
    """Return a 4D strided view of every overlapping patch of a 2D array.

    ``img`` is a 2D array and ``patch_shape`` is ``(x, y)``.  The result has
    shape ``(X-x+1, Y-y+1, x, y)``; element ``[i, j]`` is the patch whose
    top-left corner sits at ``img[i, j]``.
    """
    img = np.ascontiguousarray(img)  # no copy when already C-contiguous
    n_rows, n_cols = img.shape
    patch_rows, patch_cols = patch_shape
    view_shape = (
        n_rows - patch_rows + 1,
        n_cols - patch_cols + 1,
        patch_rows,
        patch_cols,
    )
    # In C order, moving one row (either in the patch grid or inside a patch)
    # skips ``n_cols`` items and moving one column skips a single item.
    item = img.itemsize
    view_strides = (n_cols * item, item, n_cols * item, item)
    return np.lib.stride_tricks.as_strided(
        img, shape=view_shape, strides=view_strides
    )
How can those overlapping arrays be merged back again to the original image?
Approach #1
Here's one approach after converting the 4D array of patches into 2D and then simply slicing and stacking the leftover rows and columns -
def unpatchify(img_patches, block_size):
    """Rebuild the 2D image from the 4D array of overlapping patches.

    ``img_patches`` has shape ``(m, n, x, y)`` and ``block_size`` is the
    patch shape ``(x, y)``.  The patches are laid out as one big 2D grid,
    then only the rows/columns that make up the image are kept.
    """
    block_rows, block_cols = block_size
    grid_width = img_patches.shape[1] * img_patches.shape[3]
    grid = img_patches.transpose(0, 2, 1, 3).reshape(-1, grid_width)

    keep_rows = np.zeros(grid.shape[0], dtype=bool)
    keep_rows[::block_rows] = True   # first row of every patch
    keep_rows[-block_rows:] = True   # all rows of the last patch

    keep_cols = np.zeros(grid.shape[1], dtype=bool)
    keep_cols[::block_cols] = True   # first column of every patch
    keep_cols[-block_cols:] = True   # all columns of the last patch

    return grid[np.ix_(keep_rows, keep_cols)]
Sample run -
In [233]: img = np.random.randint(0,255,(16,25))
...: block_size = (4,8)
...:
In [234]: np.allclose(img, unpatchify(patchify(img, block_size), block_size))
Out[234]: True
Approach #2
In the previous approach, use of transpose on the big 4D array would force a copy and as such that transpose operation might prove costly. To avoid that, here's another approach making heavy usage of slicing -
def unpatchify_v2(img_patches, block_size):
    """Rebuild the 2D image from overlapping patches using slicing only.

    ``img_patches`` has shape ``(m, n, r, q)``; the output has shape
    ``(m + r - 1, n + q - 1)``.  Four pieces are pasted into a zero-filled
    output: the non-overlapping tiles, the right edge, the bottom edge and
    the bottom-right corner patch.
    """
    step_rows, step_cols = block_size
    n_pr, n_pc, patch_rows, patch_cols = img_patches.shape
    out = np.zeros(
        (n_pr + patch_rows - 1, n_pc + patch_cols - 1),
        dtype=img_patches.dtype,
    )

    # Every block_size-th patch, laid side by side as a 2D array.
    tiles = img_patches[::step_rows, ::step_cols].swapaxes(1, 2)
    tiles = tiles.reshape(-1, tiles.shape[2] * tiles.shape[3])
    right_edge = img_patches[:, -1, 0, :]      # first row of each last-column patch
    bottom_edge = img_patches[-1, :, :, 0].T   # first column of each last-row patch
    corner = img_patches[-1, -1]               # bottom-right patch

    out[:tiles.shape[0], :tiles.shape[1]] = tiles
    out[:right_edge.shape[0], -right_edge.shape[1]:] = right_edge
    out[-bottom_edge.shape[0]:, :bottom_edge.shape[1]] = bottom_edge
    out[-corner.shape[0]:, -corner.shape[1]:] = corner
    return out
Runtime test
In [16]: img = np.random.randint(0,255,(1024,1024))
...: block_size = (3,3)
...: img_patches = patchify(img, block_size)
...:
In [17]: %timeit unpatchify(img_patches, block_size)
...: %timeit unpatchify_v2(img_patches, block_size)
10 loops, best of 3: 22.9 ms per loop
100 loops, best of 3: 2.25 ms per loop
In [18]: img = np.random.randint(0,255,(1024,1024))
...: block_size = (8,8)
...: img_patches = patchify(img, block_size)
...:
In [19]: %timeit unpatchify(img_patches, block_size)
...: %timeit unpatchify_v2(img_patches, block_size)
...:
10 loops, best of 3: 114 ms per loop
1000 loops, best of 3: 1.5 ms per loop
I'd like to speed up the following calculations handling r rays and n spheres. Here is what I got so far:
# shape of mu1 and mu2 is (r, n)
# shape of rays is (r, 3)
# note that intersections has 2n columns because for every sphere one can
# get up to two intersections (secant, tangent, no intersection)
intersections = np.empty((r, 2*n, 3))
for col in range(n):
intersections[:, col, :] = rays * mu1[:, col][:, np.newaxis]
intersections[:, col + n, :] = rays * mu2[:, col][:, np.newaxis]
# [...]
# calculate euclidean distance from the center of gravity (0,0,0)
distances = np.empty((r, 2 * n))
for col in range(n):
distances[:, col] = np.linalg.norm(intersections[:, col], axis=1)
distances[:, col + n] = np.linalg.norm(intersections[:, col + n], axis=1)
I tried speeding things up by avoiding the for-Loops, but couldn't figure out how to broadcast the arrays properly so that I only need a single function call. Any help is much appreciated.
Here's a vectorized way using broadcasting -
intersections = np.hstack((mu1,mu2))[...,None]*rays[:,None,:]
distances = np.sqrt((intersections**2).sum(2))
The last step could be replaced with a call to np.einsum, like so -
distances = np.sqrt(np.einsum('ijk,ijk->ij',intersections,intersections))
Or replace almost the whole thing with np.einsum for another vectorized way, like so -
mu = np.hstack((mu1,mu2))
distances = np.sqrt(np.einsum('ij,ij,ik,ik->ij',mu,mu,rays,rays))
Runtime tests and verify outputs -
def original_app(mu1, mu2, rays):
    """Loop-based reference: distance from the origin of every intersection.

    ``mu1`` and ``mu2`` have shape ``(r, n)`` and ``rays`` has shape
    ``(r, dims)``.  Returns an ``(r, 2*n)`` array whose first ``n`` columns
    come from ``mu1`` and last ``n`` columns from ``mu2``.

    The sizes are taken from the arguments rather than from module-level
    ``r``/``n`` globals, so the function works for any consistent input.
    """
    r, n = mu1.shape
    dims = rays.shape[1]
    intersections = np.empty((r, 2 * n, dims))
    for col in range(n):
        intersections[:, col, :] = rays * mu1[:, col][:, np.newaxis]
        intersections[:, col + n, :] = rays * mu2[:, col][:, np.newaxis]
    # Euclidean distance of each intersection from (0, 0, 0).
    distances = np.empty((r, 2 * n))
    for col in range(n):
        distances[:, col] = np.linalg.norm(intersections[:, col], axis=1)
        distances[:, col + n] = np.linalg.norm(intersections[:, col + n], axis=1)
    return distances
def vectorized_app1(mu1, mu2, rays):
    """Broadcasted distances: scale each ray by every mu, then take norms."""
    mu = np.hstack((mu1, mu2))
    points = mu[..., np.newaxis] * rays[:, np.newaxis, :]
    squared_norms = (points ** 2).sum(2)
    return np.sqrt(squared_norms)
def vectorized_app2(mu1, mu2, rays):
    """Like ``vectorized_app1`` but sums the squares with ``np.einsum``."""
    mu = np.hstack((mu1, mu2))
    points = mu[..., np.newaxis] * rays[:, np.newaxis, :]
    squared_norms = np.einsum('ijk,ijk->ij', points, points)
    return np.sqrt(squared_norms)
def vectorized_app3(mu1, mu2, rays):
    """Distances via a single ``np.einsum`` without the 3D intermediate."""
    stacked_mu = np.hstack((mu1, mu2))
    # mu[i, j]**2 * sum_k rays[i, k]**2, i.e. the squared norm of mu * ray.
    squared_norms = np.einsum(
        'ij,ij,ik,ik->ij', stacked_mu, stacked_mu, rays, rays
    )
    return np.sqrt(squared_norms)
Timings -
In [101]: # Inputs
...: r = 1000
...: n = 1000
...: mu1 = np.random.rand(r, n)
...: mu2 = np.random.rand(r, n)
...: rays = np.random.rand(r, 3)
In [102]: np.allclose(original_app(mu1,mu2,rays),vectorized_app1(mu1,mu2,rays))
Out[102]: True
In [103]: np.allclose(original_app(mu1,mu2,rays),vectorized_app2(mu1,mu2,rays))
Out[103]: True
In [104]: np.allclose(original_app(mu1,mu2,rays),vectorized_app3(mu1,mu2,rays))
Out[104]: True
In [105]: %timeit original_app(mu1,mu2,rays)
...: %timeit vectorized_app1(mu1,mu2,rays)
...: %timeit vectorized_app2(mu1,mu2,rays)
...: %timeit vectorized_app3(mu1,mu2,rays)
...:
1 loops, best of 3: 306 ms per loop
1 loops, best of 3: 215 ms per loop
10 loops, best of 3: 140 ms per loop
10 loops, best of 3: 136 ms per loop
I want to invert a matrix without using numpy.linalg.inv.
The reason is that I am using Numba to speed up the code, but numpy.linalg.inv is not supported, so I am wondering if I can invert a matrix with 'classic' Python code.
With numpy.linalg.inv an example code would look like that:
import numpy as np
M = np.array([[1,0,0],[0,1,0],[0,0,1]])
Minv = np.linalg.inv(M)
Here is a more elegant and scalable solution, imo. It'll work for any nxn matrix and you may find use for the other methods. Note that getMatrixInverse(m) takes in an array of arrays as input. Please feel free to ask any questions.
def transposeMatrix(m):
    """Return the transpose of ``m`` as a new list of lists.

    A real list is built (not a lazy ``map`` object) so the result supports
    ``len()`` and indexing on Python 3 as well as Python 2.
    """
    return [list(column) for column in zip(*m)]
def getMatrixMinor(m, i, j):
    """Return a copy of ``m`` with row ``i`` and column ``j`` removed."""
    remaining_rows = m[:i] + m[i + 1:]
    minor = []
    for row in remaining_rows:
        minor.append(row[:j] + row[j + 1:])
    return minor
def getMatrixDeternminant(m):
    """Return the determinant of square matrix ``m`` (a list of lists).

    Uses Laplace expansion along the first row, recursing on minors obtained
    from ``getMatrixMinor``.  The 1x1 case is handled explicitly; without it
    the expansion recursed into an empty minor and returned 0.
    """
    size = len(m)
    # base case for 1x1 matrix
    if size == 1:
        return m[0][0]
    # base case for 2x2 matrix
    if size == 2:
        return m[0][0] * m[1][1] - m[0][1] * m[1][0]

    determinant = 0
    for c in range(size):
        # The cofactor sign alternates along the first row.
        determinant += ((-1) ** c) * m[0][c] * getMatrixDeternminant(getMatrixMinor(m, 0, c))
    return determinant
def getMatrixInverse(m):
    """Return the inverse of square matrix ``m`` (a list of lists).

    The inverse is the transposed cofactor matrix divided by the
    determinant.  A singular matrix raises ``ZeroDivisionError`` from the
    division by a zero determinant.
    """
    size = len(m)
    # special case for 1x1 matrix: the cofactor expansion below would need
    # the determinant of an empty minor.
    if size == 1:
        return [[1 / m[0][0]]]

    determinant = getMatrixDeternminant(m)
    # special case for 2x2 matrix:
    if size == 2:
        return [[m[1][1] / determinant, -1 * m[0][1] / determinant],
                [-1 * m[1][0] / determinant, m[0][0] / determinant]]

    # find matrix of cofactors
    cofactors = [
        [((-1) ** (r + c)) * getMatrixDeternminant(getMatrixMinor(m, r, c))
         for c in range(size)]
        for r in range(size)
    ]
    # Materialize the transpose: transposeMatrix may hand back a lazy
    # iterator on Python 3, which cannot be indexed or measured with len().
    adjugate = [list(row) for row in transposeMatrix(cofactors)]
    return [[value / determinant for value in row] for row in adjugate]
Here is another way, using gaussian elimination instead:
def eliminate(r1, r2, col, target=0):
    """Subtract a multiple of row ``r1`` from ``r2`` so ``r2[col] == target``.

    ``r2`` is modified in place.  ``r1`` and ``r2`` may be the same list, in
    which case the row is rescaled (used with ``target=1`` to normalize).
    """
    fac = (r2[col] - target) / r1[col]
    for i in range(len(r2)):
        r2[i] -= fac * r1[i]


def gauss(a):
    """Reduce matrix ``a`` (a list of row lists) in place by Gauss-Jordan elimination.

    After the call the leading square part of ``a`` is the identity matrix.
    Returns ``a``.

    Raises:
        ValueError: if no non-zero pivot can be found for some column.
    """
    size = len(a)
    # Forward pass: make the matrix upper triangular.
    for i in range(size):
        if a[i][i] == 0:
            # Look BELOW row i for a row with a non-zero entry in column i;
            # testing a[i][j] (the pivot row itself) could swap in a row
            # whose pivot is still zero and then divide by zero.
            for j in range(i + 1, size):
                if a[j][i] != 0:
                    a[i], a[j] = a[j], a[i]
                    break
            else:
                raise ValueError("Matrix is not invertible")
        for j in range(i + 1, size):
            eliminate(a[i], a[j], i)
    # Backward pass: clear the entries above each pivot.
    for i in range(size - 1, -1, -1):
        for j in range(i - 1, -1, -1):
            eliminate(a[i], a[j], i)
    # Scale every row so its pivot becomes 1.
    for i in range(size):
        eliminate(a[i], a[i], i, target=1)
    return a
def inverse(a):
    """Return the inverse of square matrix ``a`` (a list of row lists).

    Builds the augmented matrix ``[a | I]``, reduces it with ``gauss`` and
    returns the right-hand half of every row.  ``a`` itself is not modified.
    """
    size = len(a)
    augmented = []
    for idx, row in enumerate(a):
        assert len(row) == size
        unit_row = [0] * idx + [1] + [0] * (size - idx - 1)
        augmented.append(row + unit_row)
    gauss(augmented)
    return [aug_row[len(aug_row) // 2:] for aug_row in augmented]
As of at least July 16, 2018 Numba has a fast matrix inverse. (You can see how they overload the standard NumPy inverse and other operations here.)
Here are the results of my benchmarking:
import numba
import numpy as np
from numpy import linalg as nla
from scipy import linalg as sla
def gen_ex(d0):
    """Return a random symmetric ``d0`` x ``d0`` matrix for benchmarking."""
    sample = np.random.randn(d0, d0)
    # Adding the transpose makes the result symmetric.
    return sample.T + sample
# Decorator restored: it was written as the comment `#numba.jit`, which left
# this function un-jitted although the benchmark labels it "NumPy + Numba".
@numba.jit
def inv_nla_jit(A):
    """Invert matrix ``A`` with ``np.linalg.inv`` compiled by Numba."""
    return np.linalg.inv(A)
# Decorator restored (it was written as the comment `#numba.jit`).
# NOTE(review): object mode is forced on the assumption that Numba cannot
# compile ``scipy.linalg.inv`` in nopython mode; confirm against the Numba
# version in use.
@numba.jit(forceobj=True)
def inv_sla_jit(A):
    """Invert matrix ``A`` with ``scipy.linalg.inv`` wrapped by Numba."""
    return sla.inv(A)
For small matrices it is particularly fast:
ex1 = gen_ex(4)
%timeit inv_nla_jit(ex1) # NumPy + Numba
%timeit inv_sla_jit(ex1) # SciPy + Numba
%timeit nla.inv(ex1) # NumPy
%timeit sla.inv(ex1) # SciPy
[Out]
2.54 µs ± 467 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
67.3 µs ± 9.18 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
63.5 µs ± 7.65 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
56.6 µs ± 5.03 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
Notice that the speedup only works for NumPy inverse, not SciPy (as expected).
Slightly larger matrix:
ex2 = gen_ex(40)
%timeit inv_nla_jit(ex2) # NumPy + Numba
%timeit inv_sla_jit(ex2) # SciPy + Numba
%timeit nla.inv(ex2) # NumPy
%timeit sla.inv(ex2) # SciPy
[Out]
131 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
278 µs ± 26.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
231 µs ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
189 µs ± 11.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
So there's still a speedup here but SciPy is catching up.
For a 4 x 4 matrix it's probably just about OK to use the mathematical formula, which you can find by Googling "formula for 4 by 4 matrix inverse". For example here (I can't vouch for its accuracy):
http://www.cg.info.hiroshima-cu.ac.jp/~miyazaki/knowledge/teche23.html
In general inverting a general matrix is not for the faint-hearted. You have to be aware of all the mathematically difficult cases and know why they won't apply to your usage, and catch them when you are supplied with mathematically pathological inputs (that, or return results of low accuracy or numerical garbage in the knowledge that it won't matter in your usage case provided you don't actually end up dividing by zero or overflowing MAXFLOAT ... which you might catch with an exception handler and present as "Error: matrix is singular or very close thereto").
It's generally better as a programmer to use library code written by numerical mathematics experts, unless you are willing to spend time understanding the physical and mathematical nature of the particular problem that you are addressing and become your own mathematics expert in your own specialist field.
I found that the Gauss-Jordan elimination algorithm helped a lot when attempting this. If you need to invert a matrix of arbitrary size (e.g. 5x5), where the closed-form formula is 49 pages long, it's best to use this algorithm. To invert a matrix, store it as a 2D array (a list of lists) and then call the Inverse function:
# Python test: Gauss-Jordan elimination
# Inputs are 2D array not matrix
Test_Array = [[3,3,2,1,1],[2,1,3,2,3],[1,3,3,2,2],[2,3,3,1,1],
[3,1,2,1,2]]
# Creating and initializing storage for the augmented matrix
# this is the same as the np.zeros((n,2*n)) function
def nx2n(n_Rows, n_Columns):
    """Return an ``n_Rows`` x ``2*n_Columns`` list of lists filled with 0."""
    width = n_Columns * 2
    # A fresh row list per iteration so the rows never alias each other.
    return [[0] * width for _ in range(n_Rows)]
# Applying matrix coefficients
def update(inputs, n_Rows, n_Columns, Zero):
    """Copy the ``n_Rows`` x ``n_Columns`` coefficients of ``inputs`` into ``Zero``.

    ``Zero`` is modified in place (its left block is overwritten) and returned.
    """
    for row_idx in range(n_Rows):
        source, target = inputs[row_idx], Zero[row_idx]
        for col_idx in range(n_Columns):
            target[col_idx] = source[col_idx]
    return Zero
# Augmenting Identity Matrix of Order n
def identity(n_Rows, n_Columns, Matrix):
    """Write an identity matrix into the right-hand half of ``Matrix``.

    Sets ``Matrix[d][d + n_Columns] = 1`` for every diagonal position ``d``;
    ``Matrix`` is modified in place and returned.
    """
    for diag in range(min(n_Rows, n_Columns)):
        Matrix[diag][diag + n_Columns] = 1
    return Matrix
# Applying & implementing the GJE algorithm
def Gussain_Jordan_Elimination(n_Rows, n_Columns, Matrix):
    """Reduce the left block of augmented ``Matrix`` to a diagonal matrix.

    ``Matrix`` is an ``n_Rows`` x ``2*n_Columns`` list of lists and is
    modified in place and returned.  For each pivot, every other row has a
    multiple of the pivot row subtracted so its entry in the pivot column
    becomes 0.

    A zero pivot is handled by swapping in a lower row that has a non-zero
    entry in that column.

    Raises:
        ZeroDivisionError: if no such row exists (the matrix is singular).
    """
    width = 2 * n_Columns
    for i in range(n_Rows):
        if Matrix[i][i] == 0:
            swap_with = next(
                (k for k in range(i + 1, n_Rows) if Matrix[k][i] != 0), None
            )
            if swap_with is None:
                raise ZeroDivisionError('error cannot divide by "0"')
            Matrix[i], Matrix[swap_with] = Matrix[swap_with], Matrix[i]
        pivot_row = Matrix[i]
        pivot = pivot_row[i]
        # j walks over the rows of the matrix (all rows except the pivot row).
        for j in range(n_Rows):
            if j == i:
                continue
            row = Matrix[j]
            ratio = row[i] / pivot
            for k in range(width):
                row[k] = row[k] - ratio * pivot_row[k]
    return Matrix
# Row Operation to make Principal Diagonal Element to '1'
def row_op(n_Rows, n_Columns, Matrix):
    """Divide every row of ``Matrix`` by its diagonal element.

    Operates in place on the first ``2*n_Columns`` entries of each of the
    first ``n_Rows`` rows, and returns ``Matrix``.
    """
    width = 2 * n_Columns
    for row_idx in range(n_Rows):
        row = Matrix[row_idx]
        # Read the pivot once, before the loop overwrites row[row_idx].
        pivot = row[row_idx]
        for col_idx in range(width):
            row[col_idx] = row[col_idx] / pivot
    return Matrix
# Build and return the inverted matrix
def Inverse(Matrix):
    """Return the inverse of square ``Matrix`` (a list of lists) as a new list.

    Builds the augmented matrix ``[Matrix | I]``, runs the elimination and
    row-normalization helpers on it, then extracts the right-hand half.
    """
    number_Rows = int(len(Matrix))
    number_Columns = int(len(Matrix[0]))

    # Same pipeline as a nested call chain, written one stage per line.
    augmented = nx2n(number_Rows, number_Columns)
    augmented = update(Matrix, number_Rows, number_Columns, augmented)
    augmented = identity(number_Rows, number_Columns, augmented)
    augmented = Gussain_Jordan_Elimination(number_Rows, number_Columns, augmented)
    Inversed_Matrix = row_op(number_Rows, number_Columns, augmented)

    return [
        [Inversed_Matrix[i][j] for j in range(number_Columns, 2 * number_Columns)]
        for i in range(number_Rows)
    ]
print(Inverse(Test_Array))
Simply add all methods
import math
def getMinorIndex(matrixLocal, x, y):
    """Return the 2x2 minor of a 3x3 matrix with row ``x`` and column ``y`` removed."""
    return [
        [matrixLocal[i][j] for j in range(3) if j != y]
        for i in range(3)
        if i != x
    ]
def getDeterminant2By2(matrixLocal):
    """Return the determinant of the 2x2 matrix ``matrixLocal``."""
    top, bottom = matrixLocal[0], matrixLocal[1]
    return top[0] * bottom[1] - top[1] * bottom[0]
def getDeterminant(matrixLocal):
    """Return the determinant of a 3x3 matrix by expansion along the first row.

    Each term is the 2x2 minor determinant times the row entry times the
    alternating sign from ``math.pow(-1, column)``.
    """
    return sum(
        getDeterminant2By2(getMinorIndex(matrixLocal, 0, col))
        * matrixLocal[0][col]
        * math.pow(-1, col)
        for col in range(3)
    )
def getCofactorMatrix(matrixLocal):
    """Return the 3x3 matrix of cofactors of ``matrixLocal``.

    Entry ``[i][j]`` is the determinant of the minor obtained by deleting
    row ``i`` and column ``j``, times ``math.pow(-1, i + j)``.
    """
    return [
        [
            getDeterminant2By2(getMinorIndex(matrixLocal, i, j)) * math.pow(-1, i + j)
            for j in range(3)
        ]
        for i in range(3)
    ]
def transpose(matrixLocal):
    """Return the transpose of a 3x3 matrix as a new list of lists."""
    return [[matrixLocal[j][i] for j in range(3)] for i in range(3)]
def divideMatrix(matrixLocal, divisor):
    """Return a new 3x3 matrix with every entry of ``matrixLocal`` divided by ``divisor``."""
    return [
        [matrixLocal[i][j] / divisor for j in range(3)]
        for i in range(3)
    ]
cofactor = getCofactorMatrix(matrix)
adjoint = transpose(cofactor)
det = getDeterminant(matrix)
inverse = divideMatrix(adjoint, det)
inverse
Inverse matrix of 3x3 without numpy [python3]
import pprint
def inverse_3X3_matrix(matrix=None):
    """Invert a 3x3 matrix via its cofactors, pretty-print and return the result.

    Args:
        matrix: 3x3 nested sequence of numbers.  When omitted, the built-in
            example matrix is used, so calling with no arguments still prints
            that example's inverse.

    Returns:
        The inverse as a list of three lists.

    Raises:
        ZeroDivisionError: if the matrix is singular (zero determinant).
    """
    if matrix is None:
        matrix = [[0, 1, 1],
                  [2, 3, -1],
                  [-1, 2, 1]]
    (a, b, c), (d, e, f), (g, h, i) = matrix

    # Signed cofactors, laid out row by row like the input.
    cofactors = [
        [e * i - f * h, -(d * i - f * g), d * h - e * g],
        [-(b * i - c * h), a * i - c * g, -(a * h - b * g)],
        [b * f - c * e, -(a * f - c * d), a * e - b * d],
    ]
    # Determinant by expansion along the first row.
    det_ = a * cofactors[0][0] + b * cofactors[0][1] + c * cofactors[0][2]

    # inverse = transposed cofactor matrix / determinant.  Dividing once
    # (instead of multiplying by 1 / det_) rounds each entry only once.
    inv_list = [
        [cofactors[col][row] / det_ for col in range(3)]
        for row in range(3)
    ]
    pprint.pprint(inv_list)
    return inv_list
inverse_3X3_matrix()
I used the formula from http://cg.info.hiroshima-cu.ac.jp/~miyazaki/knowledge/teche23.html to write the function that does the inversion of a 4x4 matrix:
import numpy as np
def myInverse(A):
    """Invert a 4x4 matrix with the closed-form adjugate formula.

    Args:
        A: 4x4 array-like (an ndarray or nested sequences).

    Returns:
        A 4x4 ndarray holding the adjugate of ``A`` divided by ``det(A)``.

    Raises:
        ValueError: if ``A`` is not 4x4.  The formula below only reads the
            top-left 4x4 entries, so a larger input would otherwise give a
            silently wrong answer.
    """
    A = np.asarray(A)  # allows nested lists; no copy for ndarrays
    if A.shape != (4, 4):
        raise ValueError("myInverse expects a 4x4 matrix, got shape %s" % (A.shape,))
    detA = np.linalg.det(A)

    # bRC is entry (R, C) of the adjugate of A.
    b00 = A[1,1]*A[2,2]*A[3,3] + A[1,2]*A[2,3]*A[3,1] + A[1,3]*A[2,1]*A[3,2] - A[1,1]*A[2,3]*A[3,2] - A[1,2]*A[2,1]*A[3,3] - A[1,3]*A[2,2]*A[3,1]
    b01 = A[0,1]*A[2,3]*A[3,2] + A[0,2]*A[2,1]*A[3,3] + A[0,3]*A[2,2]*A[3,1] - A[0,1]*A[2,2]*A[3,3] - A[0,2]*A[2,3]*A[3,1] - A[0,3]*A[2,1]*A[3,2]
    b02 = A[0,1]*A[1,2]*A[3,3] + A[0,2]*A[1,3]*A[3,1] + A[0,3]*A[1,1]*A[3,2] - A[0,1]*A[1,3]*A[3,2] - A[0,2]*A[1,1]*A[3,3] - A[0,3]*A[1,2]*A[3,1]
    b03 = A[0,1]*A[1,3]*A[2,2] + A[0,2]*A[1,1]*A[2,3] + A[0,3]*A[1,2]*A[2,1] - A[0,1]*A[1,2]*A[2,3] - A[0,2]*A[1,3]*A[2,1] - A[0,3]*A[1,1]*A[2,2]

    b10 = A[1,0]*A[2,3]*A[3,2] + A[1,2]*A[2,0]*A[3,3] + A[1,3]*A[2,2]*A[3,0] - A[1,0]*A[2,2]*A[3,3] - A[1,2]*A[2,3]*A[3,0] - A[1,3]*A[2,0]*A[3,2]
    b11 = A[0,0]*A[2,2]*A[3,3] + A[0,2]*A[2,3]*A[3,0] + A[0,3]*A[2,0]*A[3,2] - A[0,0]*A[2,3]*A[3,2] - A[0,2]*A[2,0]*A[3,3] - A[0,3]*A[2,2]*A[3,0]
    b12 = A[0,0]*A[1,3]*A[3,2] + A[0,2]*A[1,0]*A[3,3] + A[0,3]*A[1,2]*A[3,0] - A[0,0]*A[1,2]*A[3,3] - A[0,2]*A[1,3]*A[3,0] - A[0,3]*A[1,0]*A[3,2]
    b13 = A[0,0]*A[1,2]*A[2,3] + A[0,2]*A[1,3]*A[2,0] + A[0,3]*A[1,0]*A[2,2] - A[0,0]*A[1,3]*A[2,2] - A[0,2]*A[1,0]*A[2,3] - A[0,3]*A[1,2]*A[2,0]

    b20 = A[1,0]*A[2,1]*A[3,3] + A[1,1]*A[2,3]*A[3,0] + A[1,3]*A[2,0]*A[3,1] - A[1,0]*A[2,3]*A[3,1] - A[1,1]*A[2,0]*A[3,3] - A[1,3]*A[2,1]*A[3,0]
    b21 = A[0,0]*A[2,3]*A[3,1] + A[0,1]*A[2,0]*A[3,3] + A[0,3]*A[2,1]*A[3,0] - A[0,0]*A[2,1]*A[3,3] - A[0,1]*A[2,3]*A[3,0] - A[0,3]*A[2,0]*A[3,1]
    b22 = A[0,0]*A[1,1]*A[3,3] + A[0,1]*A[1,3]*A[3,0] + A[0,3]*A[1,0]*A[3,1] - A[0,0]*A[1,3]*A[3,1] - A[0,1]*A[1,0]*A[3,3] - A[0,3]*A[1,1]*A[3,0]
    b23 = A[0,0]*A[1,3]*A[2,1] + A[0,1]*A[1,0]*A[2,3] + A[0,3]*A[1,1]*A[2,0] - A[0,0]*A[1,1]*A[2,3] - A[0,1]*A[1,3]*A[2,0] - A[0,3]*A[1,0]*A[2,1]

    b30 = A[1,0]*A[2,2]*A[3,1] + A[1,1]*A[2,0]*A[3,2] + A[1,2]*A[2,1]*A[3,0] - A[1,0]*A[2,1]*A[3,2] - A[1,1]*A[2,2]*A[3,0] - A[1,2]*A[2,0]*A[3,1]
    b31 = A[0,0]*A[2,1]*A[3,2] + A[0,1]*A[2,2]*A[3,0] + A[0,2]*A[2,0]*A[3,1] - A[0,0]*A[2,2]*A[3,1] - A[0,1]*A[2,0]*A[3,2] - A[0,2]*A[2,1]*A[3,0]
    b32 = A[0,0]*A[1,2]*A[3,1] + A[0,1]*A[1,0]*A[3,2] + A[0,2]*A[1,1]*A[3,0] - A[0,0]*A[1,1]*A[3,2] - A[0,1]*A[1,2]*A[3,0] - A[0,2]*A[1,0]*A[3,1]
    b33 = A[0,0]*A[1,1]*A[2,2] + A[0,1]*A[1,2]*A[2,0] + A[0,2]*A[1,0]*A[2,1] - A[0,0]*A[1,2]*A[2,1] - A[0,1]*A[1,0]*A[2,2] - A[0,2]*A[1,1]*A[2,0]

    Ainv = np.array([[b00, b01, b02, b03], [b10, b11, b12, b13], [b20, b21, b22, b23], [b30, b31, b32, b33]]) / detA
    return Ainv
I have a numpy operation that looks like the following:
for i in range(i_max):
for j in range(j_max):
r[i, j, x[i, j], y[i, j]] = c[i, j]
where x, y and c have the same shape.
Is it possible to use numpy's advanced indexing to speed this operation up?
I tried using:
i = numpy.arange(i_max)
j = numpy.arange(j_max)
r[i, j, x, y] = c
However, I didn't get the result I expected.
Using linear indexing -
d0,d1,d2,d3 = r.shape
np.put(r,np.arange(i_max)[:,None]*d1*d2*d3 + np.arange(j_max)*d2*d3 + x*d3 +y,c)
Benchmarking and verification
Define functions -
def linear_indx(r, x, y, c, i_max, j_max):
    """Scatter ``c[i, j]`` into ``r[i, j, x[i, j], y[i, j]]`` via flat indices.

    ``r`` is 4D and is modified in place with ``np.put``; it is also returned.
    ``x``, ``y`` and ``c`` have shape ``(i_max, j_max)``.
    """
    _, d1, d2, d3 = r.shape
    # Offset of each (i, j, x, y) position in the C-order flattening of r.
    i_offset = np.arange(i_max)[:, None] * d1 * d2 * d3
    j_offset = np.arange(j_max) * d2 * d3
    flat_index = i_offset + j_offset + x * d3 + y
    np.put(r, flat_index, c)
    return r
def org_app(r, x, y, c, i_max, j_max):
    """Loop-based reference: set ``r[i, j, x[i, j], y[i, j]] = c[i, j]``.

    ``r`` is modified in place and returned.
    """
    # np.ndindex walks (i, j) in the same row-major order as two nested loops.
    for i, j in np.ndindex(i_max, j_max):
        r[i, j, x[i, j], y[i, j]] = c[i, j]
    return r
Setup input arrays and benchmark -
In [134]: # Setup input arrays
...: i_max = 40
...: j_max = 50
...: D0 = 60
...: D1 = 70
...: N = 80
...:
...: r = np.zeros((D0,D1,N,N))
...: c = np.random.rand(i_max,j_max)
...:
...: x = np.random.randint(0,N,(i_max,j_max))
...: y = np.random.randint(0,N,(i_max,j_max))
...:
In [135]: # Make copies for testing, as both functions make in-situ changes
...: r1 = r.copy()
...: r2 = r.copy()
...:
In [136]: # Verify results by comparing with original loopy approach
...: np.allclose(linear_indx(r1,x,y,c,i_max,j_max),org_app(r2,x,y,c,i_max,j_max))
Out[136]: True
In [137]: # Make copies for testing, as both functions make in-situ changes
...: r1 = r.copy()
...: r2 = r.copy()
...:
In [138]: %timeit linear_indx(r1,x,y,c,i_max,j_max)
10000 loops, best of 3: 115 µs per loop
In [139]: %timeit org_app(r2,x,y,c,i_max,j_max)
100 loops, best of 3: 2.25 ms per loop
The indexing arrays need to be broadcastable for this to work. The only change needed is to add an axis to the first index i to match the shape with the rest. The quick way to accomplish this is by indexing with None (which is equivalent to numpy.newaxis):
i = numpy.arange(i_max)
j = numpy.arange(j_max)
r[i[:,None], j, x, y] = c