Numpy syntax to assign elements of an array

I want to create an array whose elements are a function of their position.
Something like
N = 1000000
newarray = np.zeros([N, N, N])
for i in range(N):
    for j in range(N):
        for k in range(N):
            newarray[i, j, k] = f(i, j, k)
Is there a way to increase the speed of this operation, by removing the for loops / parallelizing it using the numpy syntax?
This is the f function
def f(i, j, k):
    indices = (R[:, 0] == i) * (R[:, 1] == j) * (R[:, 2] == k)
    return M[indices]
where for example
R = np.random.randint(0,N,[N,3])
M = np.random.randn(N)*15
and in the actual application they are not random.

You can do that operation with the at method of np.add:
import numpy as np
np.random.seed(0)
N = 100
R = np.random.randint(0, N, [N, 3])
M = np.random.randn(N) * 15
newarray = np.zeros([N, N, N])
np.add.at(newarray, (R[:, 0], R[:, 1], R[:, 2]), M)
In this case, if R has any repeated rows, the corresponding value in newarray will be the sum of all the corresponding values in M.
EDIT: To take the average instead of the sum for repeated elements, you could do something like this:
import numpy as np
np.random.seed(0)
N = 100
R = np.random.randint(0, N, [N, 3])
M = np.random.randn(N) * 15
newarray = np.zeros([N, N, N])
np.add.at(newarray, (R[:, 0], R[:, 1], R[:, 2]), M)
newarray_count = np.zeros([N, N, N])
np.add.at(newarray_count, (R[:, 0], R[:, 1], R[:, 2]), 1)
m = newarray_count > 1
newarray[m] /= newarray_count[m]
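An alternative (a sketch using only standard NumPy calls, np.ravel_multi_index and np.bincount) flattens the 3D indices and accumulates sums and counts in one pass each; dividing where the count is positive gives the same averages:
import numpy as np
np.random.seed(0)
N = 100
R = np.random.randint(0, N, [N, 3])
M = np.random.randn(N) * 15
# Flatten each (i, j, k) triple into a single linear index into an N*N*N array.
flat = np.ravel_multi_index((R[:, 0], R[:, 1], R[:, 2]), (N, N, N))
sums = np.bincount(flat, weights=M, minlength=N**3).reshape(N, N, N)
counts = np.bincount(flat, minlength=N**3).reshape(N, N, N)
# Average over repeated rows; cells that never appear stay zero.
newarray = np.divide(sums, counts, out=np.zeros_like(sums), where=counts > 0)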

Related

How to substitute 2D IndexedBase variable in Sympy

Can I substitute a 2D IndexedBase that is in an expression for a numpy array using sympy.subs() or sympy.evalf(subs=())?
So something like:
i, j, m, n = sp.symbols('i j m n', integer=True)
x = sp.IndexedBase('x')
a = sp.IndexedBase('a')
b = sp.IndexedBase('b')
f = sp.ln(sp.Sum(sp.exp(sp.Sum(a[i, j]*x[j]+b[i], (j, 1, n))), (i, 1, m)))
which currently outputs:
[image: rendered SymPy expression omitted]
Note that a is a 2D IndexedBase and b is a 1D IndexedBase. I would like to be able to substitute the IndexedBases defined above with NumPy arrays, something like
A = np.random.uniform(MIN, MAX, (M, N))
B = np.random.uniform(MIN, MAX, M)
f.evalf(subs={a: A, b: B, x: (1, 5), m: M, n: N})
How would I be able to achieve something like this?
You can do this with lambdify like this:
In [36]: i, j, m, n = sp.symbols('i j m n', integer=True)
...: x = sp.IndexedBase('x')
...: a = sp.IndexedBase('a')
...: b = sp.IndexedBase('b')
...:
...: f = sp.ln(sp.Sum(sp.exp(sp.Sum(a[i, j]*x[j]+b[i], (j, 0, n-1))), (i, 0, m-1)))
In [37]: f_lam = lambdify((a, b, x, m, n), f, 'numpy')
In [38]: MIN, MAX, M, N = 0, 1, 3, 4
In [39]: A = np.random.uniform(MIN, MAX, (M, N))
...: B = np.random.uniform(MIN, MAX, M)
In [40]: X = np.random.uniform(MIN, MAX, N)
In [41]: f_lam(A, B, X, M, N)
Out[41]: 4.727558334863294
Note that I shifted the limits of the Sum to go from 0 to n-1 to match up with NumPy's 0-based indexing rules.
Also note that this uses the ordinary Python sum and a double-loop generator expression rather than more efficient numpy operations:
In [42]: import inspect
In [44]: print(inspect.getsource(f_lam))
def _lambdifygenerated(Dummy_34, Dummy_33, Dummy_32, m, n):
    return log((builtins.sum(exp((builtins.sum(Dummy_32[j]*Dummy_34[i, j] + Dummy_33[i] for j in range(0, n - 1+1)))) for i in range(0, m - 1+1))))
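For comparison, a direct NumPy implementation of the same log-sum-exp expression (a sketch; f_numpy is a name introduced here, and it should agree with f_lam(A, B, X, M, N) from above) avoids the Python-level loops entirely:
import numpy as np

def f_numpy(A, B, X):
    # The inner sum over j of (a[i, j]*x[j] + b[i]) equals (A @ X)[i] + n*B[i],
    # because b[i] is added once for each of the n values of j.
    n = A.shape[1]
    inner = A @ X + n * B
    return np.log(np.exp(inner).sum())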
The generated code is more efficient if you set this up using matrix symbols rather than explicit expressions involving IndexedBase:
In [22]: m, n = symbols('m, n')
In [23]: a = MatrixSymbol('a', m, n)
In [24]: b = MatrixSymbol('b', m, 1)
In [25]: x = MatrixSymbol('x', n, 1)
In [26]: one = OneMatrix(1, m)
In [28]: f = log(one*(a*x + b))
In [29]: f_lam = lambdify((a, b, x, m, n), f, 'numpy')
In [30]: f_lam(A, B, X, M, N)
Out[30]: array([1.52220638])
In [33]: print(inspect.getsource(f_lam))
def _lambdifygenerated(a, b, x, m, n):
    return log((ones((1, m))).dot((a).dot(x) + b))

Can I speed up this aerodynamics calculation with Numba, vectorization, or multiprocessing?

Problem:
I am trying to increase the speed of an aerodynamics function in Python.
Function Set:
import numpy as np
from numba import njit
def calculate_velocity_induced_by_line_vortices(
    points, origins, terminations, strengths, collapse=True
):
    # Expand the dimensionality of the points input. It is now of shape (N x 1 x 3).
    # This will allow NumPy to broadcast the upcoming subtractions.
    points = np.expand_dims(points, axis=1)
    # Define the vectors from the vortex to the points. r_1 and r_2 now both are of
    # shape (N x M x 3). Each row/column pair holds the vector associated with each
    # point/vortex pair.
    r_1 = points - origins
    r_2 = points - terminations
    r_0 = r_1 - r_2
    r_1_cross_r_2 = nb_2d_explicit_cross(r_1, r_2)
    r_1_cross_r_2_absolute_magnitude = (
        r_1_cross_r_2[:, :, 0] ** 2
        + r_1_cross_r_2[:, :, 1] ** 2
        + r_1_cross_r_2[:, :, 2] ** 2
    )
    r_1_length = nb_2d_explicit_norm(r_1)
    r_2_length = nb_2d_explicit_norm(r_2)
    # Define the radius of the line vortices. This is used to get rid of any
    # singularities.
    radius = 3.0e-16
    # Set the lengths and the absolute magnitudes to zero, at the places where the
    # lengths and absolute magnitudes are less than the vortex radius.
    r_1_length[r_1_length < radius] = 0
    r_2_length[r_2_length < radius] = 0
    r_1_cross_r_2_absolute_magnitude[r_1_cross_r_2_absolute_magnitude < radius] = 0
    # Calculate the vector dot products.
    r_0_dot_r_1 = np.einsum("ijk,ijk->ij", r_0, r_1)
    r_0_dot_r_2 = np.einsum("ijk,ijk->ij", r_0, r_2)
    # Calculate k and then the induced velocity, ignoring any divide-by-zero or nan
    # errors. k is of shape (N x M).
    with np.errstate(divide="ignore", invalid="ignore"):
        k = (
            strengths
            / (4 * np.pi * r_1_cross_r_2_absolute_magnitude)
            * (r_0_dot_r_1 / r_1_length - r_0_dot_r_2 / r_2_length)
        )
    # Set the shape of k to be (N x M x 1) to support NumPy broadcasting in the
    # subsequent multiplication.
    k = np.expand_dims(k, axis=2)
    induced_velocities = k * r_1_cross_r_2
    # Set the values of the induced velocity to zero where there are singularities.
    induced_velocities[np.isinf(induced_velocities)] = 0
    induced_velocities[np.isnan(induced_velocities)] = 0
    if collapse:
        induced_velocities = np.sum(induced_velocities, axis=1)
    return induced_velocities

@njit
def nb_2d_explicit_norm(vectors):
    return np.sqrt(
        (vectors[:, :, 0]) ** 2 + (vectors[:, :, 1]) ** 2 + (vectors[:, :, 2]) ** 2
    )

@njit
def nb_2d_explicit_cross(a, b):
    e = np.zeros_like(a)
    e[:, :, 0] = a[:, :, 1] * b[:, :, 2] - a[:, :, 2] * b[:, :, 1]
    e[:, :, 1] = a[:, :, 2] * b[:, :, 0] - a[:, :, 0] * b[:, :, 2]
    e[:, :, 2] = a[:, :, 0] * b[:, :, 1] - a[:, :, 1] * b[:, :, 0]
    return e
Context:
This function is used by Ptera Software, an open-source solver for flapping wing aerodynamics. Profiling shows that it is by far the largest contributor to Ptera Software's run time.
Currently, Ptera Software takes just over 3 minutes to run a typical case, and my goal is to get this below 1 minute.
The function takes in a group of points, origins, terminations, and strengths. At every point, it finds the induced velocity due to the line vortices, which are characterized by the groups of origins, terminations, and strengths. If collapse is true, then the output is the cumulative velocity induced at each point due to the vortices. If false, the function outputs each vortex's contribution to the velocity at each point.
During a typical run, the velocity function is called approximately 2000 times. At first, the calls involve vectors with relatively small input arguments (around 200 points, origins, terminations, and strengths). Later calls involve large input arguments (around 400 points and around 6,000 origins, terminations, and strengths). An ideal solution would be fast for all size inputs, but increasing the speed of large input calls is more important.
For testing, I recommend running the following script with your own implementation of the function:
import timeit
import matplotlib.pyplot as plt
import numpy as np

n_repeat = 2
n_execute = 10 ** 3
min_oom = 0
max_oom = 3
times_py = []

for i in range(max_oom - min_oom + 1):
    n_elem = 10 ** i
    n_elem_pretty = np.format_float_scientific(n_elem, 0)
    print("Number of elements: " + n_elem_pretty)

    # Benchmark Python.
    print("\tBenchmarking Python...")
    setup = '''
import numpy as np

these_points = np.random.random((''' + str(n_elem) + ''', 3))
these_origins = np.random.random((''' + str(n_elem) + ''', 3))
these_terminations = np.random.random((''' + str(n_elem) + ''', 3))
these_strengths = np.random.random(''' + str(n_elem) + ''')

def calculate_velocity_induced_by_line_vortices(points, origins, terminations,
                                                strengths, collapse=True):
    pass
'''
    statement = '''
results_orig = calculate_velocity_induced_by_line_vortices(these_points, these_origins,
                                                           these_terminations,
                                                           these_strengths)
'''
    times = timeit.repeat(repeat=n_repeat, stmt=statement, setup=setup, number=n_execute)
    time_py = min(times) / n_execute
    time_py_pretty = np.format_float_scientific(time_py, 2)
    print("\t\tAverage Time per Loop: " + time_py_pretty + " s")

    # Record the times.
    times_py.append(time_py)

sizes = [10 ** i for i in range(max_oom - min_oom + 1)]

fig, ax = plt.subplots()
ax.plot(sizes, times_py, label='Python')
ax.set_xscale("log")
ax.set_xlabel("Size of List or Array (elements)")
ax.set_ylabel("Average Time per Loop (s)")
ax.set_title(
    "Comparison of Different Optimization Methods\nBest of "
    + str(n_repeat)
    + " Runs, each with "
    + str(n_execute)
    + " Loops"
)
ax.legend()
plt.show()
Previous Attempts:
My prior attempts at speeding up this function involved vectorizing it (which worked great, so I kept those changes) and trying out Numba's JIT compiler. I had mixed results with Numba. When I tried to use Numba on a modified version of the entire velocity function, my results were much slower than before. However, I found that Numba significantly sped up the cross-product and norm functions, which I implemented above.
Updates:
Update 1:
Based on Mercury's comment (which has since been deleted), I replaced
points = np.expand_dims(points, axis=1)
r_1 = points - origins
r_2 = points - terminations
with two calls to the following function:
@njit
def subtract(a, b):
    c = np.empty((a.shape[0], b.shape[0], 3))
    for i in range(a.shape[0]):
        for j in range(b.shape[0]):
            for k in range(3):
                c[i, j, k] = a[i, k] - b[j, k]
    return c
This reduced the run time from 227 s to 220 s. This is better! However, it is still not fast enough.
I have also tried setting njit's fastmath flag to True, and using a Numba function instead of calls to np.einsum. Neither increased the speed.
Update 2:
With Jérôme Richard's answer, the run time is now 156 s, which is a decrease of 29%! I'm satisfied enough to accept this answer, but feel free to make other suggestions if you think you can improve on their work!
First of all, Numba can perform parallel computations, resulting in faster code if you manually request it using parallel=True and prange. This is useful for big arrays (but not for small ones); see the sketch below.
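A minimal illustration of the pattern (a sketch, separate from the vortex code itself):
import numpy as np
from numba import njit, prange

@njit(parallel=True)
def row_sums(a):
    out = np.empty(a.shape[0])
    # Iterations of a prange loop are distributed across threads.
    for i in prange(a.shape[0]):
        out[i] = a[i, :].sum()
    return out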
Moreover, your computation is mainly memory bound. Thus, you should avoid creating big arrays when they are not reused multiple times, or more generally when they cannot be recomputed on the fly (in a relatively cheap way). This is the case for r_0 for example.
In addition, memory access patterns matter: vectorization is more efficient when accesses are contiguous in memory, and the cache/RAM is used more efficiently. Consequently, arr[0, :, :] = 0 should be faster than arr[:, :, 0] = 0. Similarly, arr[:, :, 0] = arr[:, :, 1] = 0 should be much slower than arr[:, :, 0:2] = 0, since the former performs two non-contiguous passes over memory while the latter performs only one, more contiguous, pass. Sometimes it can be beneficial to transpose your data so that the following calculations are much faster.
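A small illustration of the point about passes over memory (a sketch; actual timings vary by machine):
import numpy as np
arr = np.zeros((1000, 1000, 3))
# Two separate traversals of the array:
arr[:, :, 0] = 0
arr[:, :, 1] = 0
# A single traversal covering both components:
arr[:, :, 0:2] = 0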
Moreover, NumPy tends to create many temporary arrays that are costly to allocate. This is a huge problem when the input arrays are small. The Numba JIT can avoid that in most cases.
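In plain NumPy, the out= parameter of ufuncs is one way to avoid some of those temporaries (a sketch):
import numpy as np
a = np.random.rand(1000, 3)
b = np.random.rand(1000, 3)
out = np.empty_like(a)
# 'a - b' allocates a fresh array on every call; writing into a
# preallocated buffer avoids that allocation.
np.subtract(a, b, out=out)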
Finally, regarding your computation, it may be a good idea to use a GPU for big arrays (definitely not for small ones). You can take a look at cupy or clpy to do that quite easily.
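For instance, the broadcast subtraction could run on a GPU with CuPy like this (a sketch; it assumes a CUDA-capable GPU with CuPy installed, and subtract_gpu is a name introduced here):
import cupy as cp  # assumption: CuPy is installed and a GPU is available

def subtract_gpu(a, b):
    # Same (N, M, 3) broadcast subtraction as the NumPy version, on the GPU.
    c = cp.asarray(a)[:, None, :] - cp.asarray(b)[None, :, :]
    return cp.asnumpy(c)  # copy the result back to host memory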
Here is an optimized implementation working on the CPU:
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def subtract(a, b):
    c = np.empty((a.shape[0], b.shape[0], 3))
    for i in prange(c.shape[0]):
        for j in range(c.shape[1]):
            for k in range(3):
                c[i, j, k] = a[i, k] - b[j, k]
    return c

@njit(parallel=True)
def nb_2d_explicit_norm(vectors):
    res = np.empty((vectors.shape[0], vectors.shape[1]))
    for i in prange(res.shape[0]):
        for j in range(res.shape[1]):
            res[i, j] = np.sqrt(vectors[i, j, 0] ** 2 + vectors[i, j, 1] ** 2 + vectors[i, j, 2] ** 2)
    return res

# NOTE: better memory access pattern
@njit(parallel=True)
def nb_2d_explicit_cross(a, b):
    e = np.empty(a.shape)
    for i in prange(e.shape[0]):
        for j in range(e.shape[1]):
            e[i, j, 0] = a[i, j, 1] * b[i, j, 2] - a[i, j, 2] * b[i, j, 1]
            e[i, j, 1] = a[i, j, 2] * b[i, j, 0] - a[i, j, 0] * b[i, j, 2]
            e[i, j, 2] = a[i, j, 0] * b[i, j, 1] - a[i, j, 1] * b[i, j, 0]
    return e

# NOTE: avoid the slow building of temporary arrays
@njit(parallel=True)
def cross_absolute_magnitude(cross):
    return cross[:, :, 0] ** 2 + cross[:, :, 1] ** 2 + cross[:, :, 2] ** 2

# NOTE: avoid the slow building of temporary arrays again and multiple passes in memory
# Warning: does the work in-place
@njit(parallel=True)
def discard_singularities(arr):
    for i in prange(arr.shape[0]):
        for j in range(arr.shape[1]):
            for k in range(3):
                if np.isinf(arr[i, j, k]) or np.isnan(arr[i, j, k]):
                    arr[i, j, k] = 0.0

@njit(parallel=True)
def compute_k(strengths, r_1_cross_r_2_absolute_magnitude, r_0_dot_r_1, r_1_length, r_0_dot_r_2, r_2_length):
    return (strengths
            / (4 * np.pi * r_1_cross_r_2_absolute_magnitude)
            * (r_0_dot_r_1 / r_1_length - r_0_dot_r_2 / r_2_length))

@njit(parallel=True)
def rDotProducts(b, c):
    assert b.shape == c.shape and b.shape[2] == 3
    n, m = b.shape[0], b.shape[1]
    ab = np.empty((n, m))
    ac = np.empty((n, m))
    for i in prange(n):
        for j in range(m):
            ab[i, j] = 0.0
            ac[i, j] = 0.0
            for k in range(3):
                a = b[i, j, k] - c[i, j, k]
                ab[i, j] += a * b[i, j, k]
                ac[i, j] += a * c[i, j, k]
    return (ab, ac)

# Compute `np.sum(arr, axis=1)` in parallel.
@njit(parallel=True)
def collapseArr(arr):
    assert arr.shape[2] == 3
    n, m = arr.shape[0], arr.shape[1]
    res = np.empty((n, 3))
    for i in prange(n):
        res[i, 0] = np.sum(arr[i, :, 0])
        res[i, 1] = np.sum(arr[i, :, 1])
        res[i, 2] = np.sum(arr[i, :, 2])
    return res

def calculate_velocity_induced_by_line_vortices(points, origins, terminations, strengths, collapse=True):
    r_1 = subtract(points, origins)
    r_2 = subtract(points, terminations)
    # NOTE: r_0 is computed on the fly by rDotProducts
    r_1_cross_r_2 = nb_2d_explicit_cross(r_1, r_2)
    r_1_cross_r_2_absolute_magnitude = cross_absolute_magnitude(r_1_cross_r_2)
    r_1_length = nb_2d_explicit_norm(r_1)
    r_2_length = nb_2d_explicit_norm(r_2)
    radius = 3.0e-16
    r_1_length[r_1_length < radius] = 0
    r_2_length[r_2_length < radius] = 0
    r_1_cross_r_2_absolute_magnitude[r_1_cross_r_2_absolute_magnitude < radius] = 0
    r_0_dot_r_1, r_0_dot_r_2 = rDotProducts(r_1, r_2)
    with np.errstate(divide="ignore", invalid="ignore"):
        k = compute_k(strengths, r_1_cross_r_2_absolute_magnitude, r_0_dot_r_1, r_1_length, r_0_dot_r_2, r_2_length)
    k = np.expand_dims(k, axis=2)
    induced_velocities = k * r_1_cross_r_2
    discard_singularities(induced_velocities)
    if collapse:
        induced_velocities = collapseArr(induced_velocities)
    return induced_velocities
On my machine, this code is 2.5 times faster than the initial implementation on arrays of size 10**3. It also uses a bit less memory.
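Example usage (a sketch mirroring the benchmark setup above; note that the first call also pays Numba's compilation cost):
import numpy as np
n = 1000
points = np.random.random((n, 3))
origins = np.random.random((n, 3))
terminations = np.random.random((n, 3))
strengths = np.random.random(n)
velocities = calculate_velocity_induced_by_line_vortices(
    points, origins, terminations, strengths
)
print(velocities.shape)  # (1000, 3), since collapse defaults to True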

evaluate many monomials at many points

The following problem concerns evaluating many monomials (x**k * y**l * z**m) at many points.
I would like to compute the "inner power" of two numpy arrays, i.e.,
import numpy
a = numpy.random.rand(10, 3)
b = numpy.random.rand(3, 5)
out = numpy.ones((10, 5))
for i in range(10):
    for j in range(5):
        for k in range(3):
            out[i, j] *= a[i, k]**b[k, j]
print(out.shape)
If instead the line would read
out[i, j] += a[i, k]*b[j, k]
this would be a number of inner products, computable with a simple dot or einsum.
Is it possible to perform the above loop in just one numpy line?
What about thinking of it in terms of logarithms:
import numpy
a = numpy.random.rand(10, 3)
b = numpy.random.rand(3, 5)
out = numpy.exp(numpy.matmul(numpy.log(a), b))
Since c_ij = prod(a_ik ** b_kj, k=1..K), then log(c_ij) = sum(log(a_ik) * b_kj, k=1..K).
Note: Having zeros in a may mess up the result (negatives too, but then the result wouldn't be well defined anyway). I have given it a try and it doesn't seem to actually break; I don't know if that behavior is guaranteed by NumPy, but to be safe you can add something like the following at the end (with eps a small tolerance):
out[numpy.logical_or.reduce(a < eps, axis=1)] = 0
You can use broadcasting after extending those arrays to 3D versions -
(a[:,:,None]**b[None,:,:]).prod(axis=1)
Simply put -
(a[...,None]**b[None]).prod(1)
Basically, we keep the first axis of a and the last axis of b aligned with the output, while performing element-wise powers along the shared length-3 axis. Schematically, using the given sample shapes:
10 x 3 x 1
 1 x 3 x 5
which broadcast to 10 x 3 x 5; the prod over axis 1 then yields 10 x 5.
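A quick equivalence check (a sketch) of the broadcast form against the original loop:
import numpy
a = numpy.random.rand(10, 3)
b = numpy.random.rand(3, 5)
expected = numpy.ones((10, 5))
for i in range(10):
    for j in range(5):
        for k in range(3):
            expected[i, j] *= a[i, k]**b[k, j]
assert numpy.allclose((a[:, :, None]**b[None, :, :]).prod(axis=1), expected)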
Two more solutions:
Inlining
numpy.array([
    numpy.prod([a[:, i]**bb[i] for i in range(len(bb))], axis=0)
    for bb in b.T
]).T
and using power.outer:
numpy.prod([numpy.power.outer(a[:, k], b[k]) for k in range(len(b))], axis=0)
Both are a bit slower than the broadcasting solution.
Even with some logic to accommodate zero and negative values, the exp-log solution takes the cake.
Code to reproduce the plot:
import numpy
import perfplot
def loop(data):
    a, b = data
    m = a.shape[0]
    n = b.shape[1]
    out = numpy.ones((m, n))
    for i in range(m):
        for j in range(n):
            for k in range(3):
                out[i, j] *= a[i, k]**b[k, j]
    return out

def broadcasting(data):
    a, b = data
    return (a[..., None]**b[None]).prod(1)

def log_exp(data):
    a, b = data
    neg_a = numpy.zeros(a.shape, dtype=int)
    neg_a[a < 0.0] = 1
    odd_b = numpy.zeros(b.shape, dtype=int)
    odd_b[b % 2 == 1] = 1
    negative_count = numpy.dot(neg_a, odd_b)
    out = (-1)**negative_count * numpy.exp(
        numpy.matmul(
            numpy.log(abs(a), where=abs(a) > 0.0),
            b
        ))
    zero_a = numpy.zeros(a.shape, dtype=int)
    zero_a[a == 0.0] = 1
    pos_b = numpy.zeros(b.shape, dtype=int)
    pos_b[b > 0] = 1
    zero_count = numpy.dot(zero_a, pos_b)
    out[zero_count > 0] = 0.0
    return out

def inline(data):
    a, b = data
    return numpy.array([
        numpy.prod([a[:, i]**bb[i] for i in range(len(bb))], axis=0)
        for bb in b.T
    ]).T

def outer_power(data):
    a, b = data
    return numpy.prod([
        numpy.power.outer(a[:, k], b[k]) for k in range(len(b))
    ], axis=0)

perfplot.show(
    setup=lambda n: (
        numpy.random.rand(n, 3) - 0.5,
        numpy.random.randint(0, 10, (3, n))
    ),
    n_range=[2**k for k in range(11)],
    repeat=10,
    kernels=[
        loop,
        broadcasting,
        inline,
        log_exp,
        outer_power,
    ],
    logx=True,
    logy=True,
    xlabel='len(a)',
)
import numpy
a = numpy.random.rand(10, 3)
b = numpy.random.rand(3, 5)
out = [[numpy.prod([a[i, k]**b[k, j] for k in range(3)]) for j in range(5)] for i in range(10)]

Generate matrices for pairs of values in Numpy

I have a 3D array (a 2D array of vectors), of which I want to transform each vector with a rotation matrix. The rotations are in two separate 2D arrays of radians angle values called cols and rows.
I've been able to have NumPy compute the angles for me already, without a Python loop. Now I'm looking for a way to have NumPy generate the rotation matrices, too, hopefully resulting in a great performance boost.
size = img.shape[:2]
# Create an array that assigns each pixel the percentage of
# the correction (value between -1 and 1, distributed linearly).
cols = np.array([np.arange(size[1]) for __ in range(size[0])]) / (size[1] - 1) * 2 - 1
rows = np.array([np.arange(size[0]) for __ in range(size[1])]).T / (size[0] - 1) * 2 - 1
# Atan distribution based on F-number and Sensor size.
cols = np.arctan(sh * cols / (2 * f))
rows = np.arctan(sv * rows / (2 * f))
### This is the loop that I would like to remove and find a
### clever way to make NumPy do the same operation natively.
for i in range(size[0]):
    for j in range(size[1]):
        ah = cols[i, j]
        av = rows[i, j]
        # Y-rotation.
        mat = np.matrix([
            [ np.cos(ah), 0, np.sin(ah)],
            [0, 1, 0],
            [-np.sin(ah), 0, np.cos(ah)]
        ])
        # X-rotation.
        mat *= np.matrix([
            [1, 0, 0],
            [0, np.cos(av), -np.sin(av)],
            [0, np.sin(av), np.cos(av)]
        ])
        img[i, j] = img[i, j] * mat
return img
Is there any clever way to rewrite the loop in NumPy operations?
(Let's assume the shape of img be (a, b, 3).)
Firstly, cols and rows do not need to be fully expanded to shape (a, b) (you could write cols[j] instead of cols[i, j]), and they can be easily generated using np.linspace:
cols = np.linspace(-1, 1, size[1]) # shape: (b,)
rows = np.linspace(-1, 1, size[0]) # shape: (a,)
cols = np.arctan(sh * cols / (2*f))
rows = np.arctan(sv * rows / (2*f))
Then we precalculate the components of the matrices.
# shape: (b,)
cos_ah = np.cos(cols)
sin_ah = np.sin(cols)
zeros_ah = np.zeros_like(cols)
ones_ah = np.ones_like(cols)
# shape: (a,)
cos_av = np.cos(rows)
sin_av = np.sin(rows)
zeros_av = np.zeros_like(rows)
ones_av = np.ones_like(rows)
And then construct the rotation matrices:
# shape: (3, 3, b)
y_mat = np.array([
    [cos_ah, zeros_ah, sin_ah],
    [zeros_ah, ones_ah, zeros_ah],
    [-sin_ah, zeros_ah, cos_ah],
])
# shape: (3, 3, a)
x_mat = np.array([
    [ones_av, zeros_av, zeros_av],
    [zeros_av, cos_av, -sin_av],
    [zeros_av, sin_av, cos_av],
])
Now let's see. If we had a loop, we would write:
for i in range(size[0]):
    for j in range(size[1]):
        img[i, j, :] = img[i, j, :] @ y_mat[:, :, j] @ x_mat[:, :, i]
or, if we expand out the matrix multiplications:
img2[i, j, n] = sum over k, m of: img[i, j, k] * y_mat[k, m, j] * x_mat[m, n, i]
This can be handled nicely using np.einsum (note that the indices i, j, k, m, n correspond exactly to the equation above):
img = np.einsum('ijk,kmj,mni->ijn', img, y_mat, x_mat)
To summarize:
size = img.shape[:2]
cols = np.linspace(-1, 1, size[1]) # shape: (b,)
rows = np.linspace(-1, 1, size[0]) # shape: (a,)
cols = np.arctan(sh * cols / (2*f))
rows = np.arctan(sv * rows / (2*f))
cos_ah = np.cos(cols)
sin_ah = np.sin(cols)
zeros_ah = np.zeros_like(cols)
ones_ah = np.ones_like(cols)
cos_av = np.cos(rows)
sin_av = np.sin(rows)
zeros_av = np.zeros_like(rows)
ones_av = np.ones_like(rows)
y_mat = np.array([
    [cos_ah, zeros_ah, sin_ah],
    [zeros_ah, ones_ah, zeros_ah],
    [-sin_ah, zeros_ah, cos_ah],
])
x_mat = np.array([
    [ones_av, zeros_av, zeros_av],
    [zeros_av, cos_av, -sin_av],
    [zeros_av, sin_av, cos_av],
])
return np.einsum('ijk,kmj,mni->ijn', img, y_mat, x_mat)
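As a quick sanity check (a sketch), the einsum contraction can be compared against the explicit per-pixel loop on small random inputs:
import numpy as np
a, b = 4, 5  # small test dimensions
img = np.random.rand(a, b, 3)
y_mat = np.random.rand(3, 3, b)
x_mat = np.random.rand(3, 3, a)
expected = np.empty_like(img)
for i in range(a):
    for j in range(b):
        expected[i, j, :] = img[i, j, :] @ y_mat[:, :, j] @ x_mat[:, :, i]
assert np.allclose(np.einsum('ijk,kmj,mni->ijn', img, y_mat, x_mat), expected)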

Vectorized Computation of Compositional Variance

Given an n × m matrix with entries x_{i,j}, the compositional variance is an m × m matrix, with the (i, j) entry including the expression
∑_{k=1}^{n} ln²(x_{k,i} / x_{k,j})
(it includes other, easily calculated, expressions).
This is very easy to calculate in a loop, but how can it be calculated using vectorization?
Here is the crappy loop code:
import numpy as np

x = np.array([[1, 2, 3], [4, 5, 6]], dtype=float)
v = np.zeros((3, 3))
for i in range(3):
    for j in range(3):
        for k in range(2):
            v[i, j] += np.log(x[k, i] / x[k, j])**2
Assuming you meant np.log(x[k, i] / x[k, j])**2 in NumPy terms, summed over k = 1..n, one vectorized approach uses broadcasting:
(np.log(x[:, :, None] / x[:, None, :])**2).sum(axis=0)
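A quick equivalence check (a sketch) of the vectorized form against the loop:
import numpy as np
x = np.array([[1, 2, 3], [4, 5, 6]], dtype=float)
v_loop = np.zeros((3, 3))
for i in range(3):
    for j in range(3):
        for k in range(2):
            v_loop[i, j] += np.log(x[k, i] / x[k, j])**2
v_vec = (np.log(x[:, :, None] / x[:, None, :])**2).sum(axis=0)
assert np.allclose(v_loop, v_vec)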
