Shear a numpy array - python

I'd like to 'shear' a numpy array. I'm not sure I'm using the term 'shear' correctly; by shear, I mean something like:
Shift the first column by 0 places
Shift the second column by 1 place
Shift the third column by 2 places
etc...
So this array:
array([[11, 12, 13],
       [17, 18, 19],
       [35, 36, 37]])
would turn into either this array:
array([[11, 36, 19],
       [17, 12, 37],
       [35, 18, 13]])
or something like this array:
array([[11,  0,  0],
       [17, 12,  0],
       [35, 18, 13]])
depending on how we handle the edges. I'm not too particular about edge behavior.
Here's my attempt at a function that does this:
import numpy

def shear(a, strength=1, shift_axis=0, increase_axis=1, edges='clip'):
    strength = int(strength)
    shift_axis = int(shift_axis)
    increase_axis = int(increase_axis)
    if shift_axis == increase_axis:
        raise UserWarning("Shear can't shift in the direction it increases")
    temp = numpy.zeros(a.shape, dtype=int)
    indices = []
    for d, num in enumerate(a.shape):
        coords = numpy.arange(num)
        shape = [1] * len(a.shape)
        shape[d] = num
        coords = coords.reshape(shape) + temp
        indices.append(coords)
    indices[shift_axis] -= strength * indices[increase_axis]
    if edges == 'clip':
        indices[shift_axis][indices[shift_axis] < 0] = -1
        indices[shift_axis][indices[shift_axis] >= a.shape[shift_axis]] = -1
        res = a[indices]
        res[indices[shift_axis] == -1] = 0
    elif edges == 'roll':
        indices[shift_axis] %= a.shape[shift_axis]
        res = a[indices]
    return res

if __name__ == '__main__':
    a = numpy.random.random((3,4))
    print a
    print shear(a)
It seems to work. Please tell me if it doesn't!
It also seems clunky and inelegant. Am I overlooking a builtin numpy/scipy function that does this? Is there a cleaner/better/more efficient way to do this in numpy? Am I reinventing the wheel?
EDIT:
Bonus points if this works on an N-dimensional array, instead of just the 2D case.
This function will be at the very center of a loop I'll repeat many times in our data processing, so I suspect it's actually worth optimizing.
SECOND EDIT:
I finally did some benchmarking. It looks like numpy.roll is the way to go, despite the loop. Thanks, tom10 and Sven Marnach!
Benchmarking code (run on Windows; I think you shouldn't use time.clock on Linux):
import time, numpy

def shear_1(a, strength=1, shift_axis=0, increase_axis=1, edges='roll'):
    strength = int(strength)
    shift_axis = int(shift_axis)
    increase_axis = int(increase_axis)
    if shift_axis == increase_axis:
        raise UserWarning("Shear can't shift in the direction it increases")
    temp = numpy.zeros(a.shape, dtype=int)
    indices = []
    for d, num in enumerate(a.shape):
        coords = numpy.arange(num)
        shape = [1] * len(a.shape)
        shape[d] = num
        coords = coords.reshape(shape) + temp
        indices.append(coords)
    indices[shift_axis] -= strength * indices[increase_axis]
    if edges == 'clip':
        indices[shift_axis][indices[shift_axis] < 0] = -1
        indices[shift_axis][indices[shift_axis] >= a.shape[shift_axis]] = -1
        res = a[indices]
        res[indices[shift_axis] == -1] = 0
    elif edges == 'roll':
        indices[shift_axis] %= a.shape[shift_axis]
        res = a[indices]
    return res

def shear_2(a, strength=1, shift_axis=0, increase_axis=1, edges='roll'):
    indices = numpy.indices(a.shape)
    indices[shift_axis] -= strength * indices[increase_axis]
    # Record out-of-range positions before the modulo wraps them around;
    # otherwise the 'clip' branch below could never trigger.
    out_of_range = ((indices[shift_axis] < 0) |
                    (indices[shift_axis] >= a.shape[shift_axis]))
    indices[shift_axis] %= a.shape[shift_axis]
    res = a[tuple(indices)]
    if edges == 'clip':
        res[out_of_range] = 0
    return res

def shear_3(a, strength=1, shift_axis=0, increase_axis=1):
    if shift_axis > increase_axis:
        shift_axis -= 1
    res = numpy.empty_like(a)
    index = numpy.index_exp[:] * increase_axis
    roll = numpy.roll
    for i in range(0, a.shape[increase_axis]):
        index_i = index + (i,)
        res[index_i] = roll(a[index_i], i * strength, shift_axis)
    return res

numpy.random.seed(0)
for a in (numpy.random.random((3, 3, 3, 3)),
          numpy.random.random((50, 50, 50, 50)),
          numpy.random.random((300, 300, 10, 10))):
    print 'Array dimensions:', a.shape
    for sa, ia in ((0, 1), (1, 0), (2, 3), (0, 3)):
        print 'Shift axis:', sa
        print 'Increase axis:', ia
        ref = shear_1(a, shift_axis=sa, increase_axis=ia)
        for shear, label in ((shear_1, '1'), (shear_2, '2'), (shear_3, '3')):
            start = time.clock()
            b = shear(a, shift_axis=sa, increase_axis=ia)
            end = time.clock()
            print label + ': %0.6f seconds' % (end - start)
            if (b - ref).max() > 1e-9:
                print "Something's wrong."
        print

The approach in tom10's answer can be extended to arbitrary dimensions:
def shear3(a, strength=1, shift_axis=0, increase_axis=1):
    if shift_axis > increase_axis:
        shift_axis -= 1
    res = numpy.empty_like(a)
    index = numpy.index_exp[:] * increase_axis
    roll = numpy.roll
    for i in range(0, a.shape[increase_axis]):
        index_i = index + (i,)
        res[index_i] = roll(a[index_i], -i * strength, shift_axis)
    return res
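For reference, a quick run on the 3x3 example from the question (my own check, not part of the original answer). Note that with -i * strength each column is rolled up by its index, the opposite direction to the example output; the question wasn't particular about direction, and the benchmark's shear_3 uses +i instead:

import numpy

a = numpy.array([[11, 12, 13],
                 [17, 18, 19],
                 [35, 36, 37]])
print(shear3(a))
# [[11 18 37]
#  [17 36 13]
#  [35 12 19]]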

numpy.roll does this. For example, if your original array is x, then
for i in range(x.shape[1]):
    x[:,i] = np.roll(x[:,i], i)
produces
[[11 36 19]
 [17 12 37]
 [35 18 13]]
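Note that this writes the shifted columns back into x itself; if you need to keep the original, roll into a copy instead (a minimal sketch of the same idea):

import numpy as np

x = np.array([[11, 12, 13],
              [17, 18, 19],
              [35, 36, 37]])
res = x.copy()
for i in range(x.shape[1]):
    res[:, i] = np.roll(x[:, i], i)  # shift column i down by i places
print(res)
# [[11 36 19]
#  [17 12 37]
#  [35 18 13]]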

This can be done using a trick described in this answer by Joe Kington:
from numpy.lib.stride_tricks import as_strided

a = numpy.array([[11, 12, 13],
                 [17, 18, 19],
                 [35, 36, 37]])
shift_axis = 0
increase_axis = 1
b = numpy.vstack((a, a))
strides = list(b.strides)
strides[increase_axis] -= strides[shift_axis]
# which for this 2D case is the same as:
# strides = (b.strides[0], b.strides[1] - b.strides[0])
as_strided(b, shape=b.shape, strides=strides)[a.shape[0]:]
# array([[11, 36, 19],
#        [17, 12, 37],
#        [35, 18, 13]])
To get "clip" instead of "roll", use
b = numpy.vstack((numpy.zeros(a.shape, int), a))
This is probably the most efficient way of doing it, since it does not use any Python loop at all.
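Wrapped up as a function for convenience (my own sketch, assuming the same 2D case with shift_axis=0 and increase_axis=1; the final .copy() materializes the view so the result does not alias b's memory):

import numpy
from numpy.lib.stride_tricks import as_strided

def shear_strided(a, edges='roll'):
    # Stack a buffer on top of `a` so the strided view can read past the
    # edge: a second copy of `a` for 'roll', zeros for 'clip'.
    if edges == 'clip':
        b = numpy.vstack((numpy.zeros(a.shape, a.dtype), a))
    else:
        b = numpy.vstack((a, a))
    # Stepping one column to the right also steps one row up in b.
    strides = (b.strides[0], b.strides[1] - b.strides[0])
    return as_strided(b, shape=b.shape, strides=strides)[a.shape[0]:].copy()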

Here is a cleaned-up version of your own approach:
def shear2(a, strength=1, shift_axis=0, increase_axis=1, edges='clip'):
    indices = numpy.indices(a.shape)
    indices[shift_axis] -= strength * indices[increase_axis]
    # Record out-of-range positions before the modulo wraps them around;
    # otherwise the 'clip' branch below could never trigger.
    out_of_range = ((indices[shift_axis] < 0) |
                    (indices[shift_axis] >= a.shape[shift_axis]))
    indices[shift_axis] %= a.shape[shift_axis]
    res = a[tuple(indices)]
    if edges == 'clip':
        res[out_of_range] = 0
    return res
The main difference is that it uses numpy.indices() instead of rolling your own version of this.
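For reference, numpy.indices() builds exactly the stack of per-axis coordinate arrays that the loop in the question constructs by hand:

>>> numpy.indices((2, 3))
array([[[0, 0, 0],
        [1, 1, 1]],

       [[0, 1, 2],
        [0, 1, 2]]])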

r = lambda l, n: l[n:] + l[:n]
transpose(map(r, transpose(a), range(len(a))))
I think. You should probably consider this pseudocode more than actual Python. Basically transpose the array, map a general rotate function over it to do the rotation, then transpose it back.
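A runnable version of this idea, for what it's worth (my own sketch; each column is rotated down by its index so the output matches the question's example):

a = [[11, 12, 13],
     [17, 18, 19],
     [35, 36, 37]]

rotate = lambda l, n: l[-n:] + l[:-n] if n else l
transpose = lambda m: [list(col) for col in zip(*m)]

sheared = transpose([rotate(col, i) for i, col in enumerate(transpose(a))])
print(sheared)  # [[11, 36, 19], [17, 12, 37], [35, 18, 13]]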


How do I multiply two matrices in Python without numpy?

Implement a function mat_mult_by_transpose(mat) which gets a valid matrix
called mat and returns a new matrix which is the matrix multiplication of mat and its transpose, i.e. mat ⋅ matᵀ.
Return a new matrix, without modifying the input matrix.
You may assume that the input matrix is not empty.
Example 1:
mat = [[1,2],[3,4],[5,6]]
mult_mat = mat_mult_by_transpose(mat)
print(mat) # [[1, 2], [3, 4], [5, 6]]
print(mult_mat) # [[5, 11, 17], [11, 25, 39], [17, 39, 61]]
Example 2:
mat2 = [[0, 1, 2], [10, 11, 12], [20, 21, 22]]
mult_mat2 = mat_mult_by_transpose(mat2)
print(mult_mat2) # [[5, 35, 65], [35, 365, 695], [65, 695, 1325]]
This is my attempt:
def mat_transpose(mat):
    transposedMat = []
    i = 0
    for i in range(len(mat[i])):
        newMat = []
        for j in range(len(mat)):
            newMat.append(mat[j][i])
        transposedMat.append(newMat)
    return transposedMat

def mat_mult_by_transpose(mat):
    matT = mat_transpose(mat)
    mat2 = []
    for i in range(len(matT)):
        temp_list = []
        for j in range(len(matT)):
            sum = 0
            for k in range(len(matT[0])):
                sum += mat[i][k] * matT[k][j]
            temp_list.append(sum)
        mat2.append(temp_list)
    return mat2
The error I ran into is this:
Traceback (most recent call last):
  File "file0.py", line 156, in <module>
    mult_mat = mat_mult_by_transpose(mat)
  File "file0.py", line 97, in mat_mult_by_transpose
    sum += mat[i][k] * matT[k][j]
IndexError: list index out of range
I don’t understand where my mistake is…
Assuming your mat_transpose() works fine, look at it in terms of dimensions.
Let mat have dimensions MxN.
Hence M = len(mat) and N = len(mat[0]).
Now matT will have dimensions NxM.
Hence N = len(matT) and M = len(matT[0]).
You need to traverse the rows of the first matrix mat, i.e. i ranges over [0..M),
then the columns of the second matrix matT, i.e. j ranges over [0..M),
and k should range over the common dimension, which is [0..N).
Here is the code:
def mat_mult_by_transpose(mat):
    matT = mat_transpose(mat)
    mat2 = []
    for i in range(len(mat)):             # M
        temp_list = []
        for j in range(len(matT[0])):     # M
            sum = 0
            for k in range(len(mat[0])):  # N, you can use len(matT) also
                sum += mat[i][k] * matT[k][j]
            temp_list.append(sum)
        mat2.append(temp_list)
    return mat2
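A quick check against Example 1 from the task statement:

mat = [[1, 2], [3, 4], [5, 6]]
print(mat_mult_by_transpose(mat))
# [[5, 11, 17], [11, 25, 39], [17, 39, 61]]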

Numba function works randomly with same given input, is this a bug?

I wrote a function called not_test in Numba that takes a list of 2D arrays representing a drainage network and computes the routing of an imaginary water drop through it, as in the figure below. The point of the code is to get the path of the drop for every possible drainage stream.
Results
These are the results I am getting: the routing a water drop would take if it falls at the start of each stream, e.g. if it falls at point 1, the routing is [16, 15, 2, 1].
[[16, 3],
 [16, 15, 2, 0],
 [16, 15, 2, 1],
 [16, 15, 14, 13],
 [16, 15, 14, 12, 4],
 [16, 15, 14, 12, 11, 6],
 [16, 15, 14, 12, 11, 10, 9],
 [16, 15, 14, 12, 11, 10, 8, 5],
 [16, 15, 14, 12, 11, 10, 8, 7]]
Problem
The code works in plain Python, and it also works when compiled with Numba. The problem is that if you run the Numba-compiled code several times, it sometimes crashes and sometimes works.
I have not been able to debug the code in Numba, and it gives no error in Python mode. It does not show any particular error in the Python console or the PyCharm run either; it just stops.
The commented-out code is definitely not part of the issue I am experiencing.
I would really like to be able to use Numba for this function because it gives a 653x speedup, and the function will run around 5k times, which would mean:
with Numba: 0.0015003681182861328 s per run -> 7.5 s total time
with Python: 0.9321613311767578 s per run -> 1.3 hours total time
Using Numba is a BIG help in this particular case, so I would appreciate any ideas, because plain Python is too slow for the application.
Error example
In PyCharm, with error:
Now
0.0
0.2295396327972412
[16]
[ 3 15]
[ 3 15]
[ 2 14]
[0 1]
[12 13]
Process finished with exit code -1073740940 (0xC0000374)
In PyCharm, without error:
Now
0.0
0.2430422306060791
[16]
[ 3 15]
[ 3 15]
[ 2 14]
[0 1]
[12 13]
[ 4 11]
[ 4 11]
[ 4 11]
[ 6 10]
[ 6 10]
[8 9]
[5 7]
[[16, 3], [16, 15, 2, 0], [16, 15, 2, 1], [16, 15, 14, 13], [16, 15, 14, 12, 4], [16, 15, 14, 12, 11, 6], [16, 15, 14, 12, 11, 10, 9], [16, 15, 14, 12, 11, 10, 8, 5], [16, 15, 14, 12, 11, 10, 8, 7]]
0.0016527080535889
Process finished with exit code 0
Code
link to file: https://drive.google.com/file/d/1guAe1C2sKZyy2U2_qXAhMA1v46PfeKnN/view
import numpy as np
# from pypiper import RUT_5
import numba

def convert2(x, dtype=np.float64):
    try:
        # Try and convert x to a Numpy array. If this succeeds
        # then we have reached the end of the nesting-depth.
        y = np.asarray(x, dtype=dtype)
    except:
        # If the conversion to a Numpy array fails, then it can
        # be because not all elements of x can be converted to
        # the given dtype. There is currently no way to distinguish
        # if this is because x is a nested list, or just a list
        # of simple elements with incompatible types.
        # Recursively call this function on all elements of x.
        y = [convert2(x_, dtype=dtype) for x_ in x]
        # Convert Python list to Numba list.
        y = numba.typed.List(y)
    return y
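For instance (a hypothetical small input of my own; with recent numpy the ragged np.asarray() call raises, which triggers the recursive branch), a ragged list of coordinate pairs becomes a numba.typed.List of float64 arrays:

ragged = [[(0.0, 0.0), (1.0, 2.0)],
          [(1.0, 2.0), (3.0, 1.0), (4.0, 4.0)]]
typed = convert2(ragged)
print(typed[0].shape, typed[1].shape)  # (2, 2) (3, 2)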
@numba.njit('(ListType(float64[:, ::1]), float64[:])')
def not_test(branches, outlet):
    # get len of branches
    _len_branches = len(branches)
    # # empty array
    # d_array = np.empty(shape=_len_branches, dtype=np.float64)
    # # set outlet coordinates as arrays
    # x_outlet, y_outlet = outlet
    # x_outlet, y_outlet = np.array([x_outlet]), np.array([y_outlet])
    #
    # # get min distance from branches
    # for pos in numba.prange(_len_branches):
    #     # get current branch
    #     branch = branches[pos]
    #     # get min distance from outlet point
    #     d_min = RUT_5.nb_cdist(branch, x_outlet, y_outlet).min()
    #     # add to array
    #     d_array[pos] = d_min
    #
    # # get index for minimum distance
    # index_branch = np.argmin(d_array)
    index_branch = 16
    # remove initial branch
    update_branches = branches.copy()
    del update_branches[index_branch]
    # define arrays
    not_read = np.empty(shape=0, dtype=np.int64)
    paths_update = [[np.int(x)] for x in range(0)]
    points = np.empty(shape=(2, 2))
    a_list = [np.int(x) for x in range(0)]
    # avoid from loop
    not_read = np.append(index_branch, not_read)
    # iterable in loop
    iterable = not_read.copy()
    # conditions
    cond = 0
    cont = 0
    while cond == 0:
        for pos_idx in iterable:
            print(iterable)
            if cont > 0:
                paths = paths_update.copy()
            branch = branches[pos_idx]
            points[0] = branch[0]
            points[1] = branch[-1]
            for point in points:
                for pos_j in range(_len_branches):
                    if pos_j not in not_read:
                        diff = np.sum(point - branches[pos_j], axis=1)
                        if 0 in diff:
                            a_list.append(pos_j)
            if cont == 0:
                paths = [[pos_idx] + [i] for i in a_list]
                paths_update = paths.copy()
                cont = cont + 1
                not_read = np.append(not_read, a_list)
                iterable = np.array(a_list)
                a_list = [np.int(x) for x in range(0)]
            else:
                if len(a_list):
                    path_arr = [_i for _i in paths if pos_idx in _i]
                    for path in path_arr:
                        for conexion in a_list:
                            temp_list = path.copy()
                            temp_list.append(conexion)
                            paths_update.append(temp_list)
                        paths_update.remove(path)
                    not_read = np.append(not_read, a_list)
                    iterable = np.array(a_list)
                    a_list = [np.int(x) for x in range(0)]
                else:
                    continue
        if len(branches) == len(np.unique(not_read)):
            cond = 1
    return paths
if __name__ == '__main__':
    print('Now')
    branches = np.load('test.npy', allow_pickle=True).item()
    x_snap, y_snap = 717110.7843995667, 9669749.115011858

    import time
    t0 = time.time()
    arr = []
    for pos, branch in enumerate(branches.features):
        arr.append(list(branch.geometry.coordinates))
    print(time.time() - t0)

    t0 = time.time()
    arr = convert2(arr)
    print(time.time() - t0)

    t0 = time.time()
    outlet = np.array([x_snap, y_snap])
    print(not_test(branches=arr, outlet=outlet))
    print(time.time() - t0)
This is not a real answer, as it does not address the actual problem of the potential bug in the Numba code, but it gets the job done.
It seems to be an issue with using the pop or remove list methods in code compiled with the @numba.njit decorator; this issue was reported and the developers are debugging it.
I ended up avoiding these methods. It is surely not ideal, as it iterates over some paths it should not, but it is still much faster than plain Python.
Code
@numba.njit('(ListType(float64[:, ::1]), float64[:])')
def not_test(branches, outlet):
    # get len of branches
    _len_branches = len(branches)
    # # empty array
    # d_array = np.empty(shape=_len_branches, dtype=np.float64)
    # # set outlet coordinates as arrays
    # x_outlet, y_outlet = outlet
    # x_outlet, y_outlet = np.array([x_outlet]), np.array([y_outlet])
    #
    # # get min distance from branches
    # for pos in numba.prange(_len_branches):
    #     # get current branch
    #     branch = branches[pos]
    #     # get min distance from outlet point
    #     d_min = RUT_5.nb_cdist(branch, x_outlet, y_outlet).min()
    #     # add to array
    #     d_array[pos] = d_min
    #
    # # get index for minimum distance
    # index_branch = np.argmin(d_array)
    index_branch = 16
    # remove initial branch
    update_branches = branches.copy()
    del update_branches[index_branch]
    # define arrays
    not_read = np.empty(shape=0, dtype=np.int64)
    paths_update = [[np.int(_)] for _ in range(0)]
    paths_remove = [np.int(_) for _ in range(0)]
    points = np.empty(shape=(2, 2))
    a_list = [np.int(x) for x in range(0)]
    diff = np.empty(shape=0, dtype=np.int64)
    # avoid from loop
    not_read = np.append(index_branch, not_read)
    # iterable in loop
    iterable = not_read.copy()
    # conditions
    cond = 0
    cont = 0
    while cond == 0:
        for pos_idx in iterable:
            if cont > 0:
                paths = paths_update.copy()
            branch = branches[pos_idx]
            points[0] = branch[0]
            points[1] = branch[-1]
            for point in points:
                for pos_j in numba.prange(_len_branches):
                    if pos_j not in not_read:
                        diff = np.sum(point - branches[pos_j], axis=1)
                        if len(diff[diff == 0]) > 0:
                            a_list.append(pos_j)
            if cont == 0:
                paths = [[pos_idx] + [i] for i in a_list]
                paths_update = paths.copy()
                cont = 1
                not_read = np.append(not_read, a_list)
                iterable = np.array(a_list)
                a_list = [np.int(x) for x in range(0)]
            else:
                if len(a_list):
                    for pos, path in enumerate(paths):
                        if pos_idx in path:
                            for conexion in a_list:
                                temp_list = path.copy()
                                temp_list.append(conexion)
                                paths_update.append(temp_list)
                            paths_remove.append(pos)
                    not_read = np.append(not_read, a_list)
                    iterable = np.array(a_list)
                    a_list = [np.int(x) for x in range(0)]
        if len(branches) == len(np.unique(not_read)):
            cond = 1
            paths = [_ for _i, _ in enumerate(paths) if _i not in paths_remove]
    return paths

Variable array creation using numpy operations

I wish to create a variable array of numbers in numpy while skipping a chunk of numbers. For instance, If I have the variables:
m = 5
k = 3
num = 50
I want to create a linearly spaced numpy array starting at num and ending at num - k, skip k numbers and continue the array generation. Then repeat this process m times. For example, the above would yield:
np.array([50, 49, 48, 47, 44, 43, 42, 41, 38, 37, 36, 35, 32, 31, 30, 29, 26, 25, 24, 23])
How can I accomplish this via Numpy?
You can try:
import numpy as np
m = 5
k = 3
num = 50
np.hstack([np.arange(num - 2*i*k, num - (2*i+1)*k - 1, -1) for i in range(m)])
It gives:
array([50, 49, 48, 47, 44, 43, 42, 41, 38, 37, 36, 35, 32, 31, 30, 29, 26,
       25, 24, 23])
Edit:
@JanChristophTerasa posted an answer (now deleted) that avoided Python loops by masking some elements of an array obtained using np.arange(). Here is a solution inspired by that idea. It works much faster than the one above:
import numpy as np
m = 5
k = 3
num = 50
x = np.arange(num, num - 2*k*m, -1).reshape(-1, 2*k)
x[:, :k+1].ravel()
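A quick check that this reproduces the target array:

print(x[:, :k+1].ravel())
# [50 49 48 47 44 43 42 41 38 37 36 35 32 31 30 29 26 25 24 23]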
We can use a mask and np.tile:
def mask_and_tile(m=5, k=3, num=50):
    a = np.arange(num, num - 2 * m * k, -1)  # create numbers
    mask = np.ones(k * 2, dtype=bool)        # create mask
    mask[k+1:] = False                       # set appropriate elements to False
    mask = np.tile(mask, m)                  # repeat mask m times
    result = a[mask]                         # mask our numbers
    return result
Or we can use a mask and just toggle the appropriate element:
def mask(m=5, k=3, num=50):
    a = np.arange(num, num - 2 * m * k, -1)  # create numbers
    mask = np.ones_like(a, dtype=bool).reshape(-1, k)
    mask[1::2] = False
    mask[1::2, 0] = True
    result = a[mask.flatten()]
    return result
This will work fine:
import numpy as np
m = 5
k = 3
num = 50
h = 0
x = np.array([])
for i in range(m):
    x = np.append(x, range(num-h, num-h-k-1, -1))
    h += 2*k
print(x)
Output
[50. 49. 48. 47. 44. 43. 42. 41. 38. 37. 36. 35. 32. 31. 30. 29. 26. 25.
 24. 23.]
One way of doing this is making a 2D grid and calculating each number based on its position in the grid, then flattening it to a 1D array.
import numpy as np
num=50
m=5
k=3
# coordinates in a grid of width k+1 and height m
y, x = np.mgrid[:m, :k+1]
# a=[[50-0, 50-1, 50-2, 50-3], [50-0-2*3*1, 50-1-2*3*1, ...], [50-0-2*3*2...]...]
a = num - x - 2 * k * y
print(a.ravel())

numpy create array of the max of consecutive pairs in another array

I have a numpy array:
A = np.array([8, 2, 33, 4, 3, 6])
What I want is to create another array B where each element is the pairwise max of 2 consecutive elements in A, so I get:
B = np.array([8, 33, 33, 4, 6])
Any ideas on how to implement this?
Any ideas on how to implement this for more than 2 elements (the same thing, but with a window of n consecutive elements)?
Edit:
The answers gave me a way to solve this question, but for the n-size window case, is there a more efficient way that does not require loops?
Edit2:
Turns out that the question is equivalent to asking how to perform 1D max-pooling of a list with a window of size n.
Does anyone know how to implement this efficiently?
One solution to the pairwise problem is using the np.maximum function and array slicing:
B = np.maximum(A[:-1], A[1:])
A loop-free solution is to use max on the windows created by skimage.util.view_as_windows:
list(map(max, view_as_windows(A, (2,))))
[8, 33, 33, 4, 6]
Copy/pastable example:
import numpy as np
from skimage.util import view_as_windows
A = np.array([8, 2, 33, 4, 3, 6])
list(map(max, view_as_windows(A, (2,))))
Here is an approach specifically tailored for larger windows. It is O(1) in window size and O(n) in data size.
I've done a pure numpy and a pythran implementation.
How do we achieve O(1) in window size? We use a "sawtooth" trick: if w is the window width, we group the data into lots of w, and for each group we do the cumulative maximum from left to right and from right to left. The elements of any in-between window distribute over two groups, and the maxima of the intersections are among the cumulative maxima we have computed earlier, so we need a total of 3 comparisons per data point.
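As a toy illustration of the sawtooth idea (my own sketch, not the implementation below): with w = 3, every window spans at most two groups, so its maximum is the max of one precomputed suffix maximum and one precomputed prefix maximum:

import numpy as np

a = np.array([8, 2, 33, 4, 3, 6])
w = 3
groups = a.reshape(-1, w)                    # [[8, 2, 33], [4, 3, 6]]
fwd = np.maximum.accumulate(groups, axis=1)  # left-to-right cumulative maxima
bwd = np.maximum.accumulate(groups[:, ::-1], axis=1)[:, ::-1]  # right-to-left
# The window a[1:4] = [2, 33, 4] straddles both groups; its max combines the
# suffix max of group 0 from position 1 with the prefix max of group 1 at 0.
print(max(bwd[0, 1], fwd[1, 0]))  # 33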
benchit timings (thanks @Divakar) for w=100; my functions are pp (numpy) and winmax (pythran):
For small window size w=5 the picture is more even. Interestingly, pythran still has a huge edge, even for very small sizes. They must be doing something right to minimize call overhead.
python code:
cummax = np.maximum.accumulate

def pp(a, w):
    N = a.size//w
    if a.size-w+1 > N*w:
        out = np.empty(a.size-w+1, a.dtype)
        out[:-1] = cummax(a[w*N-1::-1].reshape(N, w), axis=1).ravel()[:w-a.size-1:-1]
        out[-1] = a[w*N:].max()
    else:
        out = cummax(a[w*N-1::-1].reshape(N, w), axis=1).ravel()[:w-a.size-2:-1]
    out[1:N*w-w+1] = np.maximum(out[1:N*w-w+1],
                                cummax(a[w:w*N].reshape(N-1, w), axis=1).ravel())
    out[N*w-w+1:] = np.maximum(out[N*w-w+1:], cummax(a[N*w:]))
    return out
pythran version; compile with pythran -O3 <filename.py>; this creates a compiled module which you can import:
import numpy as np

# pythran export winmax(float[:],int)
# pythran export winmax(int[:],int)
def winmax(data, winsz):
    N = data.size//winsz
    if N < 1:
        raise ValueError
    out = np.empty(data.size-winsz+1, data.dtype)
    nxt = winsz
    for j in range(winsz, data.size):
        if j == nxt:
            nxt += winsz
            out[j+1-winsz] = data[j]
        else:
            out[j+1-winsz] = out[j-winsz] if out[j-winsz] > data[j] else data[j]
    running = data[-winsz:N*winsz].max()
    nxt -= winsz << (nxt > data.size)
    for j in range(data.size-winsz, 0, -1):
        if j == nxt:
            nxt -= winsz
            running = data[j-1]
        else:
            running = data[j] if data[j] > running else running
        out[j] = out[j] if out[j] > running else running
    out[0] = data[0] if data[0] > running else running
    return out
In this Q&A, we are basically asking for sliding max values. This has been explored before - Max in a sliding window in NumPy array. Since we are looking to be efficient, we can look further. One option is numba, and here are two final variants I ended up with that leverage the parallel directive, which boosts performance over the version without it:
import numpy as np
from numba import njit, prange

@njit(parallel=True)
def numba1(a, W):
    L = len(a)-W+1
    out = np.empty(L, dtype=a.dtype)
    v = np.iinfo(a.dtype).min
    for i in prange(L):
        max1 = v
        for j in range(W):
            cur = a[i + j]
            if cur > max1:
                max1 = cur
        out[i] = max1
    return out

@njit(parallel=True)
def numba2(a, W):
    L = len(a)-W+1
    out = np.empty(L, dtype=a.dtype)
    for i in prange(L):
        # seed with the first window element rather than comparing
        # against the uninitialized contents of np.empty
        out[i] = a[i]
        for j in range(1, W):
            cur = a[i + j]
            if cur > out[i]:
                out[i] = cur
    return out
From the earlier linked Q&A, the equivalent SciPy version would be -
from scipy.ndimage.filters import maximum_filter1d

def scipy_max_filter1d(a, W):
    L = len(a)-W+1
    hW = W//2  # Half window size
    return maximum_filter1d(a, size=W)[hW:hW+L]
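For instance, on the array from the question (a quick check of my own):

import numpy as np

A = np.array([8, 2, 33, 4, 3, 6])
print(scipy_max_filter1d(A, 2))  # [ 8 33 33  4  6]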
Benchmarking
Other posted working approaches for a generic window arg:
from skimage.util import view_as_windows

def rolling(a, window):
    shape = (a.size - window + 1, window)
    strides = (a.itemsize, a.itemsize)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

# @mathfux's soln
def npmax_strided(a, n):
    return np.max(rolling(a, n), axis=1)

# @Nicolas Gervais's soln
def mapmax_strided(a, W):
    return list(map(max, view_as_windows(a, W)))

cummax = np.maximum.accumulate
def pp(a, w):
    N = a.size//w
    if a.size-w+1 > N*w:
        out = np.empty(a.size-w+1, a.dtype)
        out[:-1] = cummax(a[w*N-1::-1].reshape(N, w), axis=1).ravel()[:w-a.size-1:-1]
        out[-1] = a[w*N:].max()
    else:
        out = cummax(a[w*N-1::-1].reshape(N, w), axis=1).ravel()[:w-a.size-2:-1]
    out[1:N*w-w+1] = np.maximum(out[1:N*w-w+1],
                                cummax(a[w:w*N].reshape(N-1, w), axis=1).ravel())
    out[N*w-w+1:] = np.maximum(out[N*w-w+1:], cummax(a[N*w:]))
    return out
Using the benchit package (a few benchmarking tools packaged together; disclaimer: I am its author) to benchmark the proposed solutions.
import benchit

funcs = [mapmax_strided, npmax_strided, numba1, numba2, scipy_max_filter1d, pp]
in_ = {(n, W): (np.random.randint(0, 100, n), W) for n in 10**np.arange(2, 6) for W in [2, 10, 20, 50, 100]}
t = benchit.timings(funcs, in_, multivar=True, input_name=['Array-length', 'Window-length'])
t.plot(logx=True, sp_ncols=1, save='timings.png')
So, the numba solutions are great for window sizes lower than 10, around which there is no clear winner; on larger window sizes, pp wins, with the SciPy one in second place.
In case there are consecutive n items, the extended solution requires reducing over a loop of slices (note that np.maximum(*arrays) would misinterpret a third array as the out argument, so use np.maximum.reduce):
np.maximum.reduce([A[i:len(A)-n+i+1] for i in range(n)])
In order to avoid it, you can use stride tricks and convert A to an array of n-length blocks:
def rolling(a, window):
    shape = (a.size - window + 1, window)
    strides = (a.itemsize, a.itemsize)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
For example:
>>> rolling(A, 3)
array([[ 8,  2, 33],
       [ 2, 33,  4],
       [33,  4,  3],
       [ 4,  3,  6]])
After that, you can finish it off with np.max(rolling(A, n), axis=1).
Though, despite its elegance, neither this solution nor the first one is efficient, because we repeatedly apply maximum to adjacent blocks that differ by only two items.
A recursive solution, for all n:
import numpy as np
import sys

def recursive(a: np.ndarray, n: int, b=None, level=2):
    if n <= 0 or n > len(a):
        raise ValueError(f'len(a):{len(a)} n:{n}')
    if n == 1:
        return a
    if len(a) == n:
        return np.max(a)
    b = np.maximum(a[:-1], a[1:]) if b is None else np.maximum(a[level - 1:], b)
    if n == level:
        return b
    return recursive(a, n, b[:-1], level + 1)

test_data = np.array([8, 2, 33, 4, 3, 6])
for test_n in range(1, len(test_data) + 2):
    try:
        print(recursive(test_data, n=test_n))
    except ValueError as e:
        sys.stderr.write(str(e))
output
[ 8 2 33 4 3 6]
[ 8 33 33 4 6]
[33 33 33 6]
[33 33 33]
[33 33]
33
len(a):6 n:7
About the recursive function
You can observe the following data, and then you will see how to write the recursive function.
"""
np.array([8, 2, 33, 4, 3, 6])
n=2: (8, 2), (2, 33), (33, 4), (4, 3), (3, 6) => [8, 33, 33, 4, 6] => B' = [8, 33, 33, 4]
n=3: (8, 2, 33), (2, 33, 4), (33, 4, 3), (4, 3, 6) => B' [33, 4, 3, 6] => np.maximum([8, 33, 33, 4], [33, 4, 3, 6]) => 33, 33, 33, 6
...
"""
Using Pandas:
import pandas as pd

A = pd.Series([8, 2, 33, 4, 3, 6])
res = pd.concat([A, A.shift(-1)], axis=1).max(axis=1, skipna=False).dropna()
>>> res
0     8.0
1    33.0
2    33.0
3     4.0
4     6.0
dtype: float64
Or using numpy:
np.vstack([A[1:], A[:-1]]).max(axis=0)  # array([ 8., 33., 33.,  4.,  6.])

NumPy indexing with varying position

I have an array input_data of shape (A, B, C), and an array ind of shape (B,). I want to loop through the B axis and, for each index i, take the sum of the two elements at positions ind[i] and ind[i]+1 along the C axis. The desired output is of shape (A, B). I have the following code which works, but I feel it is inefficient due to index-based looping through the B axis. Is there a more efficient method?
import numpy as np

input_data = np.random.rand(2, 6, 10)
ind = [2, 3, 5, 6, 5, 4]
out = np.zeros((input_data.shape[0], input_data.shape[1]))
for i in range(len(ind)):
    d = input_data[:, i, ind[i]:ind[i]+2]
    out[:, i] = np.sum(d, axis=1)
Edited based on Divakar's answer:
import timeit
import numpy as np

N = 1000
input_data = np.random.rand(10, N, 5000)
ind = (4999 * np.random.rand(N)).astype(np.int)

def test_1():  # Old loop-based method
    out = np.zeros((input_data.shape[0], input_data.shape[1]))
    for i in range(len(ind)):
        d = input_data[:, i, ind[i]:ind[i]+2]
        out[:, i] = np.sum(d, axis=1)
    return out

def test_2():
    extent = 2  # Comes from 2 in "ind[i]:ind[i]+2"
    m, n, r = input_data.shape
    idx = (np.arange(n)*r + ind)[:, None] + np.arange(extent)
    out1 = input_data.reshape(m, -1)[:, idx].reshape(m, n, -1).sum(2)
    return out1

print timeit.timeit(stmt=test_1, number=1000)
print timeit.timeit(stmt=test_2, number=1000)
print np.all(test_1() == test_2(), keepdims=True)

>> 7.70429363482
>> 0.392034666757
>> [[ True]]
Here's a vectorized approach using linear indexing with some help from broadcasting. We merge the last two axes of the input array, calculate the linear indices corresponding to the last two axes, perform slicing and reshape back to a 3D shape. Finally, we do summation along the last axis to get the desired output. The implementation would look something like this -
extent = 2 # Comes from 2 in "ind[i]:ind[i]+2"
m,n,r = input_data.shape
idx = (np.arange(n)*r + ind)[:,None] + np.arange(extent)
out1 = input_data.reshape(m,-1)[:,idx].reshape(m,n,-1).sum(2)
If the extent is always going to be 2 as stated in the question - "... sum of elements C[B[i]] and C[B[i]+1]", then you could simply do -
m,n,r = input_data.shape
ind_arr = np.array(ind)
axis1_r = np.arange(n)
out2 = input_data[:,axis1_r,ind_arr] + input_data[:,axis1_r,ind_arr+1]
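A quick sanity check of both variants against the question's loop (my own sketch, using the small example shapes from the question):

import numpy as np

np.random.seed(0)
input_data = np.random.rand(2, 6, 10)
ind = np.array([2, 3, 5, 6, 5, 4])

out = np.zeros((input_data.shape[0], input_data.shape[1]))
for i in range(len(ind)):
    out[:, i] = input_data[:, i, ind[i]:ind[i]+2].sum(axis=1)

extent = 2
m, n, r = input_data.shape
idx = (np.arange(n)*r + ind)[:, None] + np.arange(extent)
out1 = input_data.reshape(m, -1)[:, idx].reshape(m, n, -1).sum(2)

axis1_r = np.arange(n)
out2 = input_data[:, axis1_r, ind] + input_data[:, axis1_r, ind+1]

assert np.allclose(out, out1) and np.allclose(out, out2)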
You could also use integer array indexing combined with basic slicing:
import numpy as np

m, n, r = 2, 6, 10
input_data = np.arange(2*6*10).reshape(m, n, r)
ind = np.array([2, 3, 5, 6, 5, 4])
out = np.zeros((input_data.shape[0], input_data.shape[1]))
for i in range(len(ind)):
    d = input_data[:, i, ind[i]:ind[i]+2]
    out[:, i] = np.sum(d, axis=1)

out2 = input_data[:, np.arange(n)[:, None], np.add.outer(ind, range(2))].sum(axis=-1)
print(out2)
# array([[  5,  27,  51,  73,  91, 109],
#        [125, 147, 171, 193, 211, 229]])
assert np.allclose(out, out2)
