I would like to implement itertools.combinations for numpy. Based on this discussion, I have a function that works for 1D input:
from numpy import asarray, dtype, fromiter
from itertools import combinations

def combs(a, r):
    """
    Return successive r-length combinations of elements in the array a.
    Should produce the same output as array(list(combinations(a, r))), but
    faster.
    """
    a = asarray(a)
    dt = dtype([('', a.dtype)]*r)
    b = fromiter(combinations(a, r), dt)
    return b.view(a.dtype).reshape(-1, r)
and the output makes sense:
In [1]: list(combinations([1,2,3], 2))
Out[1]: [(1, 2), (1, 3), (2, 3)]
In [2]: array(list(combinations([1,2,3], 2)))
Out[2]:
array([[1, 2],
[1, 3],
[2, 3]])
In [3]: combs([1,2,3], 2)
Out[3]:
array([[1, 2],
[1, 3],
[2, 3]])
However, it would be best if I could expand it to N-D inputs, where additional dimensions simply allow you to speedily do multiple calls at once. So, conceptually, if combs([1, 2, 3], 2) produces [1, 2], [1, 3], [2, 3], and combs([4, 5, 6], 2) produces [4, 5], [4, 6], [5, 6], then combs((1,2,3) and (4,5,6), 2) should produce [1, 2], [1, 3], [2, 3] and [4, 5], [4, 6], [5, 6] where "and" just represents parallel rows or columns (whichever makes sense). (and likewise for additional dimensions)
I'm not sure:
How to make the dimensions work in a logical way that's consistent with the way other functions work (like how some numpy functions have an axis= parameter, and a default of axis 0. So probably axis 0 should be the one I am combining along, and all other axes just represent parallel calculations?)
How to get the above code to work with ND (right now I get ValueError: setting an array element with a sequence.)
Is there a better way to do dt = dtype([('', a.dtype)]*r)?
You can use itertools.combinations() to create the index array, and then use NumPy's fancy indexing:
import numpy as np
from itertools import combinations, chain
from scipy.special import comb
def comb_index(n, k):
    count = comb(n, k, exact=True)
    index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
                        int, count=count*k)
    return index.reshape(-1, k)
data = np.array([[1,2,3,4,5],[10,11,12,13,14]])
idx = comb_index(5, 3)
print(data[:, idx])
output:
[[[ 1 2 3]
[ 1 2 4]
[ 1 2 5]
[ 1 3 4]
[ 1 3 5]
[ 1 4 5]
[ 2 3 4]
[ 2 3 5]
[ 2 4 5]
[ 3 4 5]]
[[10 11 12]
[10 11 13]
[10 11 14]
[10 12 13]
[10 12 14]
[10 13 14]
[11 12 13]
[11 12 14]
[11 13 14]
[12 13 14]]]
Case k = 2: np.triu_indices
I've tested the case k = 2 with many variations of the above-mentioned functions using perfplot. The winner is, no doubt, np.triu_indices, and I see now that using an np.dtype([('', np.intp)] * 2) structured dtype can be a huge boost even for exotic data types such as igraph.EdgeList.
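For orientation, here is a minimal sketch (my addition) of what the k = 2 index array looks like; the values follow from NumPy's documented np.triu_indices behaviour:

import numpy as np

# pair indices (i, j) with i < j for n = 4, i.e. the same rows combinations(range(4), 2) gives
print(np.transpose(np.triu_indices(4, 1)))
# [[0 1]
#  [0 2]
#  [0 3]
#  [1 2]
#  [1 3]
#  [2 3]]

The benchmark script used for the comparison: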
import numpy as np
import perfplot
import matplotlib.pyplot as plt
from itertools import combinations, chain
from scipy.special import comb
import igraph as ig    # graph library built on C
import networkx as nx  # graph library, pure Python

def _combs(n):
    return np.array(list(combinations(range(n), 2)))

def _combs_fromiter(n):  ## Jaime
    indices = np.arange(n)
    dt = np.dtype([('', np.intp)]*2)
    indices = np.fromiter(combinations(indices, 2), dt)
    indices = indices.view(np.intp).reshape(-1, 2)
    return indices

def _combs_fromiterplus(n):
    dt = np.dtype([('', np.intp)]*2)
    indices = np.fromiter(combinations(range(n), 2), dt)
    indices = indices.view(np.intp).reshape(-1, 2)
    return indices

def _numpy(n):  ## endolith
    return np.transpose(np.triu_indices(n, 1))

def _igraph(n):
    return np.array(ig.Graph(n).complementer(False).get_edgelist())

def _igraph_fromiter(n):
    dt = np.dtype([('', np.intp)]*2)
    indices = np.fromiter(ig.Graph(n).complementer(False).get_edgelist(), dt)
    indices = indices.view(np.intp).reshape(-1, 2)
    return indices

def _nx(n):
    G = nx.Graph()
    G.add_nodes_from(range(n))
    return np.array(list(nx.complement(G).edges))

def _nx_fromiter(n):
    G = nx.Graph()
    G.add_nodes_from(range(n))
    dt = np.dtype([('', np.intp)]*2)
    indices = np.fromiter(nx.complement(G).edges, dt)
    indices = indices.view(np.intp).reshape(-1, 2)
    return indices

def _comb_index(n):  ## HYRY
    count = comb(n, 2, exact=True)
    index = np.fromiter(chain.from_iterable(combinations(range(n), 2)),
                        int, count=count*2)
    return index.reshape(-1, 2)

fig = plt.figure(figsize=(15, 10))
plt.grid(True, which="both")
out = perfplot.bench(
    setup=lambda x: x,
    kernels=[_numpy, _combs, _combs_fromiter, _combs_fromiterplus,
             _comb_index, _igraph, _igraph_fromiter, _nx, _nx_fromiter],
    n_range=[2 ** k for k in range(12)],
    xlabel='combinations(n, 2)',
    title='testing combinations',
    show_progress=False,
    equality_check=False)
out.show()
Wondering why np.triu_indices can't be extended to more dimensions?
Case 2 ≤ k ≤ 4: triu_indices (implemented here) = up to 2x speedup
np.triu_indices could actually be a winner for the cases k = 3 and even k = 4 if we implement a generalised method instead. A current version of this method is equivalent to:
def triu_indices(n, k):
    x = np.less.outer(np.arange(n), np.arange(-k+1, n-k+1))
    return np.nonzero(x)
It constructs a matrix representation of the relation x < y for two sequences 0, 1, ..., n-1 and finds the locations of the cells where it holds. For the 3D case we need to add an extra dimension and intersect the relations x < y and y < z. For higher dimensions the procedure is the same, but this incurs a huge memory overhead since n^k binary cells are needed and only C(n, k) of them are True. Memory usage and runtime therefore grow like n^k, so this approach outperforms itertools.combinations only for small values of k. It is actually best used for the cases k = 2 and k = 3.
def C(n, k):  # huge memory overload...
    if k == 0:
        return np.array([])
    if k == 1:
        return np.arange(1, n+1)
    elif k == 2:
        return np.less.outer(np.arange(n), np.arange(n))
    else:
        x = C(n, k-1)
        X = np.repeat(x[None, :, :], len(x), axis=0)
        Y = np.repeat(x[:, :, None], len(x), axis=2)
        return X & Y

def C_indices(n, k):
    return np.transpose(np.nonzero(C(n, k)))
Let's check it out with perfplot:
import matplotlib.pyplot as plt
import numpy as np
import perfplot
from itertools import chain, combinations
from scipy.special import comb

def C(n, k):  # huge memory overload...
    if k == 0:
        return np.array([])
    if k == 1:
        return np.arange(1, n + 1)
    elif k == 2:
        return np.less.outer(np.arange(n), np.arange(n))
    else:
        x = C(n, k - 1)
        X = np.repeat(x[None, :, :], len(x), axis=0)
        Y = np.repeat(x[:, :, None], len(x), axis=2)
        return X & Y

def C_indices(data):
    n, k = data
    return np.transpose(np.nonzero(C(n, k)))

def comb_index(data):
    n, k = data
    count = comb(n, k, exact=True)
    index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
                        int, count=count * k)
    return index.reshape(-1, k)

def build_args(k):
    return {'setup': lambda x: (x, k),
            'kernels': [comb_index, C_indices],
            'n_range': [2 ** x for x in range(2, {2: 10, 3: 10, 4: 7, 5: 6}[k])],
            'xlabel': f'N',
            'title': f'test of case C(N,{k})',
            'show_progress': True,
            'equality_check': lambda x, y: np.array_equal(x, y)}

outs = [perfplot.bench(**build_args(n)) for n in (2, 3, 4, 5)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
    ax = fig.add_subplot(2, 2, i + 1)
    ax.grid(True, which="both")
    outs[i].plot()
plt.show()
So the best performance boost is achieved for k=2 (equivalent to np.triu_indices), and for k=3 it's almost twice as fast.
Case k > 3: numpy_combinations (implemented here) = up to 2.5x speedup
Following this question (thanks @Divakar) I managed to find a way to calculate the values of a specific column based on the previous column and Pascal's triangle. It's not yet optimized as much as it could be, but the results are really promising. Here we go:
from scipy.linalg import pascal

def stretch(a, k):
    l = a.sum() + len(a)*(-k)
    out = np.full(l, -1, dtype=int)
    out[0] = a[0] - 1
    idx = (a-k).cumsum()[:-1]
    out[idx] = a[1:] - 1 - k
    return out.cumsum()

def numpy_combinations(n, k):
    # n, k = data  # benchmark version unpacks a single (n, k) tuple instead
    x = np.array([n])
    P = pascal(n).astype(int)
    C = []
    for b in range(k-1, -1, -1):
        x = stretch(x, b)
        r = P[b][x - b]
        C.append(np.repeat(x, r))
    return n - 1 - np.array(C).T
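A quick sanity check I added (it assumes numpy as np and the function above): for small inputs the rows match what itertools.combinations gives, in the same order.

from itertools import combinations

print(numpy_combinations(4, 2))
# [[0 1]
#  [0 2]
#  [0 3]
#  [1 2]
#  [1 3]
#  [2 3]]
print(np.array_equal(numpy_combinations(4, 2),
                     np.array(list(combinations(range(4), 2)))))
# True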
And the benchmark results are:
# script is the same as in previous example except this part
def build_args(k):
    return {'setup': lambda x: (k, x),
            'kernels': [comb_index, numpy_combinations],
            'n_range': [x for x in range(1, k)],
            'xlabel': f'N',
            'title': f'test of case C({k}, k)',
            'show_progress': True,
            'equality_check': False}

outs = [perfplot.bench(**build_args(n)) for n in (12, 15, 17, 23, 25, 28)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
    ax = fig.add_subplot(2, 3, i + 1)
    ax.grid(True, which="both")
    outs[i].plot()
plt.show()
It still can't beat itertools.combinations for n < 15, but it is the new winner in the other cases. Last but not least, numpy demonstrates its power when the number of combinations gets really big: it was able to survive processing C(28, 14) combinations, which is around 40,000,000 items of size 14.
When r = k = 2, you can also use numpy.triu_indices(n, 1), which indexes the upper triangle of a matrix.
idx = comb_index(5, 2)
from HYRY's answer is equivalent to
idx = np.transpose(np.triu_indices(5, 1))
but built-in, and a few times faster for N above ~20:
timeit comb_index(1000, 2)
32.3 ms ± 443 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
timeit np.transpose(np.triu_indices(1000, 1))
10.2 ms ± 25.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Not sure how it will work out performance-wise, but you can do the combinations on an index array, then extract the actual array slices with np.take:
import numpy as np
from itertools import combinations

def combs_nd(a, r, axis=0):
    a = np.asarray(a)
    if axis < 0:
        axis += a.ndim
    indices = np.arange(a.shape[axis])
    dt = np.dtype([('', np.intp)]*r)
    indices = np.fromiter(combinations(indices, r), dt)
    indices = indices.view(np.intp).reshape(-1, r)
    return np.take(a, indices, axis=axis)
>>> combs_nd([1,2,3], 2)
array([[1, 2],
[1, 3],
[2, 3]])
>>> combs_nd([[1,2,3],[4,5,6]], 2, axis=1)
array([[[1, 2],
[1, 3],
[2, 3]],
[[4, 5],
[4, 6],
[5, 6]]])
Related
I want to "multiply" (for lack of a better description) a numpy array X of size M with a smaller numpy array Y of size N, for every N elements in X. Then, I want to sum the resulting array (almost like a dot product).
I hope the example makes it more clear:
Example
X = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Y = [1,2,3]
Z = mymul(X, Y)
= [0*1, 1*2, 2*3, 3*1, 4*2, 5*3, 6*1, 7*2, 8*3, 9*1]
= [ 0, 2, 6, 3, 8, 15, 6, 14, 24, 9]
result = sum(Z) = 87
X and Y can be of varying lengths and Y is always smaller than X, but not necessarily divisible (e.g. M % N != 0)
I have some solutions but they are quite slow. I'm hoping there is a faster way to do this.
import numpy as np

X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int)
Y = np.array([1, 2, 3], dtype=int)

# these work but are slow for large X, Y

# simple for-loop
t = 0
for i in range(len(X)):
    t += X[i] * Y[i % len(Y)]
print(t)  # 87

# extend Y M/N times so np.dot can be applied
Ytiled = np.tile(Y, int(np.ceil(len(X) / len(Y))))[:len(X)]
t = np.dot(X, Ytiled)
print(t)  # 87
Resize Y to same length as X and then use matrix-multiplication -
In [52]: np.dot(X, np.resize(Y,len(X)))
Out[52]: 87
Alternative to using np.resize would be with tiling. Hence, np.tile(Y,(m+n-1)//n)[:m] for m,n = len(X), len(Y), could replace np.resize(Y,len(X)) for a faster one.
Another without resizing Y to achieve memory-efficiency -
In [79]: m,n = len(X), len(Y)
In [80]: s = n*(m//n)
In [81]: X2D = X[:s].reshape(-1,n)
In [82]: X2D.dot(Y).sum() + np.dot(X[s:],Y[:m-s])
Out[82]: 87
Alternatively, we can use np.einsum('ij,j->',X2D,Y) to replace X2D.dot(Y).sum().
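Pulling the memory-efficient variant together into a reusable helper (my wrapper; the name cyclic_dot is made up, the logic is exactly the X2D trick above):

import numpy as np

def cyclic_dot(X, Y):
    # sum of X[i] * Y[i % len(Y)] without materialising a tiled copy of Y
    m, n = len(X), len(Y)
    s = n * (m // n)                 # length of the part that tiles evenly
    head = X[:s].reshape(-1, n).dot(Y).sum()
    tail = np.dot(X[s:], Y[:m - s])  # leftover partial period
    return head + tail

X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Y = np.array([1, 2, 3])
print(cyclic_dot(X, Y))  # 87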
You can use convolve (documentation):
np.convolve(X, Y[::-1], 'same')[::len(Y)].sum()
Remember to reverse the second array.
Setup
Suppose I have
bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
k = 3
I need the position of the maximal value for each unique bin in bins.
# Bin == 0
# ↓ ↓ ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
# ↑ ↑ ↑
# ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 8 and happens at position 0
(vals * (bins == 0)).argmax()
0
# Bin == 1
# ↓ ↓ ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
# ↑ ↑ ↑
# ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 4 and happens at position 3
(vals * (bins == 1)).argmax()
3
# Bin == 2
# ↓ ↓ ↓ ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
# ↑ ↑ ↑ ↑
# ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 9 and happens at position 9
(vals * (bins == 2)).argmax()
9
Those functions are hacky and aren't even generalizable for negative values.
Question
How do I get all such values in the most efficient manner using Numpy?
What I've tried.
def binargmax(bins, vals, k):
    out = -np.ones(k, np.int64)
    trk = np.empty(k, vals.dtype)
    trk.fill(np.nanmin(vals) - 1)
    for i in range(len(bins)):
        v = vals[i]
        b = bins[i]
        if v > trk[b]:
            trk[b] = v
            out[b] = i
    return out

binargmax(bins, vals, k)

array([0, 3, 9])
LINK TO TESTING AND VALIDATION
The numpy_indexed library:
I know this isn't technically numpy, but the numpy_indexed library has a vectorized group_by function which is perfect for this; I just wanted to share it as an alternative I use frequently:
>>> import numpy_indexed as npi
>>> npi.group_by(bins).argmax(vals)
(array([0, 1, 2]), array([0, 3, 9], dtype=int64))
Using a simple pandas groupby and idxmax:
df = pd.DataFrame({'bins': bins, 'vals': vals})
df.groupby('bins').vals.idxmax()
Using a sparse.csr_matrix
This option is very fast on very large inputs.
sparse.csr_matrix(
    (vals, bins, np.arange(vals.shape[0]+1)), (vals.shape[0], k)
).argmax(0)
# matrix([[0, 3, 9]])
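To see why this works (my illustration, not from the answer): the CSR triple (vals, bins, np.arange(N+1)) builds an N x k matrix whose row i holds vals[i] in column bins[i], so a column-wise argmax is exactly the per-bin argmax (assuming non-negative vals, as in the question):

import numpy as np

bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
k = 3

dense = np.zeros((vals.shape[0], k))
dense[np.arange(vals.shape[0]), bins] = vals  # dense equivalent of the CSR layout
print(dense.argmax(0))  # [0 3 9]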
Performance
Functions
def chris(bins, vals, k):
    return npi.group_by(bins).argmax(vals)

def chris2(df):
    return df.groupby('bins').vals.idxmax()

def chris3(bins, vals, k):
    sparse.csr_matrix((vals, bins, np.arange(vals.shape[0] + 1)), (vals.shape[0], k)).argmax(0)

def divakar(bins, vals, k):
    mx = vals.max()+1
    sidx = bins.argsort()
    sb = bins[sidx]
    sm = np.r_[sb[:-1] != sb[1:], True]
    argmax_out = np.argsort(bins*mx + vals)[sm]
    max_out = vals[argmax_out]
    return max_out, argmax_out

def divakar2(bins, vals, k):
    last_idx = np.bincount(bins).cumsum()-1
    scaled_vals = bins*(vals.max()+1) + vals
    argmax_out = np.argsort(scaled_vals)[last_idx]
    max_out = vals[argmax_out]
    return max_out, argmax_out

def user545424(bins, vals, k):
    return np.argmax(vals*(bins == np.arange(bins.max()+1)[:,np.newaxis]), axis=-1)

def user2699(bins, vals, k):
    res = []
    for v in np.unique(bins):
        idx = (bins==v)
        r = np.where(idx)[0][np.argmax(vals[idx])]
        res.append(r)
    return np.array(res)

def sacul(bins, vals, k):
    return np.lexsort((vals, bins))[np.append(np.diff(np.sort(bins)), 1).astype(bool)]

@njit
def piRSquared(bins, vals, k):
    out = -np.ones(k, np.int64)
    trk = np.empty(k, vals.dtype)
    trk.fill(np.nanmin(vals))
    for i in range(len(bins)):
        v = vals[i]
        b = bins[i]
        if v > trk[b]:
            trk[b] = v
            out[b] = i
    return out
Setup
import numpy_indexed as npi
import numpy as np
import pandas as pd
from timeit import timeit
import matplotlib.pyplot as plt
from numba import njit
from scipy import sparse

res = pd.DataFrame(
    index=['chris', 'chris2', 'chris3', 'divakar', 'divakar2', 'user545424', 'user2699', 'sacul', 'piRSquared'],
    columns=[10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
    dtype=float
)

k = 5

for f in res.index:
    for c in res.columns:
        bins = np.random.randint(0, k, c)
        k = 5
        vals = np.random.rand(c)
        df = pd.DataFrame({'bins': bins, 'vals': vals})
        stmt = '{}(df)'.format(f) if f in {'chris2'} else '{}(bins, vals, k)'.format(f)
        setp = 'from __main__ import bins, vals, k, df, {}'.format(f)
        res.at[f, c] = timeit(stmt, setp, number=50)

ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N")
ax.set_ylabel("time (relative)")
plt.show()
Results
Results with a much larger k (This is where broadcasting gets hit hard):
res = pd.DataFrame(
    index=['chris', 'chris2', 'chris3', 'divakar', 'divakar2', 'user545424', 'user2699', 'sacul', 'piRSquared'],
    columns=[10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
    dtype=float
)

k = 500

for f in res.index:
    for c in res.columns:
        bins = np.random.randint(0, k, c)
        vals = np.random.rand(c)
        df = pd.DataFrame({'bins': bins, 'vals': vals})
        stmt = '{}(df)'.format(f) if f in {'chris2'} else '{}(bins, vals, k)'.format(f)
        setp = 'from __main__ import bins, vals, df, k, {}'.format(f)
        res.at[f, c] = timeit(stmt, setp, number=50)

ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N")
ax.set_ylabel("time (relative)")
plt.show()
As is apparent from the graphs, broadcasting is a nifty trick when the number of groups is small; however, the time and memory cost of broadcasting grows too fast at higher k values for it to remain highly performant.
Here's one way: offset each group's data so that we can use argsort on the entire array in one go -
def binargmax_scale_sort(bins, vals):
    w = np.bincount(bins)
    valid_mask = w != 0
    last_idx = w[valid_mask].cumsum()-1
    scaled_vals = bins*(vals.max()+1) + vals
    #unique_bins = np.flatnonzero(valid_mask) # if needed
    return len(bins) - 1 - np.argsort(scaled_vals[::-1], kind='mergesort')[last_idx]
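A quick check with the arrays from the question (my addition):

bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])

print(binargmax_scale_sort(bins, vals))  # [0 3 9]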
Okay, here's my linear-time entry, using only indexing and np.maximum.at / np.minimum.at. It assumes bins go up from 0 to max(bins).
def via_at(bins, vals):
    max_vals = np.full(bins.max()+1, -np.inf)
    np.maximum.at(max_vals, bins, vals)
    expanded = max_vals[bins]
    max_idx = np.full_like(max_vals, np.inf)
    np.minimum.at(max_idx, bins, np.where(vals == expanded, np.arange(len(bins)), np.inf))
    return max_vals, max_idx
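For example, with the question's arrays (my addition; note both outputs come back as floats because of the inf sentinels):

bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])

max_vals, max_idx = via_at(bins, vals)
print(max_vals)  # [8. 4. 9.]
print(max_idx)   # [0. 3. 9.]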
How about this:
>>> import numpy as np
>>> bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
>>> vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
>>> k = 3
>>> np.argmax(vals*(bins == np.arange(k)[:,np.newaxis]),axis=-1)
array([0, 3, 9])
If you're going for readability, this might not be the best solution, but I think it works
def binargsort(bins, vals):
    s = np.lexsort((vals, bins))
    s2 = np.sort(bins)
    msk = np.roll(s2, -1) != s2
    # or use this for msk, but not noticeably better for performance:
    # msk = np.append(np.diff(np.sort(bins)), 1).astype(bool)
    return s[msk]

binargsort(bins, vals)

array([0, 3, 9])
Explanation:
lexsort sorts the indices of vals according to the sorted order of bins, then by the order of vals:
>>> np.lexsort((vals,bins))
array([7, 1, 0, 8, 2, 3, 4, 5, 6, 9])
So then you can mask that by where sorted bins differ from one index to the next:
>>> np.sort(bins)
array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])
# Find where sorted bins end, use that as your mask on the `lexsort`
>>> np.append(np.diff(np.sort(bins)),1)
array([0, 0, 1, 0, 0, 1, 0, 0, 0, 1])
>>> np.lexsort((vals,bins))[np.append(np.diff(np.sort(bins)),1).astype(bool)]
array([0, 3, 9])
This is a fun little problem to solve. My approach is to get an index into vals based on the values in bins. Using where to get the points where the index is True, in combination with argmax on those points in vals, gives the resulting value.
def binargmaxA(bins, vals):
    res = []
    for v in np.unique(bins):
        idx = (bins == v)
        r = np.where(idx)[0][np.argmax(vals[idx])]
        res.append(r)
    return np.array(res)
It's possible to remove the call to unique by using range(k) to get possible bin values. This speeds things up, but still leaves it with poor performance as the size of k increases.
def binargmaxA2(bins, vals, k):
    res = []
    for v in range(k):
        idx = (bins == v)
        r = np.where(idx)[0][np.argmax(vals[idx])]
        res.append(r)
    return np.array(res)
As a last try: comparing against each value slows things down substantially. This version computes the sorted array of values rather than making a comparison for each unique value. Well, it actually computes the sorted indices and only gets the sorted values when needed, as that avoids loading vals into memory one extra time. Performance still scales with the number of bins, but much more slowly than before.
def binargmaxB(bins, vals):
    idx = np.argsort(bins)  # Find sorted indices
    split = np.r_[0, np.where(np.diff(bins[idx]))[0]+1, len(bins)]  # Compute where values start in sorted array
    newmax = [np.argmax(vals[idx[i1:i2]]) for i1, i2 in zip(split, split[1:])]  # Find max for each value in sorted array
    return idx[newmax + split[:-1]]  # Convert to indices in unsorted array
Benchmarks
Here's some benchmarks with the other answers.
3000 elements
With a somewhat larger dataset (bins = randint(0, 30, 3000); vals = randn(3000); k=30;)
171us binargmax_scale_sort2 by Divakar
209us this answer, version B
281us binargmax_scale_sort by Divakar
329us broadcast version by user545424
399us this answer, version A
416us answer by sacul, using lexsort
899us reference code by piRSquared
30000 elements
And an even larger dataset (bins = randint(0, 30, 30000); vals = randn(30000); k=30). Surprisingly this doesn't change the relative performance between solutions.
1.27ms this answer, version B
2.01ms binargmax_scale_sort2 by Divakar
2.38ms broadcast version by user545424
2.68ms this answer, version A
5.71ms answer by sacul, using lexsort
9.12ms reference code by piRSquared
Edit: I didn't change k with the increasing number of possible bin values; now that I've fixed that, the benchmarks are more even.
1000 bin values
Increasing the number of unique bin values may also have an impact on performance. The solutions by Divakar and sacul are mostly unaffected, while the others take quite a substantial hit.
bins = randint(0, 1000, 30000); vals = randn(30000); k = 1000
1.99ms binargmax_scale_sort2 by Divakar
3.48ms this answer, version B
6.15ms answer by sacul, using lexsort
10.6ms reference code by piRSquared
27.2ms this answer, version A
129ms broadcast version by user545424
Edit: I've included benchmarks for the reference code in the question; it's surprisingly competitive, especially with more bins.
I know you said to use Numpy, but if Pandas is acceptable:
import numpy as np
import pandas as pd

(pd.DataFrame(
    {'bins': np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2]),
     'values': np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])})
 .groupby('bins')
 .idxmax())
      values
bins
0          0
1          3
2          9
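If a plain NumPy array is wanted rather than a pandas result, the same groupby can be converted at the end (my addition, using the bins/vals arrays from the question):

idx = (pd.DataFrame({'bins': bins, 'values': vals})
       .groupby('bins')['values']
       .idxmax()
       .to_numpy())
print(idx)  # [0 3 9]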
I am not sure what the title of this question should be. But let's say we have 2 arrays, values and distances.
values = np.array([[-1,-1,-1],
[1, 2, 0],
[-1,-1,-1]])
distances = np.array([[1,2,3],
[6,5,4],
[7,8,9]])
I would like to get the values that are non-negative, ordered by their corresponding distances from the distances array.
So with the example above, the non-negative values are [1, 2, 0] and their distances are [6, 5, 4]. Thus, sorting by the corresponding distance, I would like to have [0, 2, 1] as the answer.
My code is below. It works, but I would like a solution that uses just numpy. I'm sure that would be more efficient than this:
import numpy as np
import heapq

def get_sorted_values(seek_val, values, distances):
    r, c = np.where(values >= seek_val)
    di = distances[r, c]
    vals = values[r, c]
    print("di", di)
    print("vals", vals)
    if len(di) >= 1:
        heap = []
        for d, v in zip(di, vals):
            heapq.heappush(heap, (d, v))
        lists = []
        while heap:
            d, v = heapq.heappop(heap)
            lists.append(v)
        return lists
    else:
        ## NOTHING FOUND
        return None
Input:
seek_val = 0
values = np.array([[-1,-1,-1],
[1,2,0],
[-1,-1,-1]])
distances = np.array([[1,2,3],
[6,5,4],
[7,8,9]])
print("Ans:",get_sorted_values(seek_val, values, distances))
Output:
di [6 5 4]
vals [1 2 0]
Ans: [0, 2, 1]
"one liner":
values[np.where(values >= 0)][np.argsort(distances[np.where(values >= 0)])]
Out[981]: array([0, 2, 1])
Repeating np.where(values >= 0) is inefficient; you could make a variable if values is big:
v_indx = np.where(values >= 0)
values[v_indx][np.argsort(distances[v_indx])]
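Wrapped up as a drop-in replacement for the get_sorted_values function from the question (my sketch; the name get_sorted_values_np is made up, and it keeps the seek_val parameter and the None-when-nothing-found behaviour):

def get_sorted_values_np(seek_val, values, distances):
    mask = values >= seek_val
    if not mask.any():
        return None  # nothing found
    # boolean-mask indexing flattens the selection, same as the np.where tuple
    return values[mask][np.argsort(distances[mask])]

print(get_sorted_values_np(0, values, distances))  # [0 2 1]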
Try np.argsort
import numpy as np
values = np.array([[-1,-1,-1],
[ 1, 2, 0],
[-1,-1,-1]])
distances = np.array([[1, 2, 3],
[6, 5, 4],
[7, 8, 9]])
print(values[values >= 0])
# [1 2 0]
print(distances[values >= 0])
# [6 5 4]
print('Ans:', values[values >= 0][np.argsort(distances[values >= 0])])
# Ans: [0 2 1]
Let's say I have 2 one-dimensional (1D) numpy arrays, a and b, with lengths n1 and n2 respectively. I also have a function, F(x,y), that takes two values. Now I want to apply that function to each pair of values from my two 1D arrays, so the result would be a 2D numpy array with shape n1, n2. The i, j element of the two-dimensional array would be F(a[i], b[j]).
I haven't been able to find a way of doing this without a horrible amount of for-loops, and I'm sure there's a much simpler (and faster!) way of doing this in numpy.
Thanks in advance!
You can use numpy broadcasting to do the calculation on the two arrays, turning a into a vertical 2D array using newaxis:
In [11]: a = np.array([1, 2, 3]) # n1 = 3
...: b = np.array([4, 5]) # n2 = 2
...: #if function is c(i, j) = a(i) + b(j)*2:
...: c = a[:, None] + b*2
In [12]: c
Out[12]:
array([[ 9, 11],
[10, 12],
[11, 13]])
To benchmark:
In [28]: a = arange(100)
In [29]: b = arange(222)
In [30]: timeit r = np.array([[f(i, j) for j in b] for i in a])
10 loops, best of 3: 29.9 ms per loop
In [31]: timeit c = a[:, None] + b*2
10000 loops, best of 3: 71.6 us per loop
If F is beyond your control, you can wrap it automatically to be "vector-aware" by using numpy.vectorize. I present a working example below where I define my own F just for completeness. This approach has the advantage of simplicity, but if you have control over F, rewriting it with a bit of care to vectorize correctly can have huge speed benefits.
import numpy

n1 = 100
n2 = 200

a = numpy.arange(n1)
b = numpy.arange(n2)

def F(x, y):
    return x + y

# Everything above this is setup, the answer to your question lies here:
fv = numpy.vectorize(F)
r = fv(a[:, numpy.newaxis], b)
On my computer, the following timings are found, showing the price you pay for "automatic" vectorisation:
%timeit fv(a[:, numpy.newaxis], b)
100 loops, best of 3: 3.58 ms per loop
%timeit F(a[:, numpy.newaxis], b)
10000 loops, best of 3: 38.3 µs per loop
If F() works with broadcast arguments, definitely use that, as others describe.
An alternative is to use
np.fromfunction
(function_on_an_int_grid would be a better name.)
The following just maps the int grid to your a-b grid, then into F():
import numpy as np

def func_allpairs(F, a, b):
    """ -> array len(a) x len(b):
        [[ F( a0 b0 )  F( a0 b1 ) ... ]
         [ F( a1 b0 )  F( a1 b1 ) ... ]
         ...
        ]
    """
    def fab(i, j):
        return F(a[i], b[j])  # F scalar or vec, e.g. gradient
    return np.fromfunction(fab, (len(a), len(b)), dtype=int)  # -> fab( all pairs )

#...............................................................................
def F(x, y):
    return x + 10*y

a = np.arange(100)
b = np.arange(222)
A = func_allpairs(F, a, b)
# %timeit: 1000 loops, best of 3: 241 µs per loop -- imac i5, np 1.9.3
As another alternative that's a bit more extensible than the dot-product, in less than 1/5th - 1/9th the time of nested list comprehensions, use numpy.newaxis (took a bit more digging to find):
>>> import numpy
>>> a = numpy.array([0,1,2])
>>> b = numpy.array([0,1,2,3])
This time, using the power function:
>>> pow(a[:,numpy.newaxis], b)
array([[1, 0, 0, 0],
[1, 1, 1, 1],
[1, 2, 4, 8]])
Compared with an alternative:
>>> numpy.array([[pow(i,j) for j in b] for i in a])
array([[1, 0, 0, 0],
[1, 1, 1, 1],
[1, 2, 4, 8]])
And comparing the timing:
>>> import timeit
>>> timeit.timeit('numpy.array([[pow(i,j) for i in a] for j in b])', 'import numpy; a=numpy.arange(3); b=numpy.arange(4)')
31.943181037902832
>>> timeit.timeit('pow(a[:, numpy.newaxis], b)', 'import numpy; a=numpy.arange(3); b=numpy.arange(4)')
5.985810041427612
>>> timeit.timeit('numpy.array([[pow(i,j) for i in a] for j in b])', 'import numpy; a=numpy.arange(10); b=numpy.arange(10)')
109.74687385559082
>>> timeit.timeit('pow(a[:, numpy.newaxis], b)', 'import numpy; a=numpy.arange(10); b=numpy.arange(10)')
11.989138126373291
You could use list comprehensions to create an array of arrays:
import numpy as np

# Arrays
a = np.array([1, 2, 3])  # n1 = 3
b = np.array([4, 5])     # n2 = 2

# Your function (just an example)
def f(i, j):
    return i + j

result = np.array([[f(i, j) for j in b] for i in a])
print(result)
Output:
[[5 6]
[6 7]
[7 8]]
May I suggest, if your use-case is more limited to products, that you use the outer-product?
e.g.:
import numpy
a = numpy.array([0, 1, 2])
b = numpy.array([0, 1, 2, 3])
numpy.outer(a, b)
returns
array([[0, 0, 0, 0],
[0, 1, 2, 3],
[0, 2, 4, 6]])
You can then apply other transformations:
numpy.outer(a,b) + 1
returns
array([[1, 1, 1, 1],
[1, 2, 3, 4],
[1, 3, 5, 7]])
This is much faster:
>>> import timeit
>>> timeit.timeit('numpy.array([[i*j for i in a] for j in b])', 'import numpy; a=numpy.arange(3); b=numpy.arange(4)')
31.79583477973938
>>> timeit.timeit('numpy.outer(a,b)', 'import numpy; a=numpy.arange(3); b=numpy.arange(4)')
9.351550102233887
>>> timeit.timeit('numpy.outer(a,b)+1', 'import numpy; a=numpy.arange(3); b=numpy.arange(4)')
12.308301210403442
Is there a way to slice a 2d array in numpy into smaller 2d arrays?
Example
[[1,2,3,4],  ->  [[1,2]  [3,4]
 [5,6,7,8]]       [5,6]  [7,8]]
So I basically want to cut down a 2x4 array into 2 2x2 arrays. Looking for a generic solution to be used on images.
There was another question a couple of months ago which clued me in to the idea of using reshape and swapaxes. The h//nrows makes sense since this keeps the first block's rows together. It also makes sense that you'll need nrows and ncols to be part of the shape. -1 tells reshape to fill in whatever number is necessary to make the reshape valid. Armed with the form of the solution, I just tried things until I found the formula that works.
You should be able to break your array into "blocks" using some combination of reshape and swapaxes:
def blockshaped(arr, nrows, ncols):
    """
    Return an array of shape (n, nrows, ncols) where
    n * nrows * ncols = arr.size

    If arr is a 2D array, the returned array should look like n subblocks with
    each subblock preserving the "physical" layout of arr.
    """
    h, w = arr.shape
    assert h % nrows == 0, f"{h} rows is not evenly divisible by {nrows}"
    assert w % ncols == 0, f"{w} cols is not evenly divisible by {ncols}"
    return (arr.reshape(h//nrows, nrows, -1, ncols)
               .swapaxes(1, 2)
               .reshape(-1, nrows, ncols))
turns c
np.random.seed(365)
c = np.arange(24).reshape((4, 6))
print(c)
[out]:
[[ 0 1 2 3 4 5]
[ 6 7 8 9 10 11]
[12 13 14 15 16 17]
[18 19 20 21 22 23]]
into
print(blockshaped(c, 2, 3))
[out]:
[[[ 0 1 2]
[ 6 7 8]]
[[ 3 4 5]
[ 9 10 11]]
[[12 13 14]
[18 19 20]]
[[15 16 17]
[21 22 23]]]
I've posted an inverse function, unblockshaped, here, and an N-dimensional generalization here. The generalization gives a little more insight into the reasoning behind this algorithm.
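Since the linked code isn't reproduced here, this is a sketch of such an inverse (my reconstruction of the idea; it assumes the block ordering that blockshaped above produces):

def unblockshaped(arr, h, w):
    """
    Return an array of shape (h, w) where h * w = arr.size.
    Reassembles the (n, nrows, ncols) blocks produced by blockshaped.
    """
    n, nrows, ncols = arr.shape
    return (arr.reshape(h//nrows, -1, nrows, ncols)
               .swapaxes(1, 2)
               .reshape(h, w))

# round-trip check with the 4x6 array c from above
print(np.array_equal(unblockshaped(blockshaped(c, 2, 3), 4, 6), c))  # True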
Note that there is also superbatfish's blockwise_view. It arranges the blocks in a different format (using more axes) but it has the advantage of (1) always returning a view and (2) being capable of handling arrays of any dimension.
It seems to me that this is a task for numpy.split or some variant.
e.g.
a = np.arange(30).reshape([5,6]) #a.shape = (5,6)
a1 = np.split(a,3,axis=1)
#'a1' is a list of 3 arrays of shape (5,2)
a2 = np.split(a, [2,4])
#'a2' is a list of three arrays of shape (2,5), (2,5), (1,5)
If you have an NxN image you can create, e.g., a list of 2 NxN/2 subimages, and then divide them along the other axis.
numpy.hsplit and numpy.vsplit are also available.
There are some other answers that seem well-suited for your specific case already, but your question piqued my interest in the possibility of a memory-efficient solution usable up to the maximum number of dimensions that numpy supports, and I ended up spending most of the afternoon coming up with a possible method. (The method itself is relatively simple; it's just that I still haven't used most of the really fancy features that numpy supports, so most of the time was spent researching what numpy has available and how much it can do, so that I didn't have to do it myself.)
import numpy as np

def blockgen(array, bpa):
    """Creates a generator that yields multidimensional blocks from the given
    array(_like); bpa is an array_like consisting of the number of blocks per axis
    (minimum of 1, must be a divisor of the corresponding axis size of array). As
    the blocks are selected using normal numpy slicing, they will be views rather
    than copies; this is good for very large multidimensional arrays that are being
    blocked, and for very large blocks, but it also means that the result must be
    copied if it is to be modified (unless modifying the original data as well is
    intended)."""
    bpa = np.asarray(bpa)  # in case bpa wasn't already an ndarray

    # parameter checking
    if array.ndim != bpa.size:  # bpa doesn't match array dimensionality
        raise ValueError("Size of bpa must be equal to the array dimensionality.")
    if (not np.issubdtype(bpa.dtype, np.integer)  # bpa must be all integers (np.int is removed in recent NumPy)
            or (bpa < 1).any()                    # all values in bpa must be >= 1
            or (array.shape % bpa).any()):        # % != 0 means not evenly divisible
        raise ValueError("bpa ({0}) must consist of nonzero positive integers "
                         "that evenly divide the corresponding array axis "
                         "size".format(bpa))

    # generate block edge indices
    rgen = (np.r_[:array.shape[i]+1:array.shape[i]//blk_n]
            for i, blk_n in enumerate(bpa))

    # build slice sequences for each axis (unfortunately broadcasting
    # can't be used to make the items easy to operate over)
    c = [[np.s_[i:j] for i, j in zip(r[:-1], r[1:])] for r in rgen]

    # Now to get the blocks; this is slightly less efficient than it could be
    # because numpy doesn't like jagged arrays and I didn't feel like writing
    # a ufunc for it.
    for idxs in np.ndindex(*bpa):
        blockbounds = tuple(c[j][idxs[j]] for j in range(bpa.size))
        yield array[blockbounds]
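A small usage example (my addition), splitting a 4x6 array into 2 blocks per axis:

arr = np.arange(24).reshape(4, 6)
for block in blockgen(arr, (2, 2)):
    print(block.shape)  # prints (2, 3) four times; each block is a view into arr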
Your question is practically the same as this one. You can use the one-liner with np.ndindex() and reshape():
def cutter(a, r, c):
    lenr = a.shape[0] // r
    lenc = a.shape[1] // c
    return np.array([a[i*r:(i+1)*r, j*c:(j+1)*c] for (i, j) in np.ndindex(lenr, lenc)]).reshape(lenr, lenc, r, c)
To create the result you want:
a = np.arange(1, 9).reshape(2, 4)
#array([[1, 2, 3, 4],
# [5, 6, 7, 8]])
cutter( a, 1, 2 )
#array([[[[1, 2]],
# [[3, 4]]],
# [[[5, 6]],
# [[7, 8]]]])
Here is a minor enhancement to TheMeaningfulEngineer's answer that handles the case when the big 2D array cannot be perfectly sliced into equally sized subarrays.
def blockfy(a, p, q):
    '''
    Divides array a into subarrays of size p-by-q
    p: block row size
    q: block column size
    '''
    m = a.shape[0]  # image row size
    n = a.shape[1]  # image column size

    # pad array with NaNs so it can be divided by p row-wise and by q column-wise
    bpr = ((m-1)//p + 1)  # number of block-rows
    bpc = ((n-1)//q + 1)  # number of block-columns
    M = p * bpr
    N = q * bpc

    A = np.nan * np.ones([M, N])
    A[:a.shape[0], :a.shape[1]] = a

    block_list = []
    previous_row = 0
    for row_block in range(bpr):         # iterate over block-rows
        previous_row = row_block * p
        previous_column = 0
        for column_block in range(bpc):  # iterate over block-columns
            previous_column = column_block * q
            block = A[previous_row:previous_row+p, previous_column:previous_column+q]

            # remove nan columns and nan rows
            nan_cols = np.all(np.isnan(block), axis=0)
            block = block[:, ~nan_cols]
            nan_rows = np.all(np.isnan(block), axis=1)
            block = block[~nan_rows, :]

            ## append
            if block.size:
                block_list.append(block)

    return block_list
Examples:
a = np.arange(25)
a = a.reshape((5,5))
out = blockfy(a, 2, 3)
a->
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])
out[0] ->
array([[0., 1., 2.],
[5., 6., 7.]])
out[1]->
array([[3., 4.],
[8., 9.]])
out[-1]->
array([[23., 24.]])
For now it just works when the big 2D array can be perfectly sliced into equally sized subarrays.
The code below slices
a ->array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
into this
block_array->
array([[[ 0, 1, 2],
[ 6, 7, 8]],
[[ 3, 4, 5],
[ 9, 10, 11]],
[[12, 13, 14],
[18, 19, 20]],
[[15, 16, 17],
[21, 22, 23]]])
p and q determine the block size
Code
import numpy as np

a = np.arange(24)
a = a.reshape((4, 6))

m = a.shape[0]  # image row size
n = a.shape[1]  # image column size

p = 2  # block row size
q = 3  # block column size

blocks_per_row = m // p
blocks_per_column = n // q

block_array = []
previous_row = 0
for row_block in range(blocks_per_row):
    previous_row = row_block * p
    previous_column = 0
    for column_block in range(blocks_per_column):
        previous_column = column_block * q
        block = a[previous_row:previous_row+p, previous_column:previous_column+q]
        block_array.append(block)

block_array = np.array(block_array)
If you want a solution that also handles the cases when the matrix is not equally divided, you can use this:
from operator import add
from functools import reduce
import numpy as np

half_split = np.array_split(input, 2)
res = map(lambda x: np.array_split(x, 2, axis=1), half_split)
res = reduce(add, res)
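For instance, on a 5x5 array this yields four unevenly sized quadrants (my example; input here is just the array being split):

input = np.arange(25).reshape(5, 5)
half_split = np.array_split(input, 2)
res = reduce(add, (np.array_split(x, 2, axis=1) for x in half_split))
print([blk.shape for blk in res])  # [(3, 3), (3, 2), (2, 3), (2, 2)]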
Here is a solution based on unutbu's answer that handles the case where the matrix cannot be equally divided. In this case, it will resize the matrix using some interpolation before slicing. You need OpenCV for this. Note that I had to swap ncols and nrows to make it work; I didn't figure out why.
import numpy as np
import cv2
import math

def blockshaped(arr, r_nbrs, c_nbrs, interp=cv2.INTER_LINEAR):
    """
    arr     a 2D array, typically an image
    r_nbrs  number of rows of blocks
    c_nbrs  number of cols of blocks
    """
    arr_h, arr_w = arr.shape

    size_w = int(math.floor(arr_w // c_nbrs) * c_nbrs)
    size_h = int(math.floor(arr_h // r_nbrs) * r_nbrs)

    if size_w != arr_w or size_h != arr_h:
        arr = cv2.resize(arr, (size_w, size_h), interpolation=interp)

    nrows = int(size_w // r_nbrs)
    ncols = int(size_h // c_nbrs)

    return (arr.reshape(r_nbrs, ncols, -1, nrows)
               .swapaxes(1, 2)
               .reshape(-1, ncols, nrows))
a = np.random.randint(1, 9, size=(9,9))
out = [np.hsplit(x, 3) for x in np.vsplit(a,3)]
print(a)
print(out)
yields
[[7 6 2 4 4 2 5 2 3]
[2 3 7 6 8 8 2 6 2]
[4 1 3 1 3 8 1 3 7]
[6 1 1 5 7 2 1 5 8]
[8 8 7 6 6 1 8 8 4]
[6 1 8 2 1 4 5 1 8]
[7 3 4 2 5 6 1 2 7]
[4 6 7 5 8 2 8 2 8]
[6 6 5 5 6 1 2 6 4]]
[[array([[7, 6, 2],
[2, 3, 7],
[4, 1, 3]]), array([[4, 4, 2],
[6, 8, 8],
[1, 3, 8]]), array([[5, 2, 3],
[2, 6, 2],
[1, 3, 7]])], [array([[6, 1, 1],
[8, 8, 7],
[6, 1, 8]]), array([[5, 7, 2],
[6, 6, 1],
[2, 1, 4]]), array([[1, 5, 8],
[8, 8, 4],
[5, 1, 8]])], [array([[7, 3, 4],
[4, 6, 7],
[6, 6, 5]]), array([[2, 5, 6],
[5, 8, 2],
[5, 6, 1]]), array([[1, 2, 7],
[8, 2, 8],
[2, 6, 4]])]]
I publish my solution. Notice that this code doesn't actually create copies of the original array, so it works well with big data. Moreover, it doesn't crash if the array cannot be divided evenly (but you can easily add a condition for that by deleting ceil and checking whether v_slices and h_slices divide without remainder).
import numpy as np
from math import ceil

a = np.arange(9).reshape(3, 3)

p, q = 2, 2
width, height = a.shape

v_slices = ceil(width / p)
h_slices = ceil(height / q)

for h in range(h_slices):
    for v in range(v_slices):
        block = a[h * p : h * p + p, v * q : v * q + q]
        # do something with a block
This code changes (or, more precisely, gives you direct access to part of an array) this:
[[0 1 2]
[3 4 5]
[6 7 8]]
Into this:
[[0 1]
[3 4]]
[[2]
[5]]
[[6 7]]
[[8]]
If you need actual copies, Aenaon's code is what you are looking for.
If you are sure that the big array can be divided evenly, you can use the numpy splitting tools.
To add to @Aenaon's answer and his blockfy function: if you are working with color images / a 3D array, here is my pipeline to create crops of 224 x 224 for 3-channel input.
(blockfy is the same function as defined in Aenaon's answer above.)
It is then extended to:
import os
import numpy as np
from skimage import io      # assumed: scikit-image for imread
from PIL import Image       # assumed: Pillow for saving patches

for file in os.listdir(path_to_crop):                     ### list files in your folder
    img = io.imread(path_to_crop + file, as_gray=False)   ### open image

    r = blockfy(img[:, :, 0], 224, 224)  ### crop blocks of 224 x 224 for red channel
    g = blockfy(img[:, :, 1], 224, 224)  ### crop blocks of 224 x 224 for green channel
    b = blockfy(img[:, :, 2], 224, 224)  ### crop blocks of 224 x 224 for blue channel

    for x in range(0, len(r)):
        img = np.array((r[x], g[x], b[x]))  ### combine each channel into one patch by patch
        img = img.astype(np.uint8)          ### cast back to proper integers

        img_swap = img.swapaxes(0, 2)         ### need to swap axes due to the way things were processed
        img_swap_2 = img_swap.swapaxes(0, 1)  ### do it again

        Image.fromarray(img_swap_2).save(path_save_crop + str(x) + "bounding" + file,
                                         format='jpeg',
                                         subsampling=0,
                                         quality=100)  ### save patch with new name etc