Setup
Suppose I have
bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
k = 3
I need the position of the maximal value for each unique bin in bins.
# Bin == 0
#  ↓ ↓           ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
#  ↑ ↑           ↑
#  ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 8 and happens at position 0
(vals * (bins == 0)).argmax()
0
# Bin == 1
#      ↓ ↓         ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
#      ↑ ↑         ↑
#        ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 4 and happens at position 3
(vals * (bins == 1)).argmax()
3
# Bin == 2
#          ↓ ↓ ↓     ↓
# [0 0 1 1 2 2 2 0 1 2]
# [8 7 3 4 1 2 6 5 0 9]
#          ↑ ↑ ↑     ↑
#                    ⇧
# [0 1 2 3 4 5 6 7 8 9]
# Maximum is 9 and happens at position 9
(vals * (bins == 2)).argmax()
9
Those functions are hacky and aren't even generalizable for negative values.
Question
How do I get all such values in the most efficient manner using Numpy?
What I've tried.
def binargmax(bins, vals, k):
out = -np.ones(k, np.int64)
trk = np.empty(k, vals.dtype)
trk.fill(np.nanmin(vals) - 1)
for i in range(len(bins)):
v = vals[i]
b = bins[i]
if v > trk[b]:
trk[b] = v
out[b] = i
return out
binargmax(bins, vals, k)
array([0, 3, 9])
Testing and validation are covered in the Performance section below.
The numpy_indexed library:
I know this isn't technically numpy, but the numpy_indexed library has a vectorized group_by function which is perfect for this; I just wanted to share it as an alternative I use frequently:
>>> import numpy_indexed as npi
>>> npi.group_by(bins).argmax(vals)
(array([0, 1, 2]), array([0, 3, 9], dtype=int64))
Using a simple pandas groupby and idxmax:
df = pd.DataFrame({'bins': bins, 'vals': vals})
df.groupby('bins').vals.idxmax()
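For the example arrays above this returns, for each bin, the index of its maximum value; the result should look roughly like this (a sketch of the expected pandas output):
bins
0    0
1    3
2    9
Name: vals, dtype: int64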
Using a sparse.csr_matrix
This option is very fast on very large inputs.
sparse.csr_matrix(
(vals, bins, np.arange(vals.shape[0]+1)), (vals.shape[0], k)
).argmax(0)
# matrix([[0, 3, 9]])
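To see why this works, it can help to look at the dense form of that CSR matrix: row i holds vals[i] in column bins[i], so a column-wise argmax returns, for each bin, the row (i.e. the original position) of its maximum. A small sketch for the example data (the name m is just for illustration); note that, like the masking approach in the question, this implicitly treats absent entries as zeros, so it also assumes non-negative values.
m = sparse.csr_matrix(
    (vals, bins, np.arange(vals.shape[0]+1)), (vals.shape[0], k)
)
m.toarray()
# array([[8, 0, 0],
#        [7, 0, 0],
#        [0, 3, 0],
#        [0, 4, 0],
#        [0, 0, 1],
#        [0, 0, 2],
#        [0, 0, 6],
#        [5, 0, 0],
#        [0, 0, 0],
#        [0, 0, 9]])
m.argmax(0)
# matrix([[0, 3, 9]])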
Performance
Functions
def chris(bins, vals, k):
return npi.group_by(bins).argmax(vals)
def chris2(df):
return df.groupby('bins').vals.idxmax()
def chris3(bins, vals, k):
    return sparse.csr_matrix((vals, bins, np.arange(vals.shape[0] + 1)), (vals.shape[0], k)).argmax(0)
def divakar(bins, vals, k):
mx = vals.max()+1
sidx = bins.argsort()
sb = bins[sidx]
sm = np.r_[sb[:-1] != sb[1:],True]
argmax_out = np.argsort(bins*mx + vals)[sm]
max_out = vals[argmax_out]
return max_out, argmax_out
def divakar2(bins, vals, k):
last_idx = np.bincount(bins).cumsum()-1
scaled_vals = bins*(vals.max()+1) + vals
argmax_out = np.argsort(scaled_vals)[last_idx]
max_out = vals[argmax_out]
return max_out, argmax_out
def user545424(bins, vals, k):
return np.argmax(vals*(bins == np.arange(bins.max()+1)[:,np.newaxis]),axis=-1)
def user2699(bins, vals, k):
res = []
for v in np.unique(bins):
idx = (bins==v)
r = np.where(idx)[0][np.argmax(vals[idx])]
res.append(r)
return np.array(res)
def sacul(bins, vals, k):
return np.lexsort((vals, bins))[np.append(np.diff(np.sort(bins)), 1).astype(bool)]
@njit
def piRSquared(bins, vals, k):
out = -np.ones(k, np.int64)
trk = np.empty(k, vals.dtype)
    trk.fill(np.nanmin(vals) - 1)
for i in range(len(bins)):
v = vals[i]
b = bins[i]
if v > trk[b]:
trk[b] = v
out[b] = i
return out
Setup
import numpy_indexed as npi
import numpy as np
import pandas as pd
from timeit import timeit
import matplotlib.pyplot as plt
from numba import njit
from scipy import sparse
res = pd.DataFrame(
index=['chris', 'chris2', 'chris3', 'divakar', 'divakar2', 'user545424', 'user2699', 'sacul', 'piRSquared'],
columns=[10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
dtype=float
)
k = 5
for f in res.index:
for c in res.columns:
        bins = np.random.randint(0, k, c)
vals = np.random.rand(c)
df = pd.DataFrame({'bins': bins, 'vals': vals})
stmt = '{}(df)'.format(f) if f in {'chris2'} else '{}(bins, vals, k)'.format(f)
setp = 'from __main__ import bins, vals, k, df, {}'.format(f)
res.at[f, c] = timeit(stmt, setp, number=50)
ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N");
ax.set_ylabel("time (relative)");
plt.show()
Results
Results with a much larger k (This is where broadcasting gets hit hard):
res = pd.DataFrame(
index=['chris', 'chris2', 'chris3', 'divakar', 'divakar2', 'user545424', 'user2699', 'sacul', 'piRSquared'],
columns=[10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, 500000],
dtype=float
)
k = 500
for f in res.index:
for c in res.columns:
bins = np.random.randint(0, k, c)
vals = np.random.rand(c)
df = pd.DataFrame({'bins': bins, 'vals': vals})
stmt = '{}(df)'.format(f) if f in {'chris2'} else '{}(bins, vals, k)'.format(f)
setp = 'from __main__ import bins, vals, df, k, {}'.format(f)
res.at[f, c] = timeit(stmt, setp, number=50)
ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N");
ax.set_ylabel("time (relative)");
plt.show()
As is apparent from the graphs, broadcasting is a nifty trick when the number of groups is small; however, the time and memory cost of broadcasting grows too quickly at higher k values for it to stay competitive.
Here's one way: offset each group's data so that we can use argsort on the entire array in one go -
def binargmax_scale_sort(bins, vals):
w = np.bincount(bins)
valid_mask = w!=0
last_idx = w[valid_mask].cumsum()-1
scaled_vals = bins*(vals.max()+1) + vals
#unique_bins = np.flatnonzero(valid_mask) # if needed
return len(bins) -1 -np.argsort(scaled_vals[::-1], kind='mergesort')[last_idx]
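Using the bins and vals from the question, a quick check of this approach (a sketch of the expected output):
bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
binargmax_scale_sort(bins, vals)
# array([0, 3, 9])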
Okay, here's my linear-time entry, using only indexing and np.maximum.at / np.minimum.at. It assumes bins go up from 0 to max(bins).
def via_at(bins, vals):
max_vals = np.full(bins.max()+1, -np.inf)
np.maximum.at(max_vals, bins, vals)
expanded = max_vals[bins]
max_idx = np.full_like(max_vals, np.inf)
np.minimum.at(max_idx, bins, np.where(vals == expanded, np.arange(len(bins)), np.inf))
return max_vals, max_idx
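For the question's example arrays this returns both the per-bin maxima and their positions. Note that the positions come back as floats because max_idx is initialised with np.inf; a sketch of the expected result (casting recovers integer indices):
via_at(bins, vals)
# (array([8., 4., 9.]), array([0., 3., 9.]))
max_vals, max_idx = via_at(bins, vals)
max_idx.astype(int)
# array([0, 3, 9])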
How about this:
>>> import numpy as np
>>> bins = np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2])
>>> vals = np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])
>>> k = 3
>>> np.argmax(vals*(bins == np.arange(k)[:,np.newaxis]),axis=-1)
array([0, 3, 9])
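The intermediate product here has shape (k, n), one row per bin, which is why this approach gets hit hard at large k in the benchmarks above; a quick look at that intermediate for the example data:
>>> vals * (bins == np.arange(k)[:,np.newaxis])
array([[8, 7, 0, 0, 0, 0, 0, 5, 0, 0],
       [0, 0, 3, 4, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 2, 6, 0, 0, 9]])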
If you're going for readability, this might not be the best solution, but I think it works
def binargsort(bins,vals):
s = np.lexsort((vals,bins))
s2 = np.sort(bins)
msk = np.roll(s2,-1) != s2
# or use this for msk, but not noticeably better for performance:
# msk = np.append(np.diff(np.sort(bins)),1).astype(bool)
return s[msk]
binargsort(bins, vals)
array([0, 3, 9])
Explanation:
lexsort returns the indices that would sort the data by bins first, breaking ties by vals:
>>> np.lexsort((vals,bins))
array([7, 1, 0, 8, 2, 3, 4, 5, 6, 9])
So then you can mask that by where sorted bins differ from one index to the next:
>>> np.sort(bins)
array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2])
# Find where sorted bins end, use that as your mask on the `lexsort`
>>> np.append(np.diff(np.sort(bins)),1)
array([0, 0, 1, 0, 0, 1, 0, 0, 0, 1])
>>> np.lexsort((vals,bins))[np.append(np.diff(np.sort(bins)),1).astype(bool)]
array([0, 3, 9])
This is a fun little problem to solve. My approach is to get a boolean index into vals based on the values in bins. Using where to get the positions where the index is True, in combination with argmax on those positions in vals, gives the resulting index.
def binargmaxA(bins, vals):
    res = []
    for v in np.unique(bins):
        idx = (bins == v)
        r = np.where(idx)[0][np.argmax(vals[idx])]
        res.append(r)
    return np.array(res)
It's possible to remove the call to unique by using range(k) to get possible bin values. This speeds things up, but still leaves it with poor performance as the size of k increases.
def binargmaxA2(bins, vals, k):
    res = []
    for v in range(k):
        idx = (bins == v)
        r = np.where(idx)[0][np.argmax(vals[idx])]
        res.append(r)
    return np.array(res)
Last try: comparing each value slows things down substantially, so this version works on the sorted array of values rather than making a comparison for each unique value. Well, it actually computes the sorted indices and only gets the sorted values when needed, as that avoids loading vals into memory an extra time. Performance still scales with the number of bins, but much more slowly than before.
def binargmaxB(bins, vals):
    idx = np.argsort(bins)  # Find sorted indices
    split = np.r_[0, np.where(np.diff(bins[idx]))[0] + 1, len(bins)]  # Compute where each bin starts in the sorted array
    newmax = [np.argmax(vals[idx[i1:i2]]) for i1, i2 in zip(split, split[1:])]  # Find max for each bin in the sorted array
    return idx[newmax + split[:-1]]  # Convert to indices in the unsorted array
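A quick check against the question's example (a sketch; assumes numpy is imported as np and bins / vals are as defined in the question):
binargmaxB(bins, vals)
# array([0, 3, 9])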
Benchmarks
Here are some benchmarks against the other answers.
3000 elements
With a somewhat larger dataset (bins = randint(0, 30, 3000); vals = randn(3000); k=30;)
171us binargmax_scale_sort2 by Divakar
209us this answer, version B
281us binargmax_scale_sort by Divakar
329us broadcast version by user545424
399us this answer, version A
416us answer by sacul, using lexsort
899us reference code by piRSquared
30000 elements
And an even larger dataset (bins = randint(0, 30, 30000); vals = randn(30000); k=30). Surprisingly this doesn't change the relative performance between solutions.
1.27ms this answer, version B
2.01ms binargmax_scale_sort2 by Divakar
2.38ms broadcast version by user545424
2.68ms this answer, version A
5.71ms answer by sacul, using lexsort
9.12ms reference code by piRSquared
Edit: I didn't scale k with the increasing number of possible bin values; now that I've fixed that, the benchmarks are more even.
1000 bin values
Increasing the number of unique bin values may also have an impact on performance. The solutions by Divakar and sacul are mostly unaffected, while the others are affected quite substantially.
bins = randint(0, 1000, 30000); vals = randn(30000); k = 1000
1.99ms binargmax_scale_sort2 by Divakar
3.48ms this answer, version B
6.15ms answer by sacul, using lexsort
10.6ms reference code by piRSquared
27.2ms this answer, version A
129ms broadcast version by user545424
Edit: I've included benchmarks for the reference code in the question; it's surprisingly competitive, especially with more bins.
I know you said to use Numpy, but if Pandas is acceptable:
import numpy as np; import pandas as pd;
(pd.DataFrame(
{'bins':np.array([0, 0, 1, 1, 2, 2, 2, 0, 1, 2]),
'values':np.array([8, 7, 3, 4, 1, 2, 6, 5, 0, 9])})
.groupby('bins')
.idxmax())
values
bins
0 0
1 3
2 9
Related
I would like to implement itertools.combinations for numpy. Based on this discussion, I have a function that works for 1D input:
def combs(a, r):
"""
Return successive r-length combinations of elements in the array a.
Should produce the same output as array(list(combinations(a, r))), but
faster.
"""
a = asarray(a)
dt = dtype([('', a.dtype)]*r)
b = fromiter(combinations(a, r), dt)
return b.view(a.dtype).reshape(-1, r)
and the output makes sense:
In [1]: list(combinations([1,2,3], 2))
Out[1]: [(1, 2), (1, 3), (2, 3)]
In [2]: array(list(combinations([1,2,3], 2)))
Out[2]:
array([[1, 2],
[1, 3],
[2, 3]])
In [3]: combs([1,2,3], 2)
Out[3]:
array([[1, 2],
[1, 3],
[2, 3]])
However, it would be best if I could expand it to N-D inputs, where additional dimensions simply allow you to speedily do multiple calls at once. So, conceptually, if combs([1, 2, 3], 2) produces [1, 2], [1, 3], [2, 3], and combs([4, 5, 6], 2) produces [4, 5], [4, 6], [5, 6], then combs((1,2,3) and (4,5,6), 2) should produce [1, 2], [1, 3], [2, 3] and [4, 5], [4, 6], [5, 6] where "and" just represents parallel rows or columns (whichever makes sense). (and likewise for additional dimensions)
I'm not sure:
How to make the dimensions work in a logical way that's consistent with the way other functions work (like how some numpy functions have an axis= parameter, and a default of axis 0. So probably axis 0 should be the one I am combining along, and all other axes just represent parallel calculations?)
How to get the above code to work with ND (right now I get ValueError: setting an array element with a sequence.)
Is there a better way to do dt = dtype([('', a.dtype)]*r)?
You can use itertools.combinations() to create the index array, and then use NumPy's fancy indexing:
import numpy as np
from itertools import combinations, chain
from scipy.special import comb
def comb_index(n, k):
count = comb(n, k, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
int, count=count*k)
return index.reshape(-1, k)
data = np.array([[1,2,3,4,5],[10,11,12,13,14]])
idx = comb_index(5, 3)
print(data[:, idx])
output:
[[[ 1 2 3]
[ 1 2 4]
[ 1 2 5]
[ 1 3 4]
[ 1 3 5]
[ 1 4 5]
[ 2 3 4]
[ 2 3 5]
[ 2 4 5]
[ 3 4 5]]
[[10 11 12]
[10 11 13]
[10 11 14]
[10 12 13]
[10 12 14]
[10 13 14]
[11 12 13]
[11 12 14]
[11 13 14]
[12 13 14]]]
Case k = 2: np.triu_indices
I've tested the case k = 2 with many variations of the abovementioned functions using perfplot. The winner is, no doubt, np.triu_indices, and I see now that using the np.dtype([('', np.intp)] * 2) data structure can be a huge boost even for exotic data types such as igraph.EdgeList.
from itertools import combinations, chain
from scipy.special import comb
import igraph as ig #graph library build on C
import networkx as nx #graph library, pure Python
def _combs(n):
return np.array(list(combinations(range(n),2)))
def _combs_fromiter(n): ##Jaime
indices = np.arange(n)
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(combinations(indices, 2), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices
def _combs_fromiterplus(n):
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(combinations(range(n), 2), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices
def _numpy(n): ##endolith
return np.transpose(np.triu_indices(n,1))
def _igraph(n):
return np.array(ig.Graph(n).complementer(False).get_edgelist())
def _igraph_fromiter(n):
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(ig.Graph(n).complementer(False).get_edgelist(), dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices
def _nx(n):
G = nx.Graph()
G.add_nodes_from(range(n))
return np.array(list(nx.complement(G).edges))
def _nx_fromiter(n):
G = nx.Graph()
G.add_nodes_from(range(n))
dt = np.dtype([('', np.intp)]*2)
indices = np.fromiter(nx.complement(G).edges, dt)
indices = indices.view(np.intp).reshape(-1, 2)
return indices
def _comb_index(n): ##HYRY
count = comb(n, 2, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), 2)),
int, count=count*2)
return index.reshape(-1, 2)
fig = plt.figure(figsize=(15, 10))
plt.grid(True, which="both")
out = perfplot.bench(
setup = lambda x: x,
kernels = [_numpy, _combs, _combs_fromiter, _combs_fromiterplus,
_comb_index, _igraph, _igraph_fromiter, _nx, _nx_fromiter],
n_range = [2 ** k for k in range(12)],
xlabel = 'combinations(n, 2)',
title = 'testing combinations',
show_progress = False,
equality_check = False)
out.show()
Wondering why np.triu_indices can't be extended to more dimensions?
Case 2 ≤ k ≤ 4: triu_indices (implemented here) = up to 2x speedup
np.triu_indices could actually be the winner for k = 3 and even k = 4 if we implement a generalised method instead. The current version of this method is equivalent to:
def triu_indices(n, k):
x = np.less.outer(np.arange(n), np.arange(-k+1, n-k+1))
return np.nonzero(x)
It constructs a matrix representation of the relation x < y for two sequences 0, 1, ..., n-1 and finds the locations of the cells where it holds. For the 3D case we need to add an extra dimension and intersect the relations x < y and y < z. For higher dimensions the procedure is the same, but it incurs a huge memory overhead since n^k binary cells are needed and only C(n, k) of them are True. Memory usage and running time grow as O(n^k), so this algorithm outperforms itertools.combinations only for small values of k. It is actually best used for the cases k = 2 and k = 3.
def C(n, k): #huge memory overload...
if k==0:
return np.array([])
if k==1:
return np.arange(1,n+1)
elif k==2:
return np.less.outer(np.arange(n), np.arange(n))
else:
x = C(n, k-1)
X = np.repeat(x[None, :, :], len(x), axis=0)
Y = np.repeat(x[:, :, None], len(x), axis=2)
return X&Y
def C_indices(n, k):
return np.transpose(np.nonzero(C(n,k)))
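As a quick sanity check of the construction (assuming the C_indices defined just above): for n = 4, k = 3 the nonzero cells of the intersected relations are exactly the C(4, 3) index triples, in lexicographic order:
C_indices(4, 3)
# array([[0, 1, 2],
#        [0, 1, 3],
#        [0, 2, 3],
#        [1, 2, 3]])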
Let's check it out with perfplot:
import matplotlib.pyplot as plt
import numpy as np
import perfplot
from itertools import chain, combinations
from scipy.special import comb
def C(n, k): # huge memory overload...
if k == 0:
return np.array([])
if k == 1:
return np.arange(1, n + 1)
elif k == 2:
return np.less.outer(np.arange(n), np.arange(n))
else:
x = C(n, k - 1)
X = np.repeat(x[None, :, :], len(x), axis=0)
Y = np.repeat(x[:, :, None], len(x), axis=2)
return X & Y
def C_indices(data):
n, k = data
return np.transpose(np.nonzero(C(n, k)))
def comb_index(data):
n, k = data
count = comb(n, k, exact=True)
index = np.fromiter(chain.from_iterable(combinations(range(n), k)),
int, count=count * k)
return index.reshape(-1, k)
def build_args(k):
return {'setup': lambda x: (x, k),
'kernels': [comb_index, C_indices],
'n_range': [2 ** x for x in range(2, {2: 10, 3:10, 4:7, 5:6}[k])],
'xlabel': f'N',
'title': f'test of case C(N,{k})',
'show_progress': True,
'equality_check': lambda x, y: np.array_equal(x, y)}
outs = [perfplot.bench(**build_args(n)) for n in (2, 3, 4, 5)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
ax = fig.add_subplot(2, 2, i + 1)
ax.grid(True, which="both")
outs[i].plot()
plt.show()
So the best performance boost is achieved for k=2 (equivalent to np.triu_indices), and for k=3 it's almost twice as fast.
Case k > 3: numpy_combinations (implemented here) = up to 2.5x speedup
Following this question (thanks @Divakar) I managed to find a way to calculate the values of a specific column based on the previous column and Pascal's triangle. It's not yet as optimized as it could be, but the results are really promising. Here we go:
from scipy.linalg import pascal
def stretch(a, k):
l = a.sum()+len(a)*(-k)
out = np.full(l, -1, dtype=int)
out[0] = a[0]-1
idx = (a-k).cumsum()[:-1]
out[idx] = a[1:]-1-k
return out.cumsum()
def numpy_combinations(n, k):
    # n, k = data  # benchmark version unpacks a single `data` argument instead
x = np.array([n])
P = pascal(n).astype(int)
C = []
for b in range(k-1,-1,-1):
x = stretch(x, b)
r = P[b][x - b]
C.append(np.repeat(x, r))
return n - 1 - np.array(C).T
And the benchmark results are:
# script is the same as in previous example except this part
def build_args(k):
return {'setup': lambda x: (k, x),
'kernels': [comb_index, numpy_combinations],
'n_range': [x for x in range(1, k)],
'xlabel': f'N',
'title': f'test of case C({k}, k)',
'show_progress': True,
'equality_check': False}
outs = [perfplot.bench(**build_args(n)) for n in (12, 15, 17, 23, 25, 28)]
fig = plt.figure(figsize=(20, 20))
for i in range(len(outs)):
ax = fig.add_subplot(2, 3, i + 1)
ax.grid(True, which="both")
outs[i].plot()
plt.show()
Although it still can't compete with itertools.combinations for n < 15, it is the new winner in the other cases. Last but not least, numpy demonstrates its power when the number of combinations gets really big: it was able to survive processing C(28, 14), which is around 40,000,000 items of size 14.
When r = k = 2, you can also use numpy.triu_indices(n, 1), which indexes the upper triangle of a matrix.
idx = comb_index(5, 2)
from HYRY's answer is equivalent to
idx = np.transpose(np.triu_indices(5, 1))
but built-in, and a few times faster for N above ~20:
timeit comb_index(1000, 2)
32.3 ms ± 443 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
timeit np.transpose(np.triu_indices(1000, 1))
10.2 ms ± 25.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
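For a small n the equivalence is easy to see (output shown as a comment):
np.transpose(np.triu_indices(4, 1))
# array([[0, 1],
#        [0, 2],
#        [0, 3],
#        [1, 2],
#        [1, 3],
#        [2, 3]])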
Not sure how it will work out performance-wise, but you can do the combinations on an index array, then extract the actual array slices with np.take:
def combs_nd(a, r, axis=0):
a = np.asarray(a)
if axis < 0:
axis += a.ndim
indices = np.arange(a.shape[axis])
dt = np.dtype([('', np.intp)]*r)
indices = np.fromiter(combinations(indices, r), dt)
indices = indices.view(np.intp).reshape(-1, r)
return np.take(a, indices, axis=axis)
>>> combs_nd([1,2,3], 2)
array([[1, 2],
[1, 3],
[2, 3]])
>>> combs_nd([[1,2,3],[4,5,6]], 2, axis=1)
array([[[1, 2],
[1, 3],
[2, 3]],
[[4, 5],
[4, 6],
[5, 6]]])
I am tracking multiple discrete time-series at multiple temporal resolutions, resulting in an SxRxB matrix where S is the number of time-series, R is the number of different resolutions and B is the buffer, i.e. how many values each series remembers. Each series is discrete and uses a limited range of natural numbers to represent its values. I will call these "symbols" here.
For each series I want to calculate how often any of the previous measurement's symbols directly precedes any of the current measurement's symbols, over all measurements. I have solved this with a for-loop as seen below, but would like to vectorize it for obvious reasons.
I'm not sure if my way of structuring the data is efficient, so I'm open to suggestions there. The ratios matrix, especially, could probably be done differently.
Thanks in advance!
def supports_loop(data, num_series, resolutions, buffer_size, vocab_size):
# For small test matrices we can calculate the complete matrix without problems
indices = []
indices.append(xrange(num_series))
indices.append(xrange(vocab_size))
indices.append(xrange(num_series))
indices.append(xrange(vocab_size))
indices.append(xrange(resolutions))
# This is huge! :/
# dimensions:
# series and value for which we calculate,
# series and value which precedes that measurement,
# resolution
ratios = np.full((num_series, vocab_size, num_series, vocab_size, resolutions), 0.0)
for idx in itertools.product(*indices):
s0, v0 = idx[0],idx[1] # the series and symbol for which we calculate
s1, v1 = idx[2],idx[3] # the series and symbol which should precede the we're calculating for
res = idx[4]
# Find the positions where s0==v0
found0 = np.where(data[s0, res, :] == v0)[0]
if found0.size == 0:
continue
#print('found {}={} at {}'.format(s0, v0, found0))
# Check how often s1==v1 right before s0==v0
candidates = (s1, res, (found0 - 1 + buffer_size) % buffer_size)
found01 = np.count_nonzero(data[candidates] == v1)
if found01 == 0:
continue
print('found {}={} following {}={} at {}'.format(s0, v0, s1, v1, found01))
# total01 = number of positions where either s0 or s1 is defined (i.e. >=0)
total01 = len(np.argwhere((data[s0, res, :] >= 0) & (data[s1, res, :] >= 0)))
ratio = (float(found01) / total01) if total01 > 0 else 0.0
ratios[idx] = ratio
return ratios
def stackoverflow_example(fnc):
data = np.array([
[[0, 0, 1], # series 0, resolution 0
[1, 3, 2]], # series 0, resolution 1
[[2, 1, 2], # series 1, resolution 0
         [3, 3, 3]], # series 1, resolution 1
])
num_series = data.shape[0]
resolutions = data.shape[1]
buffer_size = data.shape[2]
vocab_size = np.max(data)+1
ratios = fnc(data, num_series, resolutions, buffer_size, vocab_size)
coordinates = np.argwhere(ratios > 0.0)
nz_values = ratios[ratios > 0.0]
print(np.hstack((coordinates, nz_values[:,None])))
print('0/0 precedes 0/0 in 1 out of 3 cases: {}'.format(np.isclose(ratios[0,0,0,0,0], 1.0/3.0)))
print('1/2 precedes 0/0 in 2 out of 3 cases: {}'.format(np.isclose(ratios[0,0,1,2,0], 2.0/3.0)))
Expected output (21 pairs, 5 columns for coordinates, followed by found count):
[[0 0 0 0 0 1]
[0 0 0 1 0 1]
[0 0 1 2 0 2]
[0 1 0 0 0 1]
[0 1 0 2 1 1]
[0 1 1 1 0 1]
[0 1 1 3 1 1]
[0 2 0 3 1 1]
[0 2 1 3 1 1]
[0 3 0 1 1 1]
[0 3 1 3 1 1]
[1 1 0 0 0 1]
[1 1 1 2 0 1]
[1 2 0 0 0 1]
[1 2 0 1 0 1]
[1 2 1 1 0 1]
[1 2 1 2 0 1]
[1 3 0 1 1 1]
[1 3 0 2 1 1]
[1 3 0 3 1 1]
[1 3 1 3 1 3]]
In the example above the 0 in series 0 follows a 2 in series 1 in two out of three cases (since the buffers are circular), so the ratio at [0, 0, 1, 2, 0] will be ~0.6666. Also series 0, value 0 follows itself in one out of three cases, so the ratio at [0, 0, 0, 0, 0] will be ~0.3333. There are some others which are >0.0 as well.
I am testing each answer on two datasets: a tiny one (as shown above) and a more realistic one (100 series, 5 resolutions, 10 values per series, 50 symbols).
Results
Answer     Time (tiny)   Time (huge)   All pairs found (tiny=21)
-----------------------------------------------------------------------
Baseline   ~1ms          ~675s (!)     Yes
Saedeas    ~0.13ms       ~1.4ms        No (!)
Saedeas2   ~0.20ms       ~4.0ms        Yes, +cross resolutions
Elliot_1   ~0.70ms       ~100s (!)     Yes
Elliot_2   ~1ms          ~21s (!)      Yes
Kuppern_1  ~0.39ms       ~2.4s (!)     Yes
Kuppern_2  ~0.18ms       ~28ms         Yes
Kuppern_3  ~0.19ms       ~24ms         Yes
David      ~0.21ms       ~27ms         Yes
Saedeas' 2nd approach is the clear winner! Thank you so much, all of you :)
To start, you're doing yourself a bit of a disservice by not explicitly nesting the for loops. You wind up repeating a lot of effort and not saving anything in terms of memory. When the loop is nested, you can move some of the computations from one level to another and figure out which inner loops can be vectorized over.
def supports_5_loop(data, num_series, resolutions, buffer_size, vocab_size):
ratios = np.full((num_series, vocab_size, num_series, vocab_size, resolutions), 0.0)
for res in xrange(resolutions):
for s0 in xrange(num_series):
# Find the positions where s0==v0
for v0 in np.unique(data[s0, res]):
# only need to find indices once for each series and value
found0 = np.where(data[s0, res, :] == v0)[0]
for s1 in xrange(num_series):
# Check how often s1==v1 right before s0==v0
candidates = (s1, res, (found0 - 1 + buffer_size) % buffer_size)
total01 = np.logical_or(data[s0, res, :] >= 0, data[s1, res, :] >= 0).sum()
# can skip inner loops if there are no candidates
if total01 == 0:
continue
for v1 in xrange(vocab_size):
found01 = np.count_nonzero(data[candidates] == v1)
if found01 == 0:
continue
ratio = (float(found01) / total01)
ratios[(s0, v0, s1, v1, res)] = ratio
return ratios
You'll see in the timings that the majority of the speed pickup comes from not duplicating effort.
Once you've made the nested structure, you can start looking at vectorizations and other optimizations.
def supports_4_loop(data, num_series, resolutions, buffer_size, vocab_size):
# For small test matrices we can calculate the complete matrix without problems
# This is huge! :/
# dimensions:
# series and value for which we calculate,
# series and value which precedes that measurement,
# resolution
ratios = np.full((num_series, vocab_size, num_series, vocab_size, resolutions), 0.0)
for res in xrange(resolutions):
for s0 in xrange(num_series):
# find the counts where either s0 or s1 are present
total01 = np.logical_or(data[s0, res] >= 0,
data[:, res] >= 0).sum(axis=1)
s1s = np.where(total01)[0]
# Find the positions where s0==v0
v0s, counts = np.unique(data[s0, res], return_counts=True)
# sorting before searching will show gains as the datasets
# get larger
indarr = np.argsort(data[s0, res])
i0 = 0
for v0, count in itertools.izip(v0s, counts):
found0 = indarr[i0:i0+count]
i0 += count
for s1 in s1s:
candidates = data[(s1, res, (found0 - 1) % buffer_size)]
# can replace the innermost loop with numpy functions
v1s, counts = np.unique(candidates, return_counts=True)
ratios[s0, v0, s1, v1s, res] = counts / total01[s1]
return ratios
Unfortunately I could only really vectorize over the innermost loop, and that only bought an additional 10% speedup. Outside of the innermost loop you can't guarantee that all the vectors are the same size, so you can't build an array.
In [121]: (np.all(supports_loop(data, num_series, resolutions, buffer_size, vocab_size) == supports_5_loop(data, num_series, resolutions, buffer_size, vocab_size)))
Out[121]: True
In [122]: (np.all(supports_loop(data, num_series, resolutions, buffer_size, vocab_size) == supports_4_loop(data, num_series, resolutions, buffer_size, vocab_size)))
Out[122]: True
In [123]: %timeit(supports_loop(data, num_series, resolutions, buffer_size, vocab_size))
2.29 ms ± 73.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [124]: %timeit(supports_5_loop(data, num_series, resolutions, buffer_size, vocab_size))
949 µs ± 5.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [125]: %timeit(supports_4_loop(data, num_series, resolutions, buffer_size, vocab_size))
843 µs ± 3.21 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
If I'm understanding your problem correctly, I think this bit of code will get you the symbol pairs you're looking for in a relatively quick, vectorized fashion.
import numpy as np
import time
from collections import Counter
series = 2
resolutions = 2
buffer_len = 3
symbols = range(3)
#mat = np.random.choice(symbols, size=(series, resolutions, buffer_len)).astype('uint8')
mat = np.array([
[[0, 0, 1], # series 0, resolution 0
[1, 3, 2]], # series 0, resolution 1
[[2, 1, 2], # series 1, resolution 0
     [3, 3, 3]], # series 1, resolution 1
])
start = time.time()
index_mat = np.indices(mat.shape)
right_shift_indices = np.roll(index_mat, -1, axis=3)
mat_shifted = mat[right_shift_indices[0], right_shift_indices[1], right_shift_indices[2]]
# These construct all the pairs directly
first_series = np.repeat(range(series), series*resolutions*buffer_len)
second_series = np.tile(np.repeat(range(series), resolutions*buffer_len), series)
res_loop = np.tile(np.repeat(range(resolutions), buffer_len), series*series)
mat_unroll = np.repeat(mat, series, axis=0)
shift_unroll = np.tile(mat_shifted, series)
# Constructs the pairs
pairs = zip(np.ravel(first_series),
np.ravel(second_series),
np.ravel(res_loop),
np.ravel(mat_unroll),
np.ravel(shift_unroll))
pair_time = time.time() - start
results = Counter(pairs)
end = time.time() - start
print("Mat: {}").format(mat)
print("Pairs: {}").format(results)
print("Number of Pairs: {}".format(len(pairs)))
print("Pair time is: {}".format(pair_time))
print("Count time is: {}".format(end-pair_time))
print("Total time is: {}".format(end))
The basic idea was to circularly shift each buffer by the appropriate amount depending on which time series it was (I think this is what your current code was doing). I can then generate all the symbol pairs by simply zipping lists offset by 1 together along the series axis (a minimal sketch of the shift idea follows the example output below).
Example output:
Mat: [[[0 0 1]
[1 3 2]]
[[2 1 2]
[3 3 3]]]
Pairs: Counter({(1, 1, 1, 3, 3): 3, (1, 0, 0, 2, 0): 2, (0, 0, 0, 0, 0): 1, (1, 1, 0, 2, 2): 1, (1, 1, 0, 2, 1): 1, (0, 1, 0, 0, 2): 1, (1, 0, 1, 3, 3): 1, (0, 0, 1, 1, 3): 1, (0, 0, 1, 3, 2): 1, (1, 0, 0, 1, 1): 1, (0, 1, 0, 0, 1): 1, (0, 1, 1, 2, 3): 1, (0, 1, 0, 1, 2): 1, (1, 1, 0, 1, 2): 1, (0, 1, 1, 3, 3): 1, (1, 0, 1, 3, 2): 1, (0, 0, 0, 0, 1): 1, (0, 1, 1, 1, 3): 1, (0, 0, 1, 2, 1): 1, (0, 0, 0, 1, 0): 1, (1, 0, 1, 3, 1): 1})
Number of Pairs: 24
Pair time is: 0.000135183334351
Count time is: 5.10215759277e-05
Total time is: 0.000186204910278
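To make the circular-shift idea concrete, here is a minimal sketch on a single buffer (the names are illustrative, not taken from the script above): rolling the buffer by one aligns each element with its circular predecessor, and zipping the two views yields the (previous, current) symbol pairs; the full script does the same thing on the index array along the buffer axis.
import numpy as np

buf = np.array([0, 0, 1])                       # one series at one resolution
prev = np.roll(buf, 1)                          # circular shift: [1, 0, 0]
pairs = list(zip(prev.tolist(), buf.tolist()))  # (previous, current) for every position
print(pairs)                                    # [(1, 0), (0, 0), (0, 1)]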
Edit: True final attempt. Fully vectorized.
A trick that makes this vectorizable is to make an array comb[i] = buffer1[i] + buffer2[i-1]*voc_size for each pair of series. Each combination then gets a unique value in the array, and one can recover the combination by doing v1[i] = comb[i] % voc_size, v2[i] = comb[i] // voc_size (a minimal illustration follows). As long as the number of series is not very high (< 10000, I think) there is no point in doing any further vectorisation.
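A minimal illustration of the packing trick with illustrative names and voc_size = 4: each (previous, current) pair maps to a single integer, and % / // recover the two symbols.
import numpy as np

voc_size = 4
current = np.array([0, 0, 1])
previous = np.roll(current, 1)           # [1, 0, 0]
comb = current + previous * voc_size     # [4, 0, 1], one unique code per pair
print(comb % voc_size)                   # current symbols:  [0 0 1]
print(comb // voc_size)                  # previous symbols: [1 0 0]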
def support_vectorized(data, num_series, resolutions, buffer_size, vocab_size):
ratios = np.zeros((num_series, vocab_size, num_series, vocab_size, resolutions))
prev = np.roll(data, 1, axis=2) # Get previous values
prev *= vocab_size # To separate prev from data
for i, series in enumerate(data):
for j, prev_series in enumerate(prev):
comb = series + prev_series
for k, buffer in enumerate(comb):
idx, counts = np.unique(buffer, return_counts=True)
v = idx % vocab_size
v2 = idx // vocab_size
ratios[i, v, j, v2, k] = counts/buffer_size
return ratios
If however S or R is large, a full vectorization is possible but this uses a lot of memory:
def row_unique(comb):
comb.sort(axis=-1)
changes = np.concatenate((
np.ones((comb.shape[0], comb.shape[1], comb.shape[2], 1), dtype="bool"),
comb[:, :,:, 1:] != comb[:, :, :, :-1]), axis=-1)
vals = comb[changes]
idxs = np.nonzero(changes)
tmp = np.hstack((idxs[-1], 0))
counts = np.where(tmp[1:], np.diff(tmp), comb.shape[-1]-tmp[:-1])
return idxs, vals, counts
def supports_full_vectorized(data, num_series, resolutions, buffer_size, vocab_size):
ratios = np.zeros((num_series, vocab_size, num_series, vocab_size, resolutions))
prev = np.roll(data, 1, axis=2)*vocab_size
comb = data + prev[:, None] # Create every combination
idxs, vals, counts = row_unique(comb) # Get unique values and counts for each row
ratios[idxs[1], vals % vocab_size, idxs[0], vals // vocab_size, idxs[2]] = counts/buffer_size
return ratios
However, for S=100 this is slower than the previous solution. A middle ground is to keep a for loop over the series to reduce the memory usage:
def row_unique2(comb):
comb.sort(axis=-1)
changes = np.concatenate((
np.ones((comb.shape[0], comb.shape[1], 1), dtype="bool"),
comb[:, :, 1:] != comb[:, :, :-1]), axis=-1)
vals = comb[changes]
idxs = np.nonzero(changes)
tmp = np.hstack((idxs[-1], 0))
counts = np.where(tmp[1:], np.diff(tmp), comb.shape[-1]-tmp[:-1])
return idxs, vals, counts
def supports_half_vectorized(data, num_series, resolutions, buffer_size, vocab_size):
prev = np.roll(data, 1, axis=2)*vocab_size
ratios = np.zeros((num_series, vocab_size, num_series, vocab_size, resolutions))
for i, series in enumerate(data):
comb = series + prev
idxs, vals, counts = row_unique2(comb)
ratios[i, vals % vocab_size, idxs[0], vals // vocab_size, idxs[1]] = counts/buffer_size
return ratios
The running times for the different solutions show that supports_half_vectorized is the fastest:
In [41]: S, R, B, voc_size = (100, 5, 1000, 29)
In [42]: data = np.random.randint(voc_size, size=S*R*B).reshape((S, R, B))
In [43]: %timeit support_vectorized(data, S, R, B, voc_size)
1 loop, best of 3: 4.84 s per loop
In [44]: %timeit supports_full_vectorized(data, S, R, B, voc_size)
1 loop, best of 3: 5.3 s per loop
In [45]: %timeit supports_half_vectorized(data, S, R, B, voc_size)
1 loop, best of 3: 4.36 s per loop
In [46]: %timeit supports_4_loop(data, S, R, B, voc_size)
1 loop, best of 3: 36.7 s per loop
So this is kind of a cop-out answer, but I've been working with @Saedeas's answer and, based on timings on my machine, have been able to optimize it slightly. I do believe that there is a way to do this without the loop, but the size of the intermediate array may be prohibitive.
The changes I have made have been to remove the concatenation that happens at the end of the run() function. This was creating a new array and is unnecessary. Instead we create the full-size array at the beginning and just don't use the last row until the end.
Another change I have made is that the tiling of single was slightly inefficient. I have replaced this with very slightly faster code.
I do believe that this can be made faster, but would take some work. I was testing with larger sizes so please let me know what timings you get on your machine.
Code is below;
import numpy as np
import logging
import sys
import time
import itertools
import timeit
logging.basicConfig(stream=sys.stdout,
level=logging.DEBUG,
format='%(message)s')
def run():
series = 2
resolutions = 2
buffer_len = 3
symbols = range(50)
#mat = np.random.choice(symbols, size=(series, resolutions, buffer_len))
mat = np.array([
[[0, 0, 1], # series 0, resolution 0
[1, 3, 2]], # series 0, resolution 1
[[2, 1, 2], # series 1, resolution 0
         [3, 3, 3]], # series 1, resolution 1
# [[4, 5, 6, 10],
# [7, 8, 9, 11]],
])
# logging.debug("Original:")
# logging.debug(mat)
start = time.time()
index_mat = np.indices((series, resolutions, buffer_len))
# This loop shifts all series but the one being looked at, and zips the
# element being looked at with every other member of that row
cross_pairs = np.empty((series, resolutions, buffer_len, series, 2), int)
#cross_pairs = []
right_shift_indices = [index_mat[0], index_mat[1], (index_mat[2] - 1) % buffer_len]
for i in range(series):
right_shift_indices[2][i] = (right_shift_indices[2][i] + 1) % buffer_len
# create a new matrix from the modified indices
mat_shifted = mat[right_shift_indices]
mat_shifted_t = mat_shifted.T.reshape(-1, series)
single = mat_shifted_t[:, i]
#print np.tile(single,(series-1,1)).T
#print single.reshape(-1,1).repeat(series-1,1)
#print single.repeat(series-1).reshape(-1,series-1)
mat_shifted_t = np.delete(mat_shifted_t, i, axis=1)
#cross_pairs[i,:,:,:-1] = (np.dstack((np.tile(single, (mat_shifted_t.shape[1], 1)).T, mat_shifted_t))).reshape(resolutions, buffer_len, (series-1), 2, order='F')
#cross_pairs[i,:,:,:-1] = (np.dstack((single.reshape(-1,1).repeat(series-1,1), mat_shifted_t))).reshape(resolutions, buffer_len, (series-1), 2, order='F')
cross_pairs[i,:,:,:-1] = np.dstack((single.repeat(series-1).reshape(-1,series-1), mat_shifted_t)).reshape(resolutions, buffer_len, (series-1), 2, order='F')
right_shift_indices[2][i] = (right_shift_indices[2][i] - 1) % buffer_len
#cross_pairs.extend([zip(itertools.repeat(x[i]), np.append(x[:i], x[i+1:])) for x in mat_shifted_t])
#consecutive_pairs = np.empty((series, resolutions, buffer_len, 2, 2), int)
#print "1", consecutive_pairs.shape
# tedious code to put this stuff in the right shape
in_series_zips = np.stack([mat[:, :, :-1], mat[:, :, 1:]], axis=3)
circular_in_series_zips = np.stack([mat[:, :, -1], mat[:, :, 0]], axis=2)
# This creates the final array.
# Index 0 is the preceding series
# Index 1 is the resolution
# Index 2 is the location in the buffer
# Index 3 is for the first n-1 elements, the following series, and for the last element
# it's the next element of the Index 0 series
# Index 4 is the index into the two element pair
cross_pairs[:,:,:-1,-1] = in_series_zips
cross_pairs[:,:,-1,-1] = circular_in_series_zips
end = time.time()
#logging.debug("Pairs encountered:")
#logging.debug(pairs)
logging.info("Elapsed: {}".format(end - start))
if __name__ == '__main__':
run()
I am not sure what the title of this question should be. But let's say we have 2 arrays, values and distances.
values = np.array([[-1,-1,-1],
[1, 2, 0],
[-1,-1,-1]])
distances = np.array([[1,2,3],
[6,5,4],
[7,8,9]])
I would like to get the values that are non-negative, and have them ordered by their corresponding distances, based on the distances array.
So with the example above, the non-negative values are [1,2,0] and their distances will be [6,5,4]. Thus, sorting by the corresponding distances, I would like to have [0,2,1] as the answer.
My code is below. It works, but I would like a solution that just uses numpy. I'm sure that would be more efficient than this:
import numpy as np
import heapq
def get_sorted_values(seek_val, values, distances):
r, c = np.where(values >= seek_val)
di = distances[r, c]
vals = values[r, c]
print("di", di)
print("vals", vals)
if len(di) >= 1:
heap = []
for d, v in zip(di,vals):
heapq.heappush(heap, (d,v))
lists = []
while heap:
d, v = heapq.heappop(heap)
lists.append(v)
return lists
else:
## NOTHING FOUND
return None
Input:
seek_val = 0
values = np.array([[-1,-1,-1],
[1,2,0],
[-1,-1,-1]])
distances = np.array([[1,2,3],
[6,5,4],
[7,8,9]])
print("Ans:",get_sorted_values(seek_val, values, distances))
Output:
di [6 5 4]
vals [1 2 0]
Ans: [0, 2, 1]
"one liner":
values[np.where(values >= 0)][np.argsort(distances[np.where(values >= 0)])]
Out[981]: array([0, 2, 1])
Repeating np.where(values >= 0) is inefficient; you could store it in a variable if values is big:
v_indx = np.where(values >= 0)
values[v_indx][np.argsort(distances[v_indx])]
Try np.argsort
import numpy as np
values = np.array([[-1,-1,-1],
[ 1, 2, 0],
[-1,-1,-1]])
distances = np.array([[1, 2, 3],
[6, 5, 4],
[7, 8, 9]])
print(values[values >= 0])
# [1 2 0]
print(distances[values >= 0])
# [6 5 4]
print('Ans:', values[values >= 0][np.argsort(distances[values >= 0])])
# Ans: [0 2 1]
Consider the following lists short_list and long_list
short_list = list('aaabaaacaaadaaac')
np.random.seed([3,1415])
long_list = pd.DataFrame(
np.random.choice(list(ascii_letters),
(10000, 2))
).sum(1).tolist()
How do I calculate the cumulative count by unique value?
I want to use numpy and do it in linear time. I want this so that I can compare timings with my other methods. It may be easiest to illustrate with my first proposed solution:
def pir1(l):
s = pd.Series(l)
return s.groupby(s).cumcount().tolist()
print(np.array(short_list))
print(pir1(short_list))
['a' 'a' 'a' 'b' 'a' 'a' 'a' 'c' 'a' 'a' 'a' 'd' 'a' 'a' 'a' 'c']
[0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 1]
I've tortured myself trying to use np.unique because it returns a counts array, an inverse array, and an index array. I was sure I could use these to get at a solution. The best I got is pir4 below, which scales in quadratic time. Also note that I don't care if counts start at 1 or zero, as we can simply add or subtract 1.
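For reference, here is what np.unique returns on the short list (a small illustration; the keyword arguments are spelled out here, whereas pir4 below passes them positionally):
short_arr = np.array(list('aaabaaacaaadaaac'))
unq, first_idx, inv, counts = np.unique(
    short_arr, return_index=True, return_inverse=True, return_counts=True)
unq         # array(['a', 'b', 'c', 'd'], dtype='<U1')
first_idx   # array([ 0,  3,  7, 11])
inv         # array([0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 2])
counts      # array([12,  1,  2,  1])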
Below are some of my attempts (none of which answer my question)
%%cython
from collections import defaultdict
def get_generator(l):
counter = defaultdict(lambda: -1)
for i in l:
counter[i] += 1
yield counter[i]
def pir2(l):
return [i for i in get_generator(l)]
def pir3(l):
return [i for i in get_generator(l)]
def pir4(l):
unq, inv = np.unique(l, 0, 1, 0)
a = np.arange(len(unq))
matches = a[:, None] == inv
return (matches * matches.cumsum(1)).sum(0).tolist()
setup
short_list = np.array(list('aaabaaacaaadaaac'))
functions
dfill takes an array and returns the positions where the array changes and repeats that index position until the next change.
# dfill
#
# Example with short_list
#
# 0 0 0 3 4 4 4 7 8 8 8 11 12 12 12 15
# [ a a a b a a a c a a a d a a a c]
#
# Example with short_list after sorting
#
# 0 0 0 0 0 0 0 0 0 0 0 0 12 13 13 15
# [ a a a a a a a a a a a a b c c d]
argunsort returns the permutation necessary to undo a sort given the argsort array. The existence of this method became known to me via this post. With this, I can get the argsort array, sort my array with it, and then undo the sort without the overhead of sorting again.
cumcount will take an array, sort it, and find the dfill array. np.arange minus dfill gives the cumulative count. Then I un-sort.
# cumcount
#
# Example with short_list
#
# short_list:
# [ a a a b a a a c a a a d a a a c]
#
# short_list.argsort():
# [ 0 1 2 4 5 6 8 9 10 12 13 14 3 7 15 11]
#
# Example with short_list after sorting
#
# short_list[short_list.argsort()]:
# [ a a a a a a a a a a a a b c c d]
#
# dfill(short_list[short_list.argsort()]):
# [ 0 0 0 0 0 0 0 0 0 0 0 0 12 13 13 15]
#
# np.arange(short_list.size):
# [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
#
# np.arange(short_list.size) -
# dfill(short_list[short_list.argsort()]):
# [ 0 1 2 3 4 5 6 7 8 9 10 11 0 0 1 0]
#
# unsorted:
# [ 0 1 2 0 3 4 5 0 6 7 8 0 9 10 11 1]
foo function recommended by @hpaulj using defaultdict
div function recommended by @Divakar (old, I'm sure he'd update it)
code
def dfill(a):
n = a.size
b = np.concatenate([[0], np.where(a[:-1] != a[1:])[0] + 1, [n]])
return np.arange(n)[b[:-1]].repeat(np.diff(b))
def argunsort(s):
n = s.size
u = np.empty(n, dtype=np.int64)
u[s] = np.arange(n)
return u
def cumcount(a):
n = a.size
s = a.argsort(kind='mergesort')
i = argunsort(s)
b = a[s]
return (np.arange(n) - dfill(b))[i]
def foo(l):
n = len(l)
r = np.empty(n, dtype=np.int64)
counter = defaultdict(int)
for i in range(n):
counter[l[i]] += 1
r[i] = counter[l[i]]
return r - 1
def div(l):
a = np.unique(l, return_counts=1)[1]
idx = a.cumsum()
id_arr = np.ones(idx[-1],dtype=int)
id_arr[0] = 0
id_arr[idx[:-1]] = -a[:-1]+1
rng = id_arr.cumsum()
return rng[argunsort(np.argsort(l))]
demonstration
cumcount(short_list)
array([ 0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 1])
time testing
code
functions = pd.Index(['cumcount', 'foo', 'foo2', 'div'], name='function')
lengths = pd.RangeIndex(100, 1100, 100, 'array length')
results = pd.DataFrame(index=lengths, columns=functions)
from string import ascii_letters
for i in lengths:
a = np.random.choice(list(ascii_letters), i)
for j in functions:
results.set_value(
i, j,
timeit(
'{}(a)'.format(j),
'from __main__ import a, {}'.format(j),
number=1000
)
)
results.plot()
Here's a vectorized approach using custom grouped range creating function and np.unique for getting the counts -
def grp_range(a):
idx = a.cumsum()
id_arr = np.ones(idx[-1],dtype=int)
id_arr[0] = 0
id_arr[idx[:-1]] = -a[:-1]+1
return id_arr.cumsum()
count = np.unique(A,return_counts=1)[1]
out = grp_range(count)[np.argsort(A).argsort()]
Sample run -
In [117]: A = list('aaabaaacaaadaaac')
In [118]: count = np.unique(A,return_counts=1)[1]
...: out = grp_range(count)[np.argsort(A).argsort()]
...:
In [119]: out
Out[119]: array([ 0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 1])
For getting the count, few other alternatives could be proposed with focus on performance -
np.bincount(np.unique(A,return_inverse=1)[1])
np.bincount(np.fromstring('aaabaaacaaadaaac',dtype=np.uint8)-97)
Additionally, with A containing single-letter characters, we could get the count simply with -
np.bincount(np.array(A).view('uint8')-97)
Besides defaultdict there are a couple of other counters. Testing a slightly simpler case:
In [298]: from collections import defaultdict
In [299]: from collections import defaultdict, Counter
In [300]: def foo(l):
...: counter = defaultdict(int)
...: for i in l:
...: counter[i] += 1
...: return counter
...:
In [301]: short_list = list('aaabaaacaaadaaac')
In [302]: foo(short_list)
Out[302]: defaultdict(int, {'a': 12, 'b': 1, 'c': 2, 'd': 1})
In [303]: Counter(short_list)
Out[303]: Counter({'a': 12, 'b': 1, 'c': 2, 'd': 1})
In [304]: arr=[ord(i)-ord('a') for i in short_list]
In [305]: np.bincount(arr)
Out[305]: array([12, 1, 2, 1], dtype=int32)
I constructed arr because bincount only works with ints.
In [306]: timeit np.bincount(arr)
The slowest run took 82.46 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.63 µs per loop
In [307]: timeit Counter(arr)
100000 loops, best of 3: 13.6 µs per loop
In [308]: timeit foo(arr)
100000 loops, best of 3: 6.49 µs per loop
I'm guessing it would be hard to improve on pir2, which is based on defaultdict.
Searching and counting like this are not a strong area for numpy.
[a b c      ]
[  a b c    ]
[    a b c  ]
[      a b c]
Hello
For my economics course we are supposed to create an array that looks like this. The problem is I am an economist, not a programmer. We are using numpy in python. Our professor says college is not preparing us for the real world and wants us to learn programming (which is a good thing). We are not allowed to use any packages and must come up with original code. Does anybody out there have any idea how to make this matrix? I have spent hours trying code and browsing the internet looking for help and have been unsuccessful.
This kind of matrix is called a Toeplitz matrix or constant diagonal matrix. Knowing this leads you to scipy.linalg.toeplitz:
import scipy.linalg
scipy.linalg.toeplitz([1, 0, 0, 0], [1, 2, 3, 0, 0, 0])
=>
array([[1, 2, 3, 0, 0, 0],
[0, 1, 2, 3, 0, 0],
[0, 0, 1, 2, 3, 0],
[0, 0, 0, 1, 2, 3]])
The method below fills one diagonal at a time:
import numpy as np
x = np.zeros((4, 6), dtype=np.int)
for i, v in enumerate((6,7,8)):
np.fill_diagonal(x[:,i:], v)
array([[6, 7, 8, 0, 0, 0],
[0, 6, 7, 8, 0, 0],
[0, 0, 6, 7, 8, 0],
[0, 0, 0, 6, 7, 8]])
or you could do the one liner:
x = [6,7,8,0,0,0]
y = np.vstack([np.roll(x,i) for i in range(4)])
Personally, I prefer the first since it's easier to understand and probably faster since it doesn't build all the temporary 1D arrays.
Edit:
Since a discussion of efficiency has come up, it might be worthwhile to run a test. I also included timings for the toeplitz method suggested by chthonicdaemon (although personally I interpreted the question to exclude this approach since it uses a package rather than original code -- and speed isn't really the point of the original question either).
import numpy as np
import timeit
import scipy.linalg as sl
def a(m, n):
x = np.zeros((m, m), dtype=np.int)
for i, v in enumerate((6,7,8)):
np.fill_diagonal(x[:,i:], v)
def b(m, n):
x = np.zeros((n,))
x[:3] = vals
y = np.vstack([np.roll(x,i) for i in range(m)])
def c(m, n):
x = np.zeros((n,))
x[:3] = vals
y = np.zeros((m,))
y[0] = vals[0]
r = sl.toeplitz(y, x)
return r
vals = (6, 7, 8)  # diagonal values used as globals by b() and c()
m, n = 4, 6
print timeit.timeit("a(m,n)", "from __main__ import np, a, b, m, n", number=1000)
print timeit.timeit("b(m,n)", "from __main__ import np, a, b, m, n", number=1000)
print timeit.timeit("c(m,n)", "from __main__ import np, c, sl, m, n", number=1000)
m, n = 1000, 1006
print timeit.timeit("a(m,n)", "from __main__ import np, a, b, m, n", number=1000)
print timeit.timeit("b(m,n)", "from __main__ import np, a, b, m, n", number=1000)
print timeit.timeit("c(m,n)", "from __main__ import np, c, sl, m, n", number=100)
# which gives:
0.03525209 # fill_diagonal
0.07554483 # vstack
0.07058787 # toeplitz
0.18803215 # fill_diagonal
2.58780789 # vstack
1.57608604 # toeplitz
So the first method is about a 2-3x faster for small arrays and 10-20x faster for larger arrays.
This is a simplified tridiagonal matrix, so it is essentially the same as this question:
def tridiag(a, b, c, k1=-1, k2=0, k3=1):
return np.diag(a, k1) + np.diag(b, k2) + np.diag(c, k3)
a = [1, 1]; b = [2, 2, 2]; c = [3, 3]
A = tridiag(a, b, c)
print(A)
Result:
array([[2, 3, 0],
[1, 2, 3],
[0, 1, 2]])
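For the rectangular 4x6 band from the question, the same idea can be written with shifted identity matrices, since np.eye accepts a rectangular shape and a diagonal offset (a sketch, separate from the tridiag helper above):
import numpy as np

A = 1 * np.eye(4, 6, k=0, dtype=int) \
  + 2 * np.eye(4, 6, k=1, dtype=int) \
  + 3 * np.eye(4, 6, k=2, dtype=int)
print(A)
# [[1 2 3 0 0 0]
#  [0 1 2 3 0 0]
#  [0 0 1 2 3 0]
#  [0 0 0 1 2 3]]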
Something along the lines of
import numpy as np
def createArray(theinput,rotations) :
l = [theinput]
for i in range(1,rotations) :
l.append(l[i-1][:])
l[i].insert(0,l[i].pop())
return np.array(l)
print(createArray([1,2,3,0,0,0],4))
"""
[[1 2 3 0 0 0]
[0 1 2 3 0 0]
[0 0 1 2 3 0]
[0 0 0 1 2 3]]
"""
If you care about efficiency, it is hard to beat this:
import numpy as np
def create_matrix(diags, n):
diags = np.asarray(diags)
m = np.zeros((n,n+len(diags)-1), diags.dtype)
s = m.strides
v = np.lib.index_tricks.as_strided(
m,
(len(diags),n),
(s[1],sum(s)))
v[:] = diags[:,None]
return m
print create_matrix(['a','b','c'], 8)
Might be a little over your head, but then again that's good inspiration ;)
Or even better: a solution which has both O(n) storage and runtime requirements, unlike all the other solutions posted thus far, which are O(n^2):
import numpy as np
def create_matrix(diags, n):
diags = np.asarray(diags)
b = np.zeros(len(diags)+n*2, diags.dtype)
b[n:][:len(diags)] = diags
s = b.strides[0]
v = np.lib.index_tricks.as_strided(
b[n:],
(n,n+len(diags)-1),
(-s,s))
return v
print create_matrix(np.arange(1,4), 8)
This is an old question, however some new input can always be useful.
I create tridiagonal matrices in python using list comprehension.
Say a matrix that is symmetric around "-2" and has a "1" on either side:
-2 1 0
Tsym(3) => 1 -2 1
0 1 -2
This can be created using the following "one liner":
Tsym = lambda n: [ [ 1 if (i+1==j or i-1==j) else -2 if j==i else 0 for i in xrange(n) ] for j in xrange(n)] # Symmetric tridiagonal matrix (1,-2,1)
A different case (that several of the other people answering has solved perfectly fine) is:
1 2 3 0 0 0
Tgen(4,6) => 0 1 2 3 0 0
0 0 1 2 3 0
0 0 0 1 2 3
Can be made using the one liner shown below.
Tgen = lambda n,m: [ [ 1 if i==j else 2 if i==j+1 else 3 if i==j+2 else 0 for i in xrange(m) ] for j in xrange(n)] # General tridiagonal matrix (1,2,3)
Feel free to modify to suit your specific needs. These matrices are very common when modelling physical systems and I hope this is useful to someone (other than me).
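For example, the Tgen one-liner above reproduces the 4x6 case (a quick check; swap xrange for range if you are on Python 3):
for row in Tgen(4, 6):
    print(row)
# [1, 2, 3, 0, 0, 0]
# [0, 1, 2, 3, 0, 0]
# [0, 0, 1, 2, 3, 0]
# [0, 0, 0, 1, 2, 3]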
Hello. Since your professor asked you not to import any external package (while most answers use numpy or scipy), you are better off using only Python lists to create a 2D array (a list of lists) and then populating its diagonals with the items you wish. Find the code below:
def create_matrix(rows = 4, cols = 6):
    mat = [[0 for col in range(cols)] for row in range(rows)] # create a matrix filled with zeros of size (rows, cols)
for row in range(len(mat)): # gives number of lists in the main list,
for col in range(len(mat[0])): # gives number of items in sub-list 0, but all sublists have the same length
if row == col:
mat[row][col] = "a"
if col == row+1:
mat[row][col] = "b"
if col == row+2:
mat[row][col] = "c"
return mat
create_matrix(4, 6)
[['a', 'b', 'c', 0, 0, 0],
[0, 'a', 'b', 'c', 0, 0],
[0, 0, 'a', 'b', 'c', 0],
[0, 0, 0, 'a', 'b', 'c']]
Creating Band Matrix
Check out the definition for it in wiki :
https://en.wikipedia.org/wiki/Band_matrix
You can use this function to create band matrices, such as a diagonal matrix with offset=0, a tridiagonal matrix (the one you are asking about) with offset=1, or a pentadiagonal matrix with offset=2:
def band(size=10, ones=False, low=0, high=100, offset=2):
shape = (size, size)
n_matrix = np.random.randint(low, high, shape) if not ones else np.ones(shape,dtype=int)
n_matrix = np.triu(n_matrix, -1*offset)
n_matrix = np.tril(n_matrix, offset)
return n_matrix
In your case you should use this
rand_tridiagonal = band(size=6,offset=1)
print(rand_tridiagonal)