The DTAIDistance package can be used to find the k best matches of an input query, but it cannot be used for a multi-dimensional input query. Moreover, I want to find the k best matches of many input queries in one run.
I modified the DTAIDistance function so that it can search for subsequences of multi-dimensional series for multiple queries. I use njit with parallel to speed up the process, i.e. the p_calc function, which applies a Numba parallel loop over the input queries. However, the parallel calculation does not seem to be any faster than simply looping over the input queries one by one, i.e. the calc function.
import time
from tqdm import tqdm
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
# NOTE(review): the next line looks like a decorator whose leading "@" was
# lost (i.e. "@njit(...)"); as written it is only a comment. It also says
# parallel=False -- presumably Numba then runs the prange loop below as a
# plain serial range, which would explain the missing speed-up; confirm.
#njit(fastmath=True, nogil=True, error_model="numpy", cache=True, parallel=False)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Fill one DTW cumulative-cost matrix per query series, all against ``s2``.

    For every index ``nn`` along axis 1 of ``s1`` the squared point-wise
    distances between ``s1[i, nn]`` and ``s2[j]`` (summed over the last axis)
    are accumulated into ``dtw[nn]``, the square root of ``dtw[nn]`` is taken,
    and the selected end-point distance is stored in ``d[nn]``.

    Parameters
    ----------
    d : array indexed as ``d[nn]``; overwritten in place (it is also used as
        scratch space for the current point-wise distance).
    dtw : array indexed as ``dtw[nn, row, col]`` with rows ``0..r`` and columns
        ``0..c``; overwritten in place. The caller in this file allocates it
        filled with ``inf``.
    s1 : array indexed as ``s1[i, nn]``; ``s1.shape[1]`` is the number of
        series and ``s1.shape[2]`` the number of dimensions per point.
    s2 : array indexed as ``s2[j]``, compared against every series of ``s1``.
    r, c : number of points iterated over in ``s1`` (rows) and ``s2`` (columns).
    psi_1b, psi_2b : the first ``psi_1b + 1`` cells of column 0 and the first
        ``psi_2b + 1`` cells of row 0 are set to 0 (free start).
    psi_1e, psi_2e : when non-zero, the result is the minimum over the tail of
        the last column / last row instead of the single corner cell.
    window : limits the range of columns visited for each row.
    max_step : point-wise distances above this value are skipped (may be None).
    max_dist : cumulative costs above this value drive the column pruning; the
        final distance is set to ``inf`` when its square exceeds it.
    penalty : cost added to the two non-diagonal predecessors.
    psi_neg : when true, cells beyond the selected end point are set to -1.

    Returns
    -------
    (d, dtw) : the same two arrays that were passed in.
    """
    n_series = s1.shape[1]
    ndim = s1.shape[2]
    # s1 = np.ascontiguousarray(s1)#.shape
    # s2 = np.ascontiguousarray(s2)#.shape
    # dtw = np.full((n_series,r + 1, c + 1), np.inf,dtype=s1.dtype) # cmath.inf
    # d = np.full((n_series), np.inf,dtype=s1.dtype) # cmath.inf
    # Free-start relaxation: zero the leading cells of row 0 and column 0 for
    # every series at once.
    for i in range(psi_2b + 1):
        dtw[:, 0, i] = 0
    for i in range(psi_1b + 1):
        dtw[:, i, 0] = 0
    for nn in prange(n_series):
        # NOTE(review): looks like a leftover debug print inside the hot loop.
        print('im alive...')
        i0 = 1
        i1 = 0
        # sc / ec: first and last column worth visiting, carried from row to
        # row by the max_dist pruning below.
        sc = 0
        ec = 0
        smaller_found = False
        ec_next = 0
        for i in range(r):
            i0 = i      # previous row in dtw
            i1 = i + 1  # current row in dtw (row 0 is the border)
            j_start = max(0, i - max(0, r - c) - window + 1)
            j_end = min(c, i + max(0, c - r) + window)
            if sc > j_start:
                j_start = sc
            smaller_found = False
            ec_next = i
            for j in range(j_start, j_end):
                # Squared Euclidean distance between the two points, summed
                # dimension by dimension.
                val = 0
                tmp = ((s1[i, nn] - s2[j]) ** 2)
                # tmp = (np.abs(s1[i, nn] - s2[j, 0]))
                for nd in range(ndim):
                    val += tmp[nd]
                d[nn] = val
                # d = np.sum(np.abs(s1[i] - s2[j]) ) # multi-d
                if max_step is not None and d[nn] > max_step:
                    continue
                # print(i, j + 1 - skip, j - skipp, j + 1 - skipp, j - skip)
                # Standard DTW recurrence; the two non-diagonal moves pay
                # `penalty`.
                dtw[nn, i1, j + 1] = d[nn] + min(dtw[nn, i0, j],
                                                 dtw[nn, i0, j + 1] + penalty,
                                                 dtw[nn, i1, j] + penalty)
                # dtw[i + 1, j + 1 - skip] = d + min(dtw[i + 1, j + 1 - skip], dtw[i + 1, j - skip])
                if dtw[nn, i1, j + 1] > max_dist:
                    # Too expensive: move the start column forward until a
                    # cheap cell has been seen in this row, and stop once past
                    # the previous row's last cheap column.
                    if not smaller_found:
                        sc = j + 1
                    if j >= ec:
                        break
                else:
                    smaller_found = True
                    ec_next = j + 1
            ec = ec_next
        # Decide which d to return
        # Costs were accumulated as squared distances; convert in place.
        dtw[nn] = np.sqrt(dtw[nn])
        if psi_1e == 0 and psi_2e == 0:
            d[nn] = dtw[nn, i1, min(c, c + window - 1)]
        else:
            ir = i1
            ic = min(c, c + window - 1)
            if psi_1e != 0:
                # Best end point in the last column, scanning upwards from
                # the last row.
                vr = dtw[nn, ir:max(0, ir - psi_1e - 1):-1, ic]
                mir = np.argmin(vr)
                vr_mir = vr[mir]
            else:
                mir = ir
                vr_mir = inf
            if psi_2e != 0:
                # Best end point in the last row, scanning leftwards from the
                # last column.
                vc = dtw[nn, ir, ic:max(0, ic - psi_2e - 1):-1]
                mic = np.argmin(vc)
                vc_mic = vc[mic]
            else:
                mic = ic
                vc_mic = inf
            if vr_mir < vc_mic:
                if psi_neg:
                    # Mark the cells after the chosen end point.
                    dtw[nn, ir:ir - mir:-1, ic] = -1
                d[nn] = vr_mir
            else:
                if psi_neg:
                    dtw[nn, ir, ic:ic - mic:-1] = -1
                d[nn] = vc_mic
        # d[nn] is a root distance here while max_dist is compared on the
        # squared scale.
        if max_dist and d[nn] ** 2 > max_dist:
            # if max_dist and d[nn] > max_dist:
            d[nn] = inf
    return d, dtw
# NOTE(review): the next line looks like "@njit(...)" with the "@" lost; as
# written it is only a comment.
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Compute the DTW cumulative-cost matrix between one series and ``s2``.

    Single-query counterpart of ``p_calc``: ``s1`` is indexed as ``s1[i]`` and
    ``s2`` as ``s2[j]``; the point-wise cost is the sum of squared differences.

    Parameters
    ----------
    s1, s2 : arrays indexed by point along axis 0.
    r, c : number of points iterated over in ``s1`` (rows) and ``s2`` (columns).
    psi_1b, psi_2b : the first ``psi_1b + 1`` cells of column 0 and the first
        ``psi_2b + 1`` cells of row 0 are set to 0 (free start).
    psi_1e, psi_2e : when non-zero, the result is the minimum over the tail of
        the last column / last row instead of the single corner cell.
    window : limits the range of columns visited for each row.
    max_step : point-wise distances above this value are skipped (may be None).
    max_dist : cumulative costs above this value drive the column pruning; the
        final distance is set to ``inf`` when its square exceeds it.
    penalty : cost added to the two non-diagonal predecessors.
    psi_neg : when true, cells beyond the selected end point are set to -1.

    Returns
    -------
    (d, dtw) : the selected distance and the ``(r + 1, c + 1)`` matrix of
    square-rooted cumulative costs.
    """
    dtw = np.full((r + 1, c + 1), np.inf) # cmath.inf
    # Free-start relaxation on row 0 and column 0.
    for i in range(psi_2b + 1):
        dtw[0, i] = 0
    for i in range(psi_1b + 1):
        dtw[i, 0] = 0
    i0 = 1
    i1 = 0
    # sc / ec: first and last column worth visiting, carried from row to row
    # by the max_dist pruning below.
    sc = 0
    ec = 0
    smaller_found = False
    ec_next = 0
    for i in range(r):
        i0 = i      # previous row in dtw
        i1 = i + 1  # current row in dtw (row 0 is the border)
        j_start = max(0, i - max(0, r - c) - window + 1)
        j_end = min(c, i + max(0, c - r) + window)
        if sc > j_start:
            j_start = sc
        smaller_found = False
        ec_next = i
        for j in range(j_start, j_end):
            # d = (s1[i] - s2[j]) ** 2# 1-d
            d = np.sum((s1[i] - s2[j]) ** 2) # multi-d
            # d = np.sum(np.abs(s1[i] - s2[j]) ) # multi-d
            if max_step is not None and d > max_step:
                continue
            # Standard DTW recurrence; the two non-diagonal moves pay
            # `penalty`.
            dtw[i1, j + 1] = d + min(dtw[i0, j],
                                     dtw[i0, j + 1] + penalty,
                                     dtw[i1, j] + penalty)
            if dtw[i1, j + 1] > max_dist:
                # Too expensive: move the start column forward until a cheap
                # cell has been seen in this row, and stop once past the
                # previous row's last cheap column.
                if not smaller_found:
                    sc = j + 1
                if j >= ec:
                    break
            else:
                smaller_found = True
                ec_next = j + 1
        ec = ec_next
    # Decide which d to return
    # Costs were accumulated as squared distances; convert the whole matrix.
    dtw = np.sqrt(dtw)
    if psi_1e == 0 and psi_2e == 0:
        d = dtw[i1, min(c, c + window - 1)]
    else:
        ir = i1
        ic = min(c, c + window - 1)
        if psi_1e != 0:
            # Best end point in the last column, scanning upwards.
            vr = dtw[ir:max(0, ir - psi_1e - 1):-1, ic]
            mir = argmin(vr)
            vr_mir = vr[mir]
        else:
            mir = ir
            vr_mir = inf
        if psi_2e != 0:
            # Best end point in the last row, scanning leftwards.
            vc = dtw[ir, ic:max(0, ic - psi_2e - 1):-1]
            mic = argmin(vc)
            vc_mic = vc[mic]
        else:
            mic = ic
            vc_mic = inf
        if vr_mir < vc_mic:
            if psi_neg:
                # Mark the cells after the chosen end point.
                dtw[ir:ir - mir:-1, ic] = -1
            d = vr_mir
        else:
            if psi_neg:
                dtw[ir, ic:ic - mic:-1] = -1
            d = vc_mic
    # d is a root distance here while max_dist is compared on the squared scale.
    if max_dist and d * d > max_dist:
        d = inf
    return d, dtw
# Benchmark input: 30 two-dimensional queries of 16 points each (axis 1 of
# series1 indexes the query) against one 100000-point reference, in float32.
mydtype = np.float32
series1 = np.random.random((16, 30, 2)).astype(mydtype)
series2 = np.random.random((100000, 2)).astype(mydtype)
r, n_series = series1.shape[:2]
c = series2.shape[0]

# Output buffers for the batched kernel, pre-filled with inf.
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype)  # cmath.inf
d = np.full(n_series, np.inf, dtype=mydtype)  # cmath.inf

# 1) All queries in a single p_calc call.
time1 = time.time()
d, dtw1 = p_calc(d, dtw, series1, series2, r, c, 0, 0,
                 c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)

# 2) One calc call per query.
time1 = time.time()
for ii in tqdm(range(n_series)):
    d, dtw1 = calc(series1[:, ii, :], series2, r, c, 0, 0,
                   c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster
How can I speed up the calc function or p_calc function so that I can calculate the dynamic time warping paths of multi-dimensional multi-queries?
Thanks for the answer. I then modified the code to simplify it.
I deleted the np.sum part and used a loop instead, which gives another speedup. Any suggestions for further speedups?
import time
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
# NOTE(review): the next line looks like "@njit(...)" with the "@" lost.
#njit(fastmath=True, nogil=True, error_model="numpy", cache=False, parallel=True)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Stripped-down benchmark kernel: point-wise squared distances only.

    For every series ``q`` along axis 1 of ``s1`` it visits all ``r`` x ``c``
    point pairs, sums the squared differences over the last axis and stores
    the value in ``d[q]``, so ``d[q]`` ends up holding the distance of the
    last pair visited. ``dtw`` and the remaining parameters are accepted only
    to keep the signature of the full version; ``dtw`` is returned untouched.
    """
    query_count = s1.shape[1]
    dims = s1.shape[2]
    for q in prange(query_count):
        for row in range(r):
            for col in range(0, c):
                acc = 0
                # tmp = ((s1[i, nn] - s2[j]) ** 2)
                # tmp = (np.abs(s1[i, nn] - s2[j, 0]))
                for k in range(dims):
                    diff = s1[row, q, k] - s2[col, k]
                    acc += diff ** 2
                d[q] = acc
    return d, dtw
# NOTE(review): the next line looks like "@njit(...)" with the "@" lost.
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(dtw,s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Stripped-down single-query benchmark kernel.

    Visits all ``r`` x ``c`` point pairs of ``s1`` and ``s2`` and computes the
    sum of squared differences over the last axis for each pair. Only the
    value for the last pair survives and is returned together with the
    untouched ``dtw``; the remaining parameters are unused.
    """
    n_dims = s1.shape[-1]
    for row in range(r):
        for col in range(c):
            d = 0
            for axis in range(n_dims):
                diff = s1[row, axis] - s2[col, axis]
                d += diff ** 2
    return d, dtw
# Benchmark input: 300 two-dimensional queries of 16 points each against one
# 1000000-point reference, in float32.
mydtype = np.float32
series1 = np.random.random((16, 300, 2)).astype(mydtype)
series2 = np.random.random((1000000, 2)).astype(mydtype)
r, n_series = series1.shape[:2]
c = series2.shape[0]

# Batched buffers. With these sizes dtw has 300 * 17 * 1000001 float32
# entries, i.e. roughly 20 GB.
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype)  # cmath.inf
d = np.full(n_series, np.inf, dtype=mydtype)  # cmath.inf

# 1) All queries in a single p_calc call.
time1 = time.time()
# assert 1==2
# dtw[:,series2.shape[0]]
d1, dtw1 = p_calc(d, dtw, series1, series2, r, c, 0, 0, c, c, c,
                  np.inf, np.inf, 0.01, False)
print(time.time() - time1)
# assert 1==2

# 2) One calc call per query, sharing a single 2-D buffer.
time1 = time.time()
dtw = np.full((r + 1, c + 1), np.inf, dtype=mydtype)  # cmath.inf
for ii in range(n_series):
    d2, dtw2 = calc(dtw, series1[:, ii, :], series2, r, c, 0, 0,
                    c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster

# Compare the last query's results of both variants (values are not stored).
np.allclose(dtw1[-1], dtw2)
np.allclose(d1[-1], d2)
EDIT:
I found that the performance of the following code is very different depending on whether pass or break is used. I don't understand why.
# NOTE(review): the next line looks like "@njit(...)" with the "@" lost.
#njit(fastmath=True, nogil=True)
def kbest_matches(matching,k=4000):
    """Timing stub: call ``np.argmin(matching)`` up to ``k`` times.

    The loop stops early as soon as the arg-min is index 0. The function
    always returns 0; only its running time is of interest.
    """
    attempts = 0
    while attempts < k:
        # np.argmin(np.arange(10000000))#
        if np.argmin(matching) == 0:
            # pass
            break
        attempts += 1
    return 0
# Time a single call of kbest_matches on 1575822 uniform random values.
ss = np.random.random(1575822)
time1 = time.time()
pp = kbest_matches(ss)
print(time.time() - time1)
I assume the code of both implementations is correct and has been carefully checked (otherwise the benchmark would be pointless).
The issue likely comes from the compilation time of the function. Indeed, the first call is significantly slower than next calls, even with cache=True. This is especially important for the parallel implementation as compiling parallel Numba code is often slower (since it is more complex). The best solution to avoid this is to compile Numba functions ahead of time by providing types to Numba.
Besides this, benchmarking a computation only once is usually considered bad practice. Good benchmarks perform multiple iterations and discard the first ones (or consider them separately). Indeed, several other problems can appear when code is executed for the first time: CPU caches (and the TLB) are cold, the CPU frequency can change during the execution and is likely lower when the program has just started, page faults may need to be handled, etc.
In practice, I cannot reproduce the issue. Actually, p_calc is 3.3 times faster on my 6-core machine. When the benchmark is done in a loop of 5 iterations, the measured time of the parallel implementation is much smaller: about 13 times (which is actually suspicious for a parallel implementation using 6 threads on a 6-core machine).
I am trying to create this matrix in Python using numpy vectors:
where the values come from a function. I have implemented it with repeatedly using numpy.diag but for large dimensions, it becomes very slow. Here is the code:
def makeS(N):
    """Build the N x N matrix S by adding one diagonal at a time.

    Uses the module-level function ``v`` and the vector ``x_range`` (which
    must hold at least ``N + 1`` values):

    * main diagonal: ``2 * v(x_range[1])``, with the first entry halved;
    * first super-diagonal: ``v(x_range[0])``;
    * m-th sub-diagonal (m = 1 .. N-1): ``2 * v(x_range[m + 1])``, with the
      first entry (column 0) halved.

    Every ``np.diag`` call allocates a full N x N array, which is what makes
    this version slow for large N.
    """
    vec = np.full(N, 2 * v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)

    vec = np.full(N - 1, v(x_range[0]))
    S += np.diag(vec, 1)

    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for m in range(1, N):
        vec = np.full(N - m, 2 * v(x_range[m + 1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S
where v() is the said function and x_range is a vector of x-values. Is there a way to make this more efficient?
Edit:
Here is a full example:
import numpy as np
import math
N = 5
x_range = np.linspace(0, 1, N + 1)


def v(x):
    """Return e**x for a scalar x."""
    return math.exp(x)


def makeS(N):
    """Build the N x N matrix S by adding one diagonal at a time.

    Reads the module-level ``x_range`` (at least ``N + 1`` values):

    * main diagonal: ``2 * v(x_range[1])``, with the first entry halved;
    * first super-diagonal: ``v(x_range[0])``;
    * m-th sub-diagonal (m = 1 .. N-1): ``2 * v(x_range[m + 1])``, with the
      first entry (column 0) halved.
    """
    vec = np.full(N, 2 * v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)

    vec = np.full(N - 1, v(x_range[0]))
    S += np.diag(vec, 1)

    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for m in range(1, N):
        vec = np.full(N - m, 2 * v(x_range[m + 1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S


# print() with a single argument behaves the same on Python 2 and Python 3.
print(makeS(N))
which outputs
[[ 1.22140276 1. 0. 0. 0. ]
[ 1.4918247 2.44280552 1. 0. 0. ]
[ 1.8221188 2.9836494 2.44280552 1. 0. ]
[ 2.22554093 3.6442376 2.9836494 2.44280552 1. ]
[ 2.71828183 4.45108186 3.6442376 2.9836494 2.44280552]]
This is the fastest approach I could find:
def makeS(N):
    """Build the N x N matrix S row by row from precomputed values of ``v``.

    Uses the module-level ``v`` and ``x_range`` (``N + 1`` values). ``v`` is
    evaluated once per point; column 0 receives ``v(x_range[1:])``, the first
    super-diagonal ``v(x_range[0])``, and the rest of the lower triangle is
    filled with reversed slices of the doubled values.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for i in range(N - 1):
        # Row i + 1, columns 1 .. i + 1 hold 2*v(x_(i+1)), ..., 2*v(x_1).
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result
With N=2000 the original takes 26.97 seconds on my machine while the new version takes 0.02339 seconds.
Here is the complete script for evaluating timings with some additional approaches.
import numpy as np
import math
import timeit
def v(x):
    """Return the exponential e**x of the scalar ``x``."""
    exp_of_x = math.exp(x)
    return exp_of_x
def makeS1(N, x_range):
    """Reference implementation: build S by summing ``np.diag`` matrices.

    ``x_range`` must hold at least ``N + 1`` values. The main diagonal is
    ``2 * v(x_range[1])`` (first entry halved), the first super-diagonal is
    ``v(x_range[0])`` and the m-th sub-diagonal is ``2 * v(x_range[m + 1])``
    (first entry halved).
    """
    vec = np.full(N, 2 * v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)

    vec = np.full(N - 1, v(x_range[0]))
    S += np.diag(vec, 1)

    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for m in range(1, N):
        vec = np.full(N - m, 2 * v(x_range[m + 1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S
def makeS2(N, x_range):
    """Build S through ``np.fromfunction`` with a per-cell Python loop.

    ``x_range`` must hold at least ``N + 1`` values. Cells above the first
    super-diagonal stay 0, the super-diagonal is ``v(x_range[0])``, column 0
    is ``v(x_range[i + 1])`` and every other cell is
    ``2 * v(x_range[i - j + 1])``.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    def value_at_position(ai, aj):
        # ai / aj are the full row- and column-index grids; fill cell by cell.
        result = np.zeros((N, N))
        for i, j in zip(ai.flatten(), aj.flatten()):
            if j > i + 1:
                continue
            elif j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
        return result

    # fromfunction passes float coordinate grids by default, and floats are
    # not valid array indices; request integer coordinates explicitly.
    return np.fromfunction(value_at_position, (N, N), dtype=int)
def makeS3(N, x_range):
    """Build S with two explicit loops over the non-zero cells only.

    ``x_range`` must hold at least ``N + 1`` values. For row ``i`` only the
    columns ``0 .. min(i + 1, N - 1)`` are visited: the super-diagonal cell
    gets ``v(x_range[0])``, column 0 gets ``v(x_range[i + 1])`` and every
    other cell gets ``2 * v(x_range[i - j + 1])``.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    result = np.zeros((N, N))
    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for i in range(N):
        for j in range(min(i + 2, N)):
            if j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
    return result
def makeS4(N, x_range):
    """Build S row by row using slice assignment (fastest variant here).

    ``x_range`` must hold exactly ``N + 1`` values, because ``values[1:]`` is
    assigned to a column of length ``N``. Column 0 receives
    ``v(x_range[1:])``, the first super-diagonal ``v(x_range[0])``, and the
    rest of the lower triangle is filled with reversed slices of the doubled
    values.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    # range() instead of the Python-2-only xrange() so this runs on Python 3.
    for i in range(N - 1):
        # Row i + 1, columns 1 .. i + 1 hold 2*v(x_(i+1)), ..., 2*v(x_1).
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result
def main():
    """Time the four makeS variants on one random input and check they agree.

    Prints one ``<name> <seconds>`` line per variant; for small ``N`` the four
    matrices are printed as well. Raises ``AssertionError`` if two variants
    disagree.
    """
    N = 2000
    x_range = np.random.randn(N + 1)

    results = []
    for make in (makeS1, makeS2, makeS3, makeS4):
        start = timeit.default_timer()
        results.append(make(N, x_range))
        # %-formatting gives the same "name seconds" line on Python 2 and 3.
        print('%s %s' % (make.__name__, timeit.default_timer() - start))
    s1, s2, s3, s4 = results

    if N < 10:
        print(s1)
        print(s2)
        print(s3)  # the original printed s2 a second time here
        print(s4)

    assert np.allclose(s1, s2)
    assert np.allclose(s2, s3)
    assert np.allclose(s3, s4)


main()
On my machine, this produces the output:
makeS1 26.9707232448
makeS2 11.7728229076
makeS3 0.643742975052
makeS4 0.0233912765665
I have an array of 13.876(13,876) values between 0 and 1. I would like to apply sklearn.cluster.KMeans to only this vector to find the different clusters in which the values are grouped. However, it seems KMeans works with a multidimensional array and not with one-dimensional ones. I guess there is a trick to make it work but I don't know how. I saw that KMeans.fit() accepts "X : array-like or sparse matrix, shape=(n_samples, n_features)", but it wants the n_samples to be bigger than one
I tried putting my array into an np.zeros() matrix and running KMeans, but then it puts all the non-null values in class 1 and the rest in class 0.
Can anyone help in running this algorithm on a one-dimensional array?
You have many samples of 1 feature, so you can reshape the array to (13,876, 1) using numpy's reshape:
from sklearn.cluster import KMeans
import numpy as np
x = np.random.random(13876)
km = KMeans()
# fit() wants a 2-D (n_samples, n_features) array, so give the 1-D vector a
# trailing axis of length 1: 13876 samples with one feature each.
km.fit(x[:, np.newaxis])
Read about Jenks Natural Breaks. Here is a Python implementation, found via a link from the article:
def get_jenks_breaks(data_list, number_class):
    """Return the Jenks natural-breaks class boundaries of 1-D data.

    The values are split into ``number_class`` contiguous groups (in sorted
    order) so that the total within-group sum of squared deviations is
    minimal.

    Parameters
    ----------
    data_list : iterable of numbers. It is NOT modified; a sorted copy is
        used internally.
    number_class : int, number of classes.

    Returns
    -------
    list of ``number_class + 1`` values: the minimum, the largest value of
    each class but the last, and the maximum (converted to ``float``).

    Raises
    ------
    ValueError
        If ``data_list`` is empty.
    """
    # Sort a copy: sorting in place would silently reorder the caller's data.
    data = sorted(data_list)
    n_values = len(data)
    if n_values == 0:
        raise ValueError("data_list must contain at least one value")

    # lower_limits[l][j]: 1-based index of the first value of class j in the
    # best split of the first l values into j classes.
    # variances[l][j]: total squared deviation of that best split.
    lower_limits = [[0] * (number_class + 1) for _ in range(n_values + 1)]
    variances = [[0] * (number_class + 1) for _ in range(n_values + 1)]
    for j in range(1, number_class + 1):
        lower_limits[1][j] = 1
        variances[1][j] = 0
        for l in range(2, n_values + 1):
            variances[l][j] = float('inf')

    v = 0.0
    for l in range(2, n_values + 1):
        s1 = 0.0  # running sum of the candidate last class
        s2 = 0.0  # running sum of squares of the candidate last class
        w = 0.0   # number of values in the candidate last class
        for m in range(1, l + 1):
            i3 = l - m + 1  # 1-based index where the last class would start
            val = float(data[i3 - 1])
            s2 += val * val
            s1 += val
            w += 1
            # Sum of squared deviations of data[i3-1 : l] from its mean.
            v = s2 - (s1 * s1) / w
            i4 = i3 - 1
            if i4 != 0:
                for j in range(2, number_class + 1):
                    candidate = v + variances[i4][j - 1]
                    if variances[l][j] >= candidate:
                        lower_limits[l][j] = i3
                        variances[l][j] = candidate
        lower_limits[l][1] = 1
        variances[l][1] = v

    # Walk back through lower_limits to recover the class boundaries.
    kclass = [data[0]] * (number_class + 1)
    kclass[number_class] = float(data[-1])
    k = n_values
    for count_num in range(number_class, 1, -1):
        class_start = int(lower_limits[k][count_num])
        kclass[count_num - 1] = data[class_start - 2]
        k = class_start - 1
    return kclass
Use and visualization:
import numpy as np
import matplotlib.pyplot as plt
def get_jenks_breaks(...):...
x = np.random.random(30)
breaks = get_jenks_breaks(x, 5)
# One dashed horizontal line per class boundary, spanning all sample indices.
for line in breaks:
    plt.plot([line] * len(x), 'k--')
# The data itself on top of the boundaries.
plt.plot(x)
plt.grid(True)
plt.show()
Result:
Okay so I am trying to complete the infection simulation code for a monte carlo simulation in python.
We were given the thread shell and just need to complete this. I will also have to add a vaccine function but it should be pretty similar to the infect function so Im trying to make sure that it works first. I know that I need to read from my original matrix but write to a new matrix for each day/iteration, but I can't figure out how to write to the new matrix. I tried using np.append but it forced me to redefine my original matrix A, or it said that list indices must be integers, not floats. I've tried working through examples on here and other places, but they all seem to be using lists or they don't involve calling a function within the while loop.
Any help would be greatly appreciated.
import random
import math
import numpy as np
def infect(Pop,i,j,n,m,tau):
    """Decide whether cell ``(i, j)`` gets infected by one of its neighbours.

    Each of the up to four direct neighbours (up, right, down, left) that is
    infected (``Pop > 0``) gets one independent chance ``tau`` to infect the
    cell.

    Parameters
    ----------
    Pop : 2-D array with ``n`` rows and ``m`` columns; only read.
    i, j : 0-based row and column of the cell, ``0 <= i < n``, ``0 <= j < m``.
    n, m : number of rows and columns of ``Pop``.
    tau : probability that one infected neighbour transmits the infection.

    Returns
    -------
    1 if at least one neighbour transmitted the infection, else 0.
    """
    # The original enumerated the nine corner/edge/interior cases by hand with
    # 1-based bounds: row and column 0 were never infected, the last row and
    # column raised IndexError, and each draw overwrote the previous result.
    # One bounds-checked loop over the neighbour offsets covers every case.
    infected = False
    for di, dj in ((-1, 0), (0, 1), (1, 0), (0, -1)):
        ni = i + di
        nj = j + dj
        if 0 <= ni < n and 0 <= nj < m and Pop[ni, nj] > 0:
            # One draw per infected neighbour; a failed draw must not cancel
            # an earlier success.
            if np.random.rand() < tau:
                infected = True
    return 1 if infected else 0
# Driver for the infection simulation (unfinished fragment from the question).
i = 1  # row of the single cell inspected by the loop below
j = 1  # column of that cell
n = 10  # grid rows
m = 10  # grid columns
# k and mu are assigned here but not used in the visible part of the script.
k = int(input("Number of Days to Recover from Illness?"))
d = 0.0  # day counter
tau = 0.5  # passed to infect() as the per-neighbour infection probability
mu = 0.2
A = np.zeros((n,m))
# Day 0: mark one randomly chosen cell as infected. random.sample returns
# one-element lists, so the assignment below sets exactly one cell.
if d == 0:
    n1 = random.sample(range(n),1)
    m1 = random.sample(range(m),1)
    A[n1,m1] = 1
print(A)
# NOTE(review): as written this never terminates -- d is not changed inside
# the loop, the inner `while True` has no break, and i / j never move, so the
# same cell is evaluated over and over. A is also never written to.
while d < 100:
    while True:
        if (A[i,j]==0):
            x = infect(A,i,j,n,m,tau)
            print(x)
            #A_new.append(x)
You create an extra matrix and every time through the loop you swap the two references. For example,
# Two buffers that trade roles every day: one is read, the other is written.
A1 = np.zeros((m, n))
A2 = np.zeros((m, n))
Anow = A1  # reference, not copy
Aafter = A2
while d < 100:
    x = infectAll(Anow, Aafter, n, m, tau)
    # Swap the references instead of copying the matrices.
    Anow, Aafter = Aafter, Anow
    # Advance the day counter; without this the loop never ends.
    d += 1
and infectAll() sweeps over the whole matrix so it would be something like
def infectAll(Ain, Aout, n, m, tau):
    """Compute the next day's grid ``Aout`` from the current grid ``Ain``.

    Both arrays have ``m`` rows and ``n`` columns. ``Ain`` is only read.
    Every cell of ``Aout`` is overwritten: healthy cells (``Ain == 0``) get
    the result of ``infect``, all other cells are carried over unchanged.
    Nothing is returned.
    """
    for i in range(m):
        for j in range(n):
            if Ain[i, j] == 0:
                # infect() takes the row count before the column count, and
                # the grids here have m rows and n columns.
                Aout[i, j] = infect(Ain, i, j, m, n, tau)
            else:
                # Aout is a recycled buffer holding an older day; copy the
                # current state so infected cells do not revert to stale data.
                Aout[i, j] = Ain[i, j]
I like Andrei's more compact code but there is no need to create a new copy of A every time, so the best would be to combine the above technique with Andrei's approach.
As I understand it, you need the dynamics of your infection matrix over time, and tau is the probability of being infected by a neighbor. You can use a 3-dimensional array for this and optimize your code like this:
from copy import copy
import numpy as np
def infect(A, tau):
    """Return the grid after one day of infection spreading.

    Each healthy cell gets one independent chance ``tau`` of being infected
    for every infected direct neighbour (up, down, left, right). Cells that
    are already infected keep their value.

    Parameters
    ----------
    A : 2-D NumPy array; non-zero entries are infected cells. Not modified.
    tau : probability that one infected neighbour transmits the infection.

    Returns
    -------
    A new array of the same shape holding the next state.
    """
    # Take the grid size from A itself rather than from module-level m / n.
    rows, cols = A.shape
    B = A.copy()
    for i in range(rows):
        for j in range(cols):
            if A[i, j]:
                continue
            for di, dj in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                ni = i + di
                nj = j + dj
                # Bounds check: index -1 would wrap around to the far edge
                # and index == size would raise IndexError.
                if 0 <= ni < rows and 0 <= nj < cols and A[ni, nj]:
                    # Only ever set the cell; a failed draw must not undo a
                    # success from an earlier neighbour.
                    if np.random.rand() < tau:
                        B[i, j] = 1
    return B
# Start from a grid with a single infected cell at (i, j).
A = np.zeros((m, n))
A[i, j] = 1
# D[t] stores the grid at the start of day t; D[T] is the final state.
D = np.zeros((T + 1, m, n))
for t in range(T):
    D[t] = A
    A = infect(A, tau)
D[T] = A