speedup dtaidistance key function with numba - python

The DTAIDistance package can be used to find the k best matches of an input query, but it cannot be used for a multi-dimensional input query. Moreover, I want to find the k best matches of many input queries in one run.
I modified the DTAIDistance function so that it can search for subsequences of multi-dimensional data for multiple queries. I use njit with parallel to speed up the process, i.e. the p_calc function, which applies Numba's parallel loop to each of the input queries. However, I find that the parallel calculation does not seem to be any faster than simply looping over the input queries one by one, i.e. the calc function.
import time
from tqdm import tqdm
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
#njit(fastmath=True, nogil=True, error_model="numpy", cache=True, parallel=False)
# NOTE(review): the line above is presumably `@njit(...)` in the original post;
# it is a plain comment in this copy, so the function runs as ordinary Python.
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Fill the DTW warping-path matrices of several queries against one series.

    As indexed below, ``s1`` is ``(r, n_series, ndim)``, ``s2`` is
    ``(c, ndim)``, ``dtw`` is ``(n_series, r + 1, c + 1)`` and ``d`` is
    ``(n_series,)``.  ``dtw`` is expected to be pre-filled by the caller
    (the benchmark script fills it with ``inf``); both ``dtw`` and ``d``
    are updated in place and returned.

    ``psi_1b``/``psi_2b`` zero the leading column/row entries, ``psi_1e``/
    ``psi_2e`` relax the end point, ``window`` bounds the column band,
    ``max_step`` skips cells whose local cost exceeds it, ``max_dist``
    prunes on the accumulated (squared) cost, ``penalty`` is added to the
    non-diagonal moves, and ``psi_neg`` marks the relaxed tail with ``-1``.
    """
    n_series = s1.shape[1]
    ndim = s1.shape[2]
    for i in range(psi_2b + 1):
        dtw[:, 0, i] = 0
    for i in range(psi_1b + 1):
        dtw[:, i, 0] = 0
    for nn in prange(n_series):
        i1 = 0
        sc = 0
        ec = 0
        for i in range(r):
            i0 = i
            i1 = i + 1
            j_start = max(0, i - max(0, r - c) - window + 1)
            j_end = min(c, i + max(0, c - r) + window)
            if sc > j_start:
                j_start = sc
            smaller_found = False
            ec_next = i
            for j in range(j_start, j_end):
                # Squared Euclidean distance accumulated in a local scalar:
                # no temporary array per cell, and no write to the shared
                # ``d`` array from inside the hot loop.
                dist = 0.0
                for nd in range(ndim):
                    diff = s1[i, nn, nd] - s2[j, nd]
                    dist += diff * diff
                if max_step is not None and dist > max_step:
                    continue
                dtw[nn, i1, j + 1] = dist + min(dtw[nn, i0, j],
                                                dtw[nn, i0, j + 1] + penalty,
                                                dtw[nn, i1, j] + penalty)
                if dtw[nn, i1, j + 1] > max_dist:
                    if not smaller_found:
                        sc = j + 1
                    if j >= ec:
                        break
                else:
                    smaller_found = True
                    ec_next = j + 1
            ec = ec_next
        # Decide which d to return
        dtw[nn] = np.sqrt(dtw[nn])
        if psi_1e == 0 and psi_2e == 0:
            d[nn] = dtw[nn, i1, min(c, c + window - 1)]
        else:
            ir = i1
            ic = min(c, c + window - 1)
            if psi_1e != 0:
                vr = dtw[nn, ir:max(0, ir - psi_1e - 1):-1, ic]
                mir = np.argmin(vr)
                vr_mir = vr[mir]
            else:
                mir = ir
                vr_mir = np.inf
            if psi_2e != 0:
                vc = dtw[nn, ir, ic:max(0, ic - psi_2e - 1):-1]
                mic = np.argmin(vc)
                vc_mic = vc[mic]
            else:
                mic = ic
                vc_mic = np.inf
            if vr_mir < vc_mic:
                if psi_neg:
                    dtw[nn, ir:ir - mir:-1, ic] = -1
                d[nn] = vr_mir
            else:
                if psi_neg:
                    dtw[nn, ir, ic:ic - mic:-1] = -1
                d[nn] = vc_mic
        # max_dist is compared against the squared distance.
        if max_dist and d[nn] ** 2 > max_dist:
            d[nn] = np.inf
    return d, dtw
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Compute the DTW warping-path matrix between ``s1`` and ``s2``.

    ``s1[i]`` and ``s2[j]`` may be scalars or vectors; the local cost is the
    sum of squared differences.  ``r`` and ``c`` are the number of rows of
    ``s1`` and ``s2`` that are used.

    Returns ``(d, dtw)`` where ``dtw`` is the ``(r + 1, c + 1)`` matrix of
    accumulated costs after taking the square root and ``d`` is the selected
    end-point distance (``inf`` when its square exceeds ``max_dist``).
    """
    dtw = np.full((r + 1, c + 1), np.inf)
    for i in range(psi_2b + 1):
        dtw[0, i] = 0
    for i in range(psi_1b + 1):
        dtw[i, 0] = 0
    i1 = 0
    sc = 0
    ec = 0
    for i in range(r):
        i0 = i
        i1 = i + 1
        j_start = max(0, i - max(0, r - c) - window + 1)
        j_end = min(c, i + max(0, c - r) + window)
        if sc > j_start:
            j_start = sc
        smaller_found = False
        ec_next = i
        for j in range(j_start, j_end):
            d = np.sum((s1[i] - s2[j]) ** 2)  # multi-d
            if max_step is not None and d > max_step:
                continue
            dtw[i1, j + 1] = d + min(dtw[i0, j],
                                     dtw[i0, j + 1] + penalty,
                                     dtw[i1, j] + penalty)
            if dtw[i1, j + 1] > max_dist:
                if not smaller_found:
                    sc = j + 1
                if j >= ec:
                    break
            else:
                smaller_found = True
                ec_next = j + 1
        ec = ec_next
    # Decide which d to return
    dtw = np.sqrt(dtw)
    if psi_1e == 0 and psi_2e == 0:
        d = dtw[i1, min(c, c + window - 1)]
    else:
        ir = i1
        ic = min(c, c + window - 1)
        if psi_1e != 0:
            vr = dtw[ir:max(0, ir - psi_1e - 1):-1, ic]
            mir = np.argmin(vr)
            vr_mir = vr[mir]
        else:
            mir = ir
            vr_mir = np.inf
        if psi_2e != 0:
            vc = dtw[ir, ic:max(0, ic - psi_2e - 1):-1]
            mic = np.argmin(vc)
            vc_mic = vc[mic]
        else:
            mic = ic
            vc_mic = np.inf
        if vr_mir < vc_mic:
            if psi_neg:
                dtw[ir:ir - mir:-1, ic] = -1
            d = vr_mir
        else:
            if psi_neg:
                dtw[ir, ic:ic - mic:-1] = -1
            d = vc_mic
    # max_dist is compared against the squared distance.
    if max_dist and d * d > max_dist:
        d = np.inf
    return d, dtw
# Benchmark: one parallel p_calc call versus a Python loop of calc calls.
mydtype = np.float32
series1 = np.random.random((16, 30, 2)).astype(mydtype)
series2 = np.random.random((100000, 2)).astype(mydtype)
r, n_series = series1.shape[0], series1.shape[1]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype)
d = np.full((n_series), np.inf, dtype=mydtype)

# All queries in a single call.
time1 = time.time()
d, dtw1 = p_calc(d, dtw, series1, series2, r, c, 0, 0,
                 c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)

# One query per call.
time1 = time.time()
for ii in tqdm(range(n_series)):
    d, dtw1 = calc(series1[:, ii, :], series2, r, c, 0, 0,
                   c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster
How can I speed up the calc function or p_calc function so that I can calculate the dynamic time warping paths of multi-dimensional multi-queries?
Thanks for the answer. I then modified the code to simplify it.
I deleted the np.sum part and used a loop instead, which gives another speedup. Any suggestions for further speedups?
import time
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
#njit(fastmath=True, nogil=True, error_model="numpy", cache=False, parallel=True)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Simplified benchmark kernel: squared distances only, no DTW recursion.

    For every query ``q`` (second axis of ``s1``) each ``(row, col)`` pair is
    visited and the squared distance is stored in ``d[q]``, so on return
    ``d[q]`` holds the value of the last pair visited.  ``dtw`` and the
    remaining parameters are accepted for signature compatibility only.
    """
    n_queries, n_features = s1.shape[1], s1.shape[2]
    for q in prange(n_queries):
        for row in range(r):
            for col in range(0, c):
                acc = 0
                for k in range(n_features):
                    acc += (s1[row, q, k] - s2[col, k]) ** 2
                d[q] = acc
    return d, dtw
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(dtw,s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    """Simplified benchmark kernel for a single query.

    Visits every ``(i, j)`` pair of the first ``r`` rows of ``s1`` and the
    first ``c`` rows of ``s2`` and computes the squared distance over the
    last axis.  Returns ``(d, dtw)`` where ``d`` is the distance of the last
    pair visited (``0`` when ``r`` or ``c`` is zero) and ``dtw`` is returned
    untouched.  The remaining parameters are unused.
    """
    ndim = s1.shape[-1]
    # Defined up front so an empty iteration space does not leave ``d`` unbound.
    d = 0
    for i in range(r):
        for j in range(c):
            d = 0
            for kk in range(ndim):
                d += (s1[i, kk] - s2[j, kk]) ** 2
    return d, dtw
# Benchmark of the simplified kernels: parallel p_calc versus a loop of calc.
mydtype = np.float32
series1 = np.random.random((16, 300, 2)).astype(mydtype)
series2 = np.random.random((1000000, 2)).astype(mydtype)
r, n_series = series1.shape[0], series1.shape[1]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype)
d = np.full((n_series), np.inf, dtype=mydtype)

# All queries in a single call.
time1 = time.time()
d1, dtw1 = p_calc(d, dtw, series1, series2, r, c, 0, 0, c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)

# One query per call, sharing a single 2-D buffer.
time1 = time.time()
dtw = np.full((r + 1, c + 1), np.inf, dtype=mydtype)
for ii in range(n_series):
    d2, dtw2 = calc(dtw, series1[:, ii, :], series2, r, c, 0, 0,
                    c, c, c, np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster

# Results of the last query should agree (return values are not printed).
np.allclose(dtw1[-1], dtw2)
np.allclose(d1[-1], d2)
EDIT:
I found that the performance of the following code is very different depending on whether pass or break is used. I don't understand why.
#njit(fastmath=True, nogil=True)
def kbest_matches(matching,k=4000):
    """Minimal repro: call ``np.argmin(matching)`` up to ``k`` times.

    The loop stops as soon as the arg-min is index 0; otherwise it runs all
    ``k`` iterations.  Always returns ``0``.
    """
    remaining = k
    while remaining > 0:
        if np.argmin(matching) == 0:
            # pass
            break
        remaining -= 1
    return 0
# Time a single call of kbest_matches on a large random vector.
ss = np.random.random(1575822)
time1 = time.time()
pp = kbest_matches(ss)
print(time.time() - time1)

I assume the code of both implementations is correct and has been carefully checked (otherwise the benchmark would be pointless).
The issue likely comes from the compilation time of the function. Indeed, the first call is significantly slower than next calls, even with cache=True. This is especially important for the parallel implementation as compiling parallel Numba code is often slower (since it is more complex). The best solution to avoid this is to compile Numba functions ahead of time by providing types to Numba.
Besides this, benchmarking a computation only once is usually considered bad practice. Good benchmarks perform multiple iterations and discard the first ones (or consider them separately). Indeed, several other problems can appear when code is executed for the first time: CPU caches (and the TLB) are cold, the CPU frequency can change during the execution and is likely lower when the program has just started, page faults may need to be handled, etc.
In practice, I cannot reproduce the issue. Actually, p_calc is 3.3 times faster on my 6-core machine. When the benchmark is done in a loop of 5 iterations, the measured time of the parallel implementation is much smaller: about 13 times (which is actually suspicious for a parallel implementation using 6 threads on a 6-core machine).

Related

Heat equation divide by zero issue

I'm writing a code that solves a heat equation implementing an implicit method. The problem is that the values between first and last layer of the matrix are NaNs. What could be the problem?
From my point of view, the main issue might be with the 105th line, which represents the conversion of the original function to the one that includes the boundary function.
Boundary functions code:
def func(x, t):
    """Return x*(1 - x)*exp(-2*t)."""
    decay = np.exp(-2*t)
    return x*(1 - x)*decay
# boundary functions for x = 0 and x = 1
def q0(t):
    """Boundary condition at x = 0: t*exp(-t/0.1)*cos(t)."""
    damping = np.exp(-t/0.1)
    return t*damping*np.cos(t)
def q1(t):
    """Boundary condition at x = 1: t*exp(-t/0.5)*cos(t)."""
    damping = np.exp(-t/0.5)
    return t*damping*np.cos(t)
def derivative(f, x0, step):
    """Forward-difference approximation of f'(x0) with increment ``step``."""
    rise = f(x0+step) - f(x0)
    return rise/step
# initial condition (t = 0)
def u_x0(x):
    """Return x*(1 - x)."""
    return x*(1 - x)
Function that solves the three-diagonal matrix equation
def solution(a, b):
    """Solve the tridiagonal system ``a @ x = b`` by forward/backward sweep.

    ``a`` is indexed as a full square matrix (``a[i][j]``), but only the main
    diagonal and its two neighbours are read.  ``b`` is the right-hand side.
    Returns the solution as a list of length ``len(a)``.

    Empty and 1x1 systems are handled explicitly.  A zero pivot is not
    guarded against and surfaces as a division error (or inf/NaN for NumPy
    scalars).
    """
    n = len(a)
    if n == 0:
        return []
    if n == 1:
        return [b[0] / a[0][0]]

    # Sweep coefficients: x[i] = v[i] * x[i+1] + u[i]
    v = [0] * n
    u = [0] * n
    # first row
    v[0] = a[0][1] / (-a[0][0])
    u[0] = (-b[0]) / (-a[0][0])
    for i in range(1, n - 1):
        pivot = -a[i][i] - a[i][i-1]*v[i-1]
        v[i] = a[i][i+1] / pivot
        u[i] = (a[i][i-1]*u[i-1] - b[i]) / pivot
    # last row (no super-diagonal entry)
    v[n-1] = 0
    u[n-1] = (a[n-1][n-2]*u[n-2] - b[n-1]) / (-a[n-1][n-1] - a[n-1][n-2]*v[n-2])

    # back substitution
    x = [0] * n
    x[n-1] = u[n-1]
    for i in range(n-1, 0, -1):
        x[i-1] = v[i-1] * x[i] + u[i-1]
    return x
Coefficent matrix values:
# Coefficient values used to build the system matrix.
# NOTE(review): t and h are defined outside this snippet, presumably the time
# step and the spatial step; A, B, C are presumably the sub-, main- and
# super-diagonal entries. Confirm against the code that builds cool_array.
A = -t/h**2
B = 1 + 2*t/h**2
C = -t/h**2
Code that actually solves the matrix:
# Time-stepping driver: repeatedly solves the system with solution() and stores
# the result in row i of w_h_t.
# NOTE(review): indentation was lost in this copy. The `while k` loop presumably
# contains the X[k] update and `k+=1`, the `while a` loop the w_h_t assignment
# and `a+=1`, and everything down to `i+=1` sits inside `while i < 99`.
# cool_array, f, x_i and w_h_t are defined outside this snippet.
i = 1
X =[]
while i < 99:
# Solve for the next layer using the current right-hand side f.
X = solution(cool_array, f)
k = 0
while k < len(x_i):
#line-105
# NOTE(review): x_i[i] is passed where func/q0/q1 take a time argument t;
# x_i looks like the spatial grid, so verify this is the intended time value.
X[k] += 0.01*(func(x_i[k], x_i[i]) - (1 - x_i[i])*derivative(q0, x_i[i], 0.01) - (x_i[i])*derivative(q1, x_i[i], 0.01))
k+=1
a = 1
# Copies X[1..97] into row i; indices 0, 98 and 99 are not written here.
while a < 98:
w_h_t[i][a] = X[a]
a+=1
# f and X refer to the same list after this assignment.
f = X
f[0] = w_h_t[i][0]
f[99] = w_h_t[i][99]
i+=1
print(w_h_t)
As far as I understand, the algorithm solution(a, b) is written properly, so I guess the problem might be with the boundary functions or with the 105th line. The output I expect is at least an array of numbers, not NaNs.

Subtle mistakes of one implementation of decision stump

Note: this question arises because of implementation details instead of decision stump ERM algorithm itself.
I am trying to implement the decision stump algorithm by myself and compare it with a correct implementation (both implementations are shown below).
To my understanding, my implementation is essentially the same as the correct implementation, where I do the sorting of x, D and y together but correct implementation does not sort D and y and uses curr_idx instead.
But after running the following test codes, of 10000 different test cases, I got 6213 incorrect test cases.
# Compare my_decision_stump against decision_stump on 10000 random problems.
np.random.seed(0)
cor_count = 0
err_count = 0
# `trial` instead of `iter`, so the builtin iter() is not shadowed.
for trial in range(10000):
    X = np.random.randint(1, 100, size=(10, 2))
    D = np.random.rand(10)
    D /= np.sum(D)
    y = np.random.choice([-1, 1], size=(10,))
    my_res = my_decision_stump(X, D, y)
    res = decision_stump(X, D, y)
    if my_res == res:
        cor_count += 1
    else:
        err_count += 1
print("Number of correct cases {}".format(cor_count))
print("Number of error cases {}".format(err_count))
My implementation and correct implementation are shown below.
def my_decision_stump(X, D, y):
    """Search all features and thresholds for the stump with the smallest weighted error.

    ``X`` is an ``(m, d)`` array, ``D`` holds one weight per sample and ``y``
    holds labels in ``{-1, 1}``.  Returns ``(optimal_j, optimal_b,
    optimal_theta)``: the chosen feature index, the sign (``1`` when the
    running ``F_pos`` sum is the smaller one, else ``-1``) and the threshold.

    The per-feature sorted copies get their own names, so the caller's ``D``
    and ``y`` are re-sorted from the original order for every feature.
    """
    Fs = np.inf
    optimal_j = None
    optimal_b = None
    optimal_theta = None
    m, d = X.shape
    for j in range(d):
        order = np.argsort(X[:, j])
        D_sorted = D[order]
        y_sorted = y[order]
        x = X[order, j].astype(float)
        # Sentinel so x[i + 1] exists for the last sample.
        x = np.hstack((x, x[-1] + 1))
        F_pos = np.sum(D_sorted[y_sorted == 1])
        F_neg = np.sum(D_sorted[y_sorted == -1])
        if F_pos < Fs or F_neg < Fs:
            optimal_theta = x[0] - 1
            optimal_j = j
            if F_pos < F_neg:
                Fs = F_pos
                optimal_b = 1
            else:
                Fs = F_neg
                optimal_b = -1
        for i in range(m):
            F_pos -= y_sorted[i] * D_sorted[i]
            F_neg += y_sorted[i] * D_sorted[i]
            # Only place a threshold between two distinct feature values.
            if (F_pos < Fs or F_neg < Fs) and x[i] != x[i+1]:
                optimal_theta = 0.5 * (x[i] + x[i+1])
                optimal_j = j
                if F_pos < F_neg:
                    Fs = F_pos
                    optimal_b = 1
                else:
                    Fs = F_neg
                    optimal_b = -1
    return (optimal_j, optimal_b, optimal_theta)
def decision_stump(X, D, y):
    """Reference stump search: scan each feature in sorted order via an index array.

    Returns ``(feature index, sign, threshold)`` for the smallest weighted
    error found; ``D`` and ``y`` are never reordered, only indexed.
    """
    best_err = np.inf
    best_j = best_b = best_theta = None
    n_samples, n_features = X.shape
    for j in range(n_features):
        order = np.argsort(X[:, j])
        x = np.zeros(n_samples + 1)
        x[:-1] = X[order, j]
        x[-1] = x[-2] + 1
        F_pos = np.sum(D[y == 1])
        F_neg = np.sum(D[y == -1])
        if F_pos < best_err or F_neg < best_err:
            best_theta, best_j = x[0] - 1, j
            best_err, best_b = (F_pos, 1) if F_pos < F_neg else (F_neg, -1)
        for i, sample in enumerate(order):
            shift = y[sample] * D[sample]
            F_pos -= shift
            F_neg += shift
            improves = F_pos < best_err or F_neg < best_err
            if improves and x[i] != x[i + 1]:
                best_theta, best_j = 0.5 * (x[i] + x[i + 1]), j
                best_err, best_b = (F_pos, 1) if F_pos < F_neg else (F_neg, -1)
    return (best_j, best_b, best_theta)
I am not sure what (maybe subtle) mistake I have made.
Could anyone help me, thank you in advance.
I made a stupid mistake...
In my implementation, because the variable names D and y are reused, D and y change in every iteration (i.e. D = record_sorted[:, 1] rebinds D, and this sorted D is then used in the next iteration; the same happens with y), but they SHOULD NOT change.
This example shows the great hazard of abusing variable name.

Visual for masses on a string using ivisual

Hi I am trying to create a visual for a physics problem using ivisual. The best visual I could find to describe the problem is below:
The problem I am having is I found an example in a text book of the same problem. The textbook visualizes the answers using visual module, however from the research I have been doing I believe that the visual module is now called ivisual. I can get the answer numerically when I hash out the function call and any other code that uses the ivisual module. In my specific example the masses are 10 and 20 and the length of each rope segment is 3, 4, and 4, and the distance between a and a is 8. I tried fixing the visual code, but I cannot get it work, it is probably something stupid.
My code is below with the error I am currently getting:
from ivisual import *
from numpy.linalg import solve
import numpy as np
import math as m
# Window, problem size and Newton work arrays.
scene = display(
    x=0, y=0, width=500, height=500,
    title='String and masses configuration',
)
tempe = curve(x=range(0, 500), color=color.black)
n = 9
eps = 1*10**(-6)
deriv = np.zeros((n, n), float)
f = np.zeros(n, float)
# Initial guess (ten entries, of which indices 0..8 are used below).
x = np.array([0.5]*7 + [1.0]*3)
def plotconfig():
    """Redraw the two masses and the string segments for the current ``x``."""
    # Hide everything drawn so far to erase the previous configuration.
    for old in scene.objects:
        old.visible=0
    L1, L2, L3 = 3.0, 4.0, 4.0
    xa = L1*x[3]        # L1*cos(th1)
    ya = L1*x[0]        # L1*sin(th1)
    xb = xa+L2*x[4]     # L1*cos(th1)+L2*cos(th2)
    yb = ya+L2*x[1]     # L1*sin(th1)+L2*sin(th2)
    xc = xb+L3*x[5]     # L1*cos(th1)+L2*cos(th2)+L3*cos(th3)
    yc = yb-L3*x[2]     # L1*sin(th1)+L2*sin(th2)-L3*sin(th3)

    # Linear map from problem coordinates to window coordinates.
    mx, bx = 100.0, -500.0
    my, by = -100.0, 400.0

    def to_window(px, py):
        return (mx*px+bx, my*py+by)

    origin = to_window(0, 0)
    pa = to_window(xa, ya)
    pb = to_window(xb, yb)
    pc = to_window(xc, yc)
    sphere(pos=pa, color=color.cyan,radius=15)
    sphere(pos=pb, color=color.cyan,radius=25)
    curve(pos=[origin, pa], color=color.yellow,radius=4)
    curve(pos=[pa, pb], color=color.yellow,radius=4)
    curve(pos=[pb, pc], color=color.yellow,radius=4)
    curve(pos=[origin, pc], color=color.red,radius=4)
def F(x, f):  # Define F function
    """Write the nine residuals of the nonlinear system at ``x`` into ``f``."""
    # x[0..2] are the sines and x[3..5] the cosines of the three angles;
    # x[6..8] are presumably the string tensions (TODO confirm).
    s1, s2, s3, c1, c2, c3, t1, t2, t3 = (x[k] for k in range(9))
    f[0] = 3*c1 + 4*c2 + 4*c3 - 8.0
    f[1] = 3*s1 + 4*s2 - 4*s3
    f[2] = t1*s1 - t2*s2 - 10.0
    f[3] = t1*c1 - t2*c2
    f[4] = t2*s2 + t3*s3 - 20.0
    f[5] = t2*c2 - t3*c3
    # sin**2 + cos**2 = 1 for each angle
    f[6] = s1**2 + c1**2 - 1.0
    f[7] = s2**2 + c2**2 - 1.0
    f[8] = s3**2 + c3**2 - 1.0
def dFi_dXj(x, deriv, n):  # Define derivative function
    """Fill ``deriv[i, j]`` with the central-difference estimate of dF_i/dx_j.

    Each ``x[j]`` is perturbed by ``+h/2`` and ``-h/2`` in turn and restored
    afterwards.  The residuals are evaluated into private scratch buffers, so
    the module-level residual vector ``f`` is left untouched (it is only read
    for its length).
    """
    h = 1*10**(-4)
    size = len(f)
    f_plus = [0.0] * size
    f_minus = [0.0] * size
    for j in range(0, n):
        saved = x[j]
        x[j] = saved + h/2.
        F(x, f_plus)
        x[j] = saved - h/2.
        F(x, f_minus)
        x[j] = saved
        for i in range(0, n):
            deriv[i, j] = (f_plus[i] - f_minus[i])/h
# Newton-Raphson iteration: solve deriv * dx = -F(x), update x, redraw.
for it in range(1, 100):
    rate(1)  # 1 second between graphs
    # Jacobian first, residual second: dFi_dXj may use f as scratch space, so
    # evaluating F afterwards guarantees f holds F at the unperturbed x.
    dFi_dXj(x, deriv, n)
    F(x, f)
    # A 1-D right-hand side gives a 1-D dx, so x[i] is updated with a scalar
    # rather than a one-element array.
    B = -f[:n]
    dx = solve(deriv, B)
    for i in range(0, n):
        x[i] = x[i] + dx[i]
    plotconfig()
    errX = errF = errXi = 0.0
    for i in range(0, n):
        if x[i] != 0.:
            errXi = abs(dx[i]/x[i])
        else:
            errXi = abs(dx[i])
        if errXi > errX:
            errX = errXi
        if abs(f[i]) > errF:
            errF = abs(f[i])
    if (errX <= eps) and (errF <= eps):
        break
print('Number of iterations = ', it)
print('Solution:')
for i in range(0, n):
    print('x[', i, '] = ', x[i])
AttributeError Traceback (most recent call last)
<ipython-input-5-78050c1f23ab> in <module>()
76 for i in range(0, n):
77 x[i] = x[i] + dx[i]
---> 78 plotconfig()
79 errX = errF = errXi = 0.0
80
<ipython-input-5-78050c1f23ab> in plotconfig()
10
11 def plotconfig():
---> 12 for obj in scene.objects:
13 obj.visible=0 # to erase the previous configuration
14 L1 = 3.0
AttributeError: 'NoneType' object has no attribute 'objects'
I tried your example : It works like it is, but first install vpython this way
pip install vpython (after to upgrade : --upgrade)
pip install ivisual --upgrade
If you work system wide
sudo -H pip.....

Efficient way to create a dense matrix from diagonal vectors in Python?

I am trying to create this matrix in Python using numpy vectors:
where the values come from a function. I have implemented it with repeatedly using numpy.diag but for large dimensions, it becomes very slow. Here is the code:
def makeS(N):
    """Build the N x N matrix by summing one ``np.diag`` per diagonal (slow version).

    Uses the module-level function ``v`` and vector ``x_range``.  The first
    super-diagonal holds ``v(x_range[0])``; diagonal ``-m`` holds
    ``2*v(x_range[m+1])`` with its first entry halved.
    """
    vec = np.full(N, 2*v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)
    vec = np.full(N-1, v(x_range[0]))
    S += np.diag(vec, 1)
    # range() instead of the Python 2 only xrange()
    for m in range(1, N):
        vec = np.full(N-m, 2*v(x_range[m+1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S
where v() is the said function and x_range is a vector of x-values. Is there a way to make this more efficient?
Edit:
Here is a full example:
import numpy as np
import math

N = 5
x_range = np.linspace(0, 1, N+1)


def v(x):
    """Return e**x."""
    return math.exp(x)


def makeS(N):
    """Build the N x N matrix by summing one ``np.diag`` per diagonal.

    The first super-diagonal holds ``v(x_range[0])``; diagonal ``-m`` holds
    ``2*v(x_range[m+1])`` with its first entry halved.
    """
    vec = np.full(N, 2*v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)
    vec = np.full(N-1, v(x_range[0]))
    S += np.diag(vec, 1)
    # range() and print() so the example also runs on Python 3
    for m in range(1, N):
        vec = np.full(N-m, 2*v(x_range[m+1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S


print(makeS(N))
which outputs
[[ 1.22140276 1. 0. 0. 0. ]
[ 1.4918247 2.44280552 1. 0. 0. ]
[ 1.8221188 2.9836494 2.44280552 1. 0. ]
[ 2.22554093 3.6442376 2.9836494 2.44280552 1. ]
[ 2.71828183 4.45108186 3.6442376 2.9836494 2.44280552]]
This is the fastest approach I could find:
def makeS(N):
    """Build the N x N matrix row by row from precomputed ``v`` values.

    Uses the module-level function ``v`` and vector ``x_range`` (which must
    have ``N + 1`` entries).  Column 0 holds ``v(x_range[1:])``, the first
    super-diagonal holds ``v(x_range[0])`` and the remaining lower-triangular
    entries hold doubled values in reversed order.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    # range() instead of the Python 2 only xrange()
    for i in range(N - 1):
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result
With N=2000 the original takes 26.97 seconds on my machine while the new version takes 0.02339 seconds.
Here is the complete script for evaluating timings with some additional approaches.
import numpy as np
import math
import timeit
def v(x):
    """Return e**x for a scalar ``x``."""
    value = math.exp(x)
    return value
def makeS1(N, x_range):
    """Original approach: accumulate the matrix from one ``np.diag`` per diagonal."""
    vec = np.full(N, 2 * v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)
    vec = np.full(N - 1, v(x_range[0]))
    S += np.diag(vec, 1)
    # range() instead of the Python 2 only xrange()
    for m in range(1, N):
        vec = np.full(N - m, 2 * v(x_range[m + 1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S
def makeS2(N, x_range):
    """Build the matrix through ``np.fromfunction`` with a per-element Python loop."""
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    def value_at_position(ai, aj):
        # ai/aj are the full (N, N) row and column index grids.
        result = np.zeros((N, N))
        for i, j in zip(ai.flatten(), aj.flatten()):
            if j > i + 1:
                continue
            elif j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
        return result

    # dtype=int: fromfunction produces float coordinate arrays by default,
    # which cannot be used as array indices.
    return np.fromfunction(value_at_position, (N, N), dtype=int)
def makeS3(N, x_range):
    """Fill the non-zero entries one at a time with two nested Python loops."""
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.zeros((N, N))
    # range() instead of the Python 2 only xrange()
    for i in range(N):
        # Row i is non-zero up to and including column i + 1.
        for j in range(min(i + 2, N)):
            if j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
    return result
def makeS4(N, x_range):
    """Fastest variant: one reversed slice assignment per row.

    ``x_range`` must have ``N + 1`` entries.
    """
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    # range() instead of the Python 2 only xrange()
    for i in range(N - 1):
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result
def main():
    """Time makeS1..makeS4 on the same random input and check that they agree."""
    N = 2000
    x_range = np.random.randn(N + 1)
    results = []
    for make in (makeS1, makeS2, makeS3, makeS4):
        start = timeit.default_timer()
        results.append(make(N, x_range))
        print(make.__name__, timeit.default_timer() - start)
    if N < 10:
        # Print every result once (the third print used to repeat s2).
        for matrix in results:
            print(matrix)
    for left, right in zip(results, results[1:]):
        assert np.allclose(left, right)


main()
On my machine, this produces the output:
makeS1 26.9707232448
makeS2 11.7728229076
makeS3 0.643742975052
makeS4 0.0233912765665

Scikit-learn: How to run KMeans on a one-dimensional array?

I have an array of 13.876(13,876) values between 0 and 1. I would like to apply sklearn.cluster.KMeans to only this vector to find the different clusters in which the values are grouped. However, it seems KMeans works with a multidimensional array and not with one-dimensional ones. I guess there is a trick to make it work but I don't know how. I saw that KMeans.fit() accepts "X : array-like or sparse matrix, shape=(n_samples, n_features)", but it wants the n_samples to be bigger than one
I tried putting my array on a np.zeros() matrix and run KMeans, but then is putting all the non-null values on class 1 and the rest on class 0.
Can anyone help in running this algorithm on a one-dimensional array?
You have many samples of 1 feature, so you can reshape the array to (13,876, 1) using numpy's reshape:
from sklearn.cluster import KMeans
import numpy as np
x = np.random.random(13876)
# KMeans expects (n_samples, n_features): turn the vector into a single column.
km = KMeans().fit(x[:, np.newaxis])
Read about Jenks Natural Breaks. The Python function below was found via a link from the article:
def get_jenks_breaks(data_list, number_class):
    """Return the Jenks natural-breaks class boundaries of ``data_list``.

    The result is a list of ``number_class + 1`` values: the minimum, the
    ``number_class - 1`` inner breaks and the maximum (as a ``float``).
    The input may be any iterable of numbers and is not modified; a sorted
    copy is used internally.
    """
    data = sorted(data_list)
    n = len(data)

    # mat1[l][j]: 1-based index of the first element of the last class when
    # the first l values are split into j classes.
    # mat2[l][j]: corresponding sum of squared deviations.
    mat1 = [[0] * (number_class + 1) for _ in range(n + 1)]
    mat2 = [[0] * (number_class + 1) for _ in range(n + 1)]
    for i in range(1, number_class + 1):
        mat1[1][i] = 1
        mat2[1][i] = 0
        for j in range(2, n + 1):
            mat2[j][i] = float('inf')

    v = 0.0
    for l in range(2, n + 1):
        s1 = 0.0
        s2 = 0.0
        w = 0.0
        for m in range(1, l + 1):
            i3 = l - m + 1
            val = float(data[i3 - 1])
            # Running sums give the squared deviation of data[i3-1 : l].
            s2 += val * val
            s1 += val
            w += 1
            v = s2 - (s1 * s1) / w
            i4 = i3 - 1
            if i4 != 0:
                for j in range(2, number_class + 1):
                    candidate = v + mat2[i4][j - 1]
                    if mat2[l][j] >= candidate:
                        mat1[l][j] = i3
                        mat2[l][j] = candidate
        mat1[l][1] = 1
        mat2[l][1] = v

    # Walk the table backwards to read off the breaks.
    k = n
    kclass = [min(data)] * (number_class + 1)
    kclass[number_class] = float(data[n - 1])
    count_num = number_class
    while count_num >= 2:
        idx = int((mat1[k][count_num]) - 2)
        kclass[count_num - 1] = data[idx]
        k = int((mat1[k][count_num] - 1))
        count_num -= 1
    return kclass
Use and visualization:
import numpy as np
import matplotlib.pyplot as plt
def get_jenks_breaks(...):...
x = np.random.random(30)
breaks = get_jenks_breaks(x, 5)
# One dashed horizontal line per break, spanning the length of the data.
for level in breaks:
    plt.plot([level] * len(x), 'k--')
plt.plot(x)
plt.grid(True)
plt.show()
Result:

Categories