Visual for masses on a string using ivisual - python

Hi I am trying to create a visual for a physics problem using ivisual. The best visual I could find to describe the problem is below:
The problem I am having is I found an example in a text book of the same problem. The textbook visualizes the answers using visual module, however from the research I have been doing I believe that the visual module is now called ivisual. I can get the answer numerically when I hash out the function call and any other code that uses the ivisual module. In my specific example the masses are 10 and 20 and the length of each rope segment is 3, 4, and 4, and the distance between a and a is 8. I tried fixing the visual code, but I cannot get it work, it is probably something stupid.
My code is below with the error I am currently getting:
from ivisual import *
from numpy.linalg import solve
import numpy as np
import math as m
scene = display(x=0,y=0,width=500,height=500,
title='String and masses configuration')
tempe = curve(x=range(0,500),color=color.black)
n = 9
eps = 1*10**(-6)
deriv = np.zeros( (n, n), float)
f = np.zeros( (n), float)
x = np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1., 1., 1.])
def plotconfig():
for obj in scene.objects:
obj.visible=0 # to erase the previous configuration
L1 = 3.0
L2 = 4.0
L3 = 4.0
xa = L1*x[3] # L1*cos(th1)
ya = L1*x[0] # L1 sin(th1)
xb = xa+L2*x[4] # L1*cos(th1)+L2*cos(th2)
yb = ya+L2*x[1] # L1*sin(th1)+L2*sen(th2)
xc = xb+L3*x[5] # L1*cos(th1)+L2*cos(th2)+L3*cos(th3)
yc = yb-L3*x[2] # L1*sin(th1)+L2*sen(th2)-L3*sin(th3)
mx = 100.0 # for linear coordinate transformation
bx = -500.0 # from 0=< x =<10
my = -100.0 # to -500 =<x_window=>500
by = 400.0 # same transformation for y
xap = mx*xa+bx # to keep aspect ratio
yap = my*ya+by
ball1 = sphere(pos=(xap,yap), color=color.cyan,radius=15)
xbp = mx*xb+bx
ybp = my*yb+by
ball2 = sphere(pos=(xbp,ybp), color=color.cyan,radius=25)
xcp = mx*xc+bx
ycp = my*yc+by
x0 = mx*0+bx
y0 = my*0+by
line1 = curve(pos=[(x0,y0),(xap,yap)], color=color.yellow,radius=4)
line2 = curve(pos=[(xap,yap),(xbp,ybp)], color=color.yellow,radius=4)
line3 = curve(pos=[(xbp,ybp),(xcp,ycp)], color=color.yellow,radius=4)
topline = curve(pos=[(x0,y0),(xcp,ycp)], color=color.red,radius=4)
def F(x, f): # Define F function
f[0] = 3*x[3] + 4*x[4] + 4*x[5] - 8.0
f[1] = 3*x[0] + 4*x[1] - 4*x[2]
f[2] = x[6]*x[0] - x[7]*x[1] - 10.0
f[3] = x[6]*x[3] - x[7]*x[4]
f[4] = x[7]*x[1] + x[8]*x[2] - 20.0
f[5] = x[7]*x[4] - x[8]*x[5]
f[6] = pow(x[0], 2) + pow(x[3], 2) - 1.0
f[7] = pow(x[1], 2) + pow(x[4], 2) - 1.0
f[8] = pow(x[2], 2) + pow(x[5], 2) - 1.0
def dFi_dXj(x, deriv, n): # Define derivative function
h = 1*10**(-4)
for j in range(0, n):
temp = x[j]
x[j] = x[j] + h/2.
F(x, f)
for i in range(0, n): deriv[i, j] = f[i]
x[j] = temp
for j in range(0, n):
temp = x[j]
x[j] = x[j] - h/2.
F(x, f)
for i in range(0, n): deriv[i, j] = (deriv[i, j] - f[i])/h
x[j] = temp
for it in range(1, 100):
rate(1) # 1 second between graphs
F(x, f)
dFi_dXj(x, deriv, n)
B = np.array([[-f[0]], [-f[1]], [-f[2]], [-f[3]], [-f[4]], [-f[5]],[-f[6]], [-f[7]], [-f[8]]])
sol = solve(deriv, B)
dx = np.take(sol, (0, ), 1) # take the first column of matrix sol
for i in range(0, n):
x[i] = x[i] + dx[i]
plotconfig()
errX = errF = errXi = 0.0
for i in range(0, n):
if ( x[i] != 0.): errXi = abs(dx[i]/x[i])
else: errXi = abs(dx[i])
if ( errXi > errX): errX = errXi
if ( abs(f[i]) > errF ): errF = abs(f[i])
if ( (errX <= eps) and (errF <= eps) ): break
print('Number of iterations = ', it)
print('Solution:')
for i in range(0, n):
print('x[', i, '] = ', x[i])
AttributeError Traceback (most recent call last)
<ipython-input-5-78050c1f23ab> in <module>()
76 for i in range(0, n):
77 x[i] = x[i] + dx[i]
---> 78 plotconfig()
79 errX = errF = errXi = 0.0
80
<ipython-input-5-78050c1f23ab> in plotconfig()
10
11 def plotconfig():
---> 12 for obj in scene.objects:
13 obj.visible=0 # to erase the previous configuration
14 L1 = 3.0
AttributeError: 'NoneType' object has no attribute 'objects'

I tried your example : It works like it is, but first install vpython this way
pip install vpython (after to upgrade : --upgrade)
pip install ivisual --upgrade
If you work system wide
sudo -H pip.....

Related

Heat equation divide by zero issue

I'm writing a code that solves a heat equation implementing an implicit method. The problem is that the values between first and last layer of the matrix are NaNs. What could be the problem?
From my problem of view, the main issue might be with the 105th line, which represents the convrsion of original function to the one that includes the boundary function.
Boundary functions code:
def func(x, t):
return x*(1 - x)*np.exp(-2*t)
# boundary function for x = 0 and x = 1
def q0(t):
return t*np.exp(-t/0.1)*np.cos(t) # граничное условие при x = 0
def q1(t):
return t*np.exp(-t/0.5)*np.cos(t) # граничное уcловие при x = 1
def derivative(f, x0, step):
return (f(x0+step) - f(x0))/step
# boundary function that for t = 0
def u_x0(x):
return (-x + 1)*x
Function that solves the three-diagonal matrix equation
def solution(a, b):
n = len(a)
x = [0 for k in range(0, n)]
# forward
v = [0 for k in range(0, n)]
u = [0 for k in range(0, n)]
# first string (t = 0)
v[0] = a[0][1] / (-a[0][0])
u[0] = ( - b[0]) / (-a[0][0])
for i in range(1, n - 1):
v[i] = a[i][i+1] / ( -a[i][i] - a[i][i-1]*v[i-1] )
u[i] = ( a[i][i-1]*u[i-1] - b[i] ) / ( -a[i][i] - a[i][i-1]*v[i-1] )
# last string (t = 1)
v[n-1] = 0
u[n-1] = (a[n-1][n-2]*u[n-2] - b[n-1]) / (-a[n-1][n-1] - a[n-1][n-2]*v[n-2])
x[n-1] = u[n-1]
for i in range(n-1, 0, -1):
x[i-1] = v[i-1] * x[i] + u[i-1]
return x
Coefficent matrix values:
A = -t/h**2
B = 1 + 2*t/h**2
C = -t/h**2
Code that actually solves the matrix:
i = 1
X =[]
while i < 99:
X = solution(cool_array, f)
k = 0
while k < len(x_i):
#line-105
X[k] += 0.01*(func(x_i[k], x_i[i]) - (1 - x_i[i])*derivative(q0, x_i[i], 0.01) - (x_i[i])*derivative(q1, x_i[i], 0.01))
k+=1
a = 1
while a < 98:
w_h_t[i][a] = X[a]
a+=1
f = X
f[0] = w_h_t[i][0]
f[99] = w_h_t[i][99]
i+=1
print(w_h_t)
As far as I understand, the algorith solution(a, b) is written properly, so I guess the problem might be with the boundary functions or with the 105th line. The output I expect is at least an array of number, not NaNs.

speedup dtaidistance key function with numba

The DTAIDistance package can be used to find k best matches of the input query. but it cannot be used for multi-dimensional input query. moreover, I want to find the k best matches of many input queries in one run.
I modified the DTAIDistance function so that it can be used to search subsequences of multi-dimensions of multi-queries. I use njit with parallel to speed up the process,i.e.the p_calc function which applies numba-parallel to each of the input query. but I find that the parallel calculation seems not to speed up the calculation compared to just simply looping over the input queries one by one, i.e. the calc function.
import time
from tqdm import tqdm
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
#njit(fastmath=True, nogil=True, error_model="numpy", cache=True, parallel=False)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
n_series = s1.shape[1]
ndim = s1.shape[2]
# s1 = np.ascontiguousarray(s1)#.shape
# s2 = np.ascontiguousarray(s2)#.shape
# dtw = np.full((n_series,r + 1, c + 1), np.inf,dtype=s1.dtype) # cmath.inf
# d = np.full((n_series), np.inf,dtype=s1.dtype) # cmath.inf
for i in range(psi_2b + 1):
dtw[:, 0, i] = 0
for i in range(psi_1b + 1):
dtw[:, i, 0] = 0
for nn in prange(n_series):
print('im alive...')
i0 = 1
i1 = 0
sc = 0
ec = 0
smaller_found = False
ec_next = 0
for i in range(r):
i0 = i
i1 = i + 1
j_start = max(0, i - max(0, r - c) - window + 1)
j_end = min(c, i + max(0, c - r) + window)
if sc > j_start:
j_start = sc
smaller_found = False
ec_next = i
for j in range(j_start, j_end):
val = 0
tmp = ((s1[i, nn] - s2[j]) ** 2)
# tmp = (np.abs(s1[i, nn] - s2[j, 0]))
for nd in range(ndim):
val += tmp[nd]
d[nn] = val
# d = np.sum(np.abs(s1[i] - s2[j]) ) # multi-d
if max_step is not None and d[nn] > max_step:
continue
# print(i, j + 1 - skip, j - skipp, j + 1 - skipp, j - skip)
dtw[nn, i1, j + 1] = d[nn] + min(dtw[nn, i0, j],
dtw[nn, i0, j + 1] + penalty,
dtw[nn, i1, j] + penalty)
# dtw[i + 1, j + 1 - skip] = d + min(dtw[i + 1, j + 1 - skip], dtw[i + 1, j - skip])
if dtw[nn, i1, j + 1] > max_dist:
if not smaller_found:
sc = j + 1
if j >= ec:
break
else:
smaller_found = True
ec_next = j + 1
ec = ec_next
# Decide which d to return
dtw[nn] = np.sqrt(dtw[nn])
if psi_1e == 0 and psi_2e == 0:
d[nn] = dtw[nn, i1, min(c, c + window - 1)]
else:
ir = i1
ic = min(c, c + window - 1)
if psi_1e != 0:
vr = dtw[nn, ir:max(0, ir - psi_1e - 1):-1, ic]
mir = np.argmin(vr)
vr_mir = vr[mir]
else:
mir = ir
vr_mir = inf
if psi_2e != 0:
vc = dtw[nn, ir, ic:max(0, ic - psi_2e - 1):-1]
mic = np.argmin(vc)
vc_mic = vc[mic]
else:
mic = ic
vc_mic = inf
if vr_mir < vc_mic:
if psi_neg:
dtw[nn, ir:ir - mir:-1, ic] = -1
d[nn] = vr_mir
else:
if psi_neg:
dtw[nn, ir, ic:ic - mic:-1] = -1
d[nn] = vc_mic
if max_dist and d[nn] ** 2 > max_dist:
# if max_dist and d[nn] > max_dist:
d[nn] = inf
return d, dtw
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
dtw = np.full((r + 1, c + 1), np.inf) # cmath.inf
for i in range(psi_2b + 1):
dtw[0, i] = 0
for i in range(psi_1b + 1):
dtw[i, 0] = 0
i0 = 1
i1 = 0
sc = 0
ec = 0
smaller_found = False
ec_next = 0
for i in range(r):
i0 = i
i1 = i + 1
j_start = max(0, i - max(0, r - c) - window + 1)
j_end = min(c, i + max(0, c - r) + window)
if sc > j_start:
j_start = sc
smaller_found = False
ec_next = i
for j in range(j_start, j_end):
# d = (s1[i] - s2[j]) ** 2# 1-d
d = np.sum((s1[i] - s2[j]) ** 2) # multi-d
# d = np.sum(np.abs(s1[i] - s2[j]) ) # multi-d
if max_step is not None and d > max_step:
continue
dtw[i1, j + 1] = d + min(dtw[i0, j],
dtw[i0, j + 1] + penalty,
dtw[i1, j] + penalty)
if dtw[i1, j + 1] > max_dist:
if not smaller_found:
sc = j + 1
if j >= ec:
break
else:
smaller_found = True
ec_next = j + 1
ec = ec_next
# Decide which d to return
dtw = np.sqrt(dtw)
if psi_1e == 0 and psi_2e == 0:
d = dtw[i1, min(c, c + window - 1)]
else:
ir = i1
ic = min(c, c + window - 1)
if psi_1e != 0:
vr = dtw[ir:max(0, ir - psi_1e - 1):-1, ic]
mir = argmin(vr)
vr_mir = vr[mir]
else:
mir = ir
vr_mir = inf
if psi_2e != 0:
vc = dtw[ir, ic:max(0, ic - psi_2e - 1):-1]
mic = argmin(vc)
vc_mic = vc[mic]
else:
mic = ic
vc_mic = inf
if vr_mir < vc_mic:
if psi_neg:
dtw[ir:ir - mir:-1, ic] = -1
d = vr_mir
else:
if psi_neg:
dtw[ir, ic:ic - mic:-1] = -1
d = vc_mic
if max_dist and d * d > max_dist:
d = inf
return d, dtw
mydtype = np.float32
series1 = np.random.random((16, 30, 2)).astype(mydtype)
series2 = np.random.random((100000, 2)).astype(mydtype)
n_series = series1.shape[1]
r = series1.shape[0]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype) # cmath.inf
d = np.full((n_series), np.inf, dtype=mydtype) # cmath.inf
time1 = time.time()
d, dtw1 = p_calc(d, dtw, series1, series2, series1.shape[0], series2.shape[0], 0, 0,
series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)
time1 = time.time()
for ii in tqdm(range(series1.shape[1])):
d, dtw1 = calc( series1[:, ii, :], series2, series1.shape[0], series2.shape[0], 0, 0,
series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)# this one is faster
How can I speed up the calc function or p_calc function so that I can calculate the dynamic time warping paths of multi-dimensional multi-queries?
thanks for the answer,and then i modified the code for simplification.
i delete the np.sum part and use loop,i can get another speedup. any suggestions for further speedups ?
import time
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
#njit(fastmath=True, nogil=True, error_model="numpy", cache=False, parallel=True)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
n_series = s1.shape[1]
ndim = s1.shape[2]
for nn in prange(n_series):
for i in range(r):
j_start = 0
j_end = c
for j in range(j_start, j_end):
val = 0
# tmp = ((s1[i, nn] - s2[j]) ** 2)
# tmp = (np.abs(s1[i, nn] - s2[j, 0]))
for nd in range(ndim):
tmp = ((s1[i, nn,nd] - s2[j,nd]) ** 2)
val += tmp
d[nn] = val
return d, dtw
#njit(fastmath=True, nogil=True) # Set "nopython" mode for best performance, equivalent to #njit
def calc(dtw,s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
ndim = s1.shape[-1]
for i in range(r):
j_start = 0
j_end = c
for j in range(j_start, j_end):
d = 0
for kk in range(ndim):
d += (s1[i, kk] - s2[j, kk]) ** 2
return d, dtw
mydtype = np.float32
series1 = np.random.random((16, 300, 2)).astype(mydtype)
series2 = np.random.random((1000000, 2)).astype(mydtype)
n_series = series1.shape[1]
r = series1.shape[0]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype) # cmath.inf
d = np.full((n_series), np.inf, dtype=mydtype) # cmath.inf
time1 = time.time()
# assert 1==2
# dtw[:,series2.shape[0]]
d1, dtw1 = p_calc(d, dtw, series1, series2, series1.shape[0], series2.shape[0], 0, 0, series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)
# assert 1==2
time1 = time.time()
dtw = np.full(( r + 1, c + 1), np.inf, dtype=mydtype) # cmath.inf
for ii in (range(series1.shape[1])):
d2, dtw2 = calc( dtw,series1[:, ii, :], series2, series1.shape[0], series2.shape[0], 0, 0,
series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)# this one is faster
np.allclose(dtw1[-1],dtw2)
np.allclose(d1[-1],d2)
EDIT:
i found the following code's performance is very different if use pass or break. i don't understand why?
#njit(fastmath=True, nogil=True)
def kbest_matches(matching,k=4000):
ki = 0
while ki < k:
best_idx =np.argmin(matching)# np.argmin(np.arange(10000000))#
if best_idx == 0 :
# pass
break
ki += 1
return 0
ss= np.random.random((1575822,))
time1 = time.time()
pp = kbest_matches(ss)
print(time.time() - time1)
I assume the code of both implementations are correct and as been carefully checked (otherwise the benchmark would be pointless).
The issue likely comes from the compilation time of the function. Indeed, the first call is significantly slower than next calls, even with cache=True. This is especially important for the parallel implementation as compiling parallel Numba code is often slower (since it is more complex). The best solution to avoid this is to compile Numba functions ahead of time by providing types to Numba.
Besides this, benchmarking a computation only once is usually considered as a bad practice. Good benchmarks perform multiple iterations and remove the first ones (or consider them separately). Indeed, several other problems can appear when a code is executed for the first time: CPU caches (and the TLB) are cold, the CPU frequency can change during the execution and is likely smaller when the program is just started, page faults may need to be needed, etc.
In practice, I cannot reproduce the issue. Actually, p_calc is 3.3 times faster on my 6-core machine. When the benchmark is done in a loop of 5 iterations, the measured time of the parallel implementation is much smaller: about 13 times (which is actually suspicious for a parallel implementation using 6 threads on a 6-core machine).

Python: got an output image with unexpected grid lines

I am writing a function that scales the input image into times of
its input size. The function Resize(Mat I, float s) first fills in the and Mat’s
that contained the query point coordinates. Then I calculate the query value by
using bilinear interpolation.
The output image seems to be alright except it has an unexpected # shape grid on it. Can you provide any hint for the resolution?
Output image:
Code:
import numpy as np
import cv2 as cv
import math
import matplotlib.pyplot as plt
#Mat I, float s
def Resize(I, s):
orig_x = I.shape[0];
orig_y = I.shape[1];
tar_x = int (orig_x * s) #int tar_x and tar_y
tar_y = int (orig_y * s);
#print(tar_x)
# Query points
X = np.empty((tar_y, tar_x), np.float32)
Y = np.empty((tar_y, tar_x), np.float32)
# calc interval between output points
interval = (orig_x-1) / (tar_x-1)
# Setting the query points
for i in range(0, tar_y):
for j in range(0, tar_x):
#set X[i, j] and Y[i,j]
X[i][j] = j * interval
Y[i][j] = i * interval
# Output image
output = np.empty((tar_y, tar_x), np.uint8)
# Performing the interpolation
for i in range(0, tar_y):
for j in range(0, tar_x):
#set output[i,j] using X[i, j] and Y[i,j]
x = X[i][j]
y = Y[i][j]
x1 = math.floor(x)
x2 = math.ceil(x)
y1 = math.floor(y)
y2 = math.ceil(y)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1
return output
s= 640 / 256
I = cv.imread("aerial_256.png", cv.IMREAD_GRAYSCALE)
output = Resize(I,s)
output = cv.cvtColor(output, cv.COLOR_BGR2RGB)
plt.imshow(output)
plt.savefig("aerial_640.png",bbox_inches='tight',transparent=True, pad_inches=0)
plt.show()
You are getting a black pixel where x is an integer and where y is an integer.
Take a look at the following code:
x1 = math.floor(x)
x2 = math.ceil(x)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
Assume: x = 85.0
x1 = floor(x) = 85
x2 = ceil(x) = 85
(x-x1) = (85-85) = 0
(x2-x) = (85-85) = 0
vq1 = (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1] = 0*I[y1,x2] + 0*I[y1,x1] = 0
vq2 = (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1] = 0*I[y2,x2] + 0*I[y2,x1] = 0
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1 = (y-y1)*0 + (y2-y)*0 = 0
Result:
In the entire column where x = 85.0 the value of output[i,j] is zero (we are getting a black column).
Same result applied to y = 85.0 - we are getting a black row.
When does x value is an integer?
Take a look at the following code:
# calc interval between output points
interval = (orig_x-1) / (tar_x-1)
# Setting the query points
for i in range(0, tar_y):
for j in range(0, tar_x):
#set X[i, j] and Y[i,j]
X[i][j] = j * interval
interval = (orig_x-1) / (tar_x-1) = 255/639 = (3*5*17/(3*3*71) = 85/213
j * interval = j * 85/213
Each time j is a multiple of 213, j * interval is an integer (we are getting a black column).
It happens when j=0, j=213, j=426, j=639, so there are two black columns (beside margins).
There are also two visible black rows (beside margins).
Suggested solution:
Replace x2 = math.ceil(x) with x2 = min(x1 + 1, orig_x-1).
Replace y2 = math.ceil(y) with y2 = min(y1 + 1, orig_y-1).
Corrected loop:
for i in range(0, tar_y):
for j in range(0, tar_x):
#set output[i,j] using X[i, j] and Y[i,j]
x = X[i][j]
y = Y[i][j]
x1 = math.floor(x)
x2 = min(x1 + 1, orig_x-1)
y1 = math.floor(y)
y2 = min(y1 + 1, orig_y-1)
vq1= (x-x1)*I[y1,x2] + (x2-x)*I[y1,x1]
vq2= (x-x1)*I[y2,x2] + (x2-x)*I[y2,x1]
output[i,j] = (y-y1)*vq2 + (y2-y)*vq1
Result:

Complex vectors and shape for arrays

I am slightly new to python and I am trying to convert some code.This is an approximation method. Which isn't important. In my oddev function I get returned
c2[1:modes+1] = v* 1j
ValueError: could not broadcast input array from shape (25) into shape (25,1)
When I do this Matlab I believe it automatically casts it, and will store the complex array. The function is a getting the coefficient from a partial sine transform to do this. At first I tried storing the random matrix which just an array using np.matlib method and this had the same shape but I believe I will lose the real values of the filter when I cast it. How do I store this?
import math
import numpy as np
def quickcontmin(datain):
n = np.shape(datain)[0]
m = math.floor(n / 2)
modes = math.floor(m / 2)
addl = 20
nn = 20 * n
chi = 10 ** -13
def evenhp(xv):
"Even high pass"
n1 = np.shape(xv)[0]
vx = np.array(xv[:-1])
vx = vx[::-1]
c1 = np.append(xv,vx)
c1 = np.fft.fft(c1)
c1[0:modes-1] = 0.0
c1[-1 - modes + 2:-1] = 0.0
evenl = np.real(np.fft.ifft(c1))
even = evenl[0:n1-1]
return even
def evenhpt(xv):
" Transpose of EvenHP"
n1 = np.shape(xv)[0]
xy = np.zeros((n1- 2, 1))
c1 = np.append(xv,xy)
c1 = np.fft.fft(c1)
c1[0:modes-1] = 0.0
c1[-1 - modes + 1:-1] = 0.0
evenl = np.real(np.fft.ifft(c1))
even = evenl[0:n1-1]
even[1:-2] = even[1:-2] + evenl[-1:-1:n1+1]
return even``
def evenlp(xv):
" Low pass cosine filter"
n1 = np.shape(xv)[0]
vx = np.array(xv[:-1])
vx = vx[::-1]
c1 = np.append(xv,vx)
c1 = np.fft.fft(c1)
c1[modes + 1:-1 - modes + 1] = 0.0
evenl = np.real(np.fft.ifft(c1))
even = evenl[0:n1-1]
return even
def oddev(xv):
"Evaluate the sine modes on the grid"
c2 = np.zeros((2 *n - 2, 1))*1j
v = np.array(xv[:])
v1 = v[:-1]
v1 = v[::-1]
c2[1:modes+1] = v* 1j
c2[-1 - modes + 1:-1] = -v1* 1j
evall = np.fft.ifft(c2) * math.sqrt(2 * n - 2)
eva = evall[0:n-1]
return eva
def oddevt(xv):
" Transpose the sine modes on the function OddEv"
c1 = np.array(xv[1:-2])
c1 = np.insert(c1,0.0,0)
c1 = np.append(c1,0.0)
c1 = np.append(c1,xv[-2:-1:2])
c1a = np.divide(np.fft.fft(c1),math.sqrt(2 * n - 2))
fcoef = np.imag(c1a[1:modes])
return fcoef
def eextnd(xv):
"Obtain cosine coefficients and evalue on the refined grid"
vx = np.array(xv[:-1])
vx = vx[::-1]
c1 = np.append(xv,vx)
c1 = np.fft.fft(c1)
cL = np.zeros((2*nn-2,1))
cL[0:modes-1] = c1[0:modes-1]
cL[-1 - modes + 1:-1] = c1[-1 - modes + 1:-1]
evenexL = np.multiply(np.fft.ifft(cL) , (nn - 1) / (n - 1))
evenex = evenexL[0:nn-1]
return evenex
def oextnd(xv):
"Evaluate sine coefficients on the refined grid"
c2 = np.zeros((2 * nn - 2, 1))
c2[0] = 0.0
c2[1:modes + 1] = np.multiply(xv[0:-1],1j)
c2[-1 - modes + 1:-1] = np.multiply(-xv[-1:-1:1],1j)
evall = np.real(np.multiply(np.fft.ifft(c2), math.sqrt(2 * n - 2) * (2 *nn - 2) / (2 * n - 2)))
oox = evall[0:nn-1]
return oox
dc = evenlp(datain)
#L in paper, number of vectors used to sample the columnspace
lll = round(4 * math.log(m )/ math.log(2)) + addl
lll = int(lll)
#The following should be straightforward from the psuedo-code
w=2 * np.random.rand(modes , lll) - 1
p=np.matlib.zeros(shape=(n,lll))
for j in range(lll):
p[:,j] = evenhp(oddev(w[:,j]))
q,r = np.linalg.qr(p , mode='reduced')
z = np.zeros(shape=(modes,lll))
for j in range(lll):
z[:,j]= oddevt(evenhpt(q[:,j]))
un,s,v = np.linalg.svd(z,full_matrices='False')
ds=np.diag(s)
aa=np.extract(np.diag(s)>(chi))
aa[-1] = aa
aa = int(aa)
s = 0 * s
for j in range(aa):
s[j,j] = 1.0 / ds(j)
#find the sine coefficents
b=un*s* v.T* q.T* evenhp(datain)
#Constructing the continuation
exs=oddev(b)
pexs = evenlp(exs)
dataCont=exs-pexs+dc
dataCont[n+1:2*n-2]=-exs[-2:-1:1]-pexs[-2:-1:1]+dc[-2:-1:1]
#Evaluate the continuation on the refined grid
dataRefined=eextnd(dc-exs)+oextnd(b)
return dataRefined, dataCont
n1 = 100
t = np.linspace(0,2*math.pi,n1)
y = np.sin(t)
data = quickcontmin(y)
dc1 = data[1]
dc1 = dc1[0:n1-1]`
Replacing c2[1:modes+1] = v* 1j by c2[1:modes+1, 0] = v* 1j should fix that specific error.
More consistent would be to replace:
v = np.array(xv[:])
v1 = v[:-1]
v1 = v[::-1]
by
v = xv
v1 = v[:-1]
v is already a column vector so you don't need to transform it into a 1d vector when you later need a column vector.

Scikit-learn: How to run KMeans on a one-dimensional array?

I have an array of 13.876(13,876) values between 0 and 1. I would like to apply sklearn.cluster.KMeans to only this vector to find the different clusters in which the values are grouped. However, it seems KMeans works with a multidimensional array and not with one-dimensional ones. I guess there is a trick to make it work but I don't know how. I saw that KMeans.fit() accepts "X : array-like or sparse matrix, shape=(n_samples, n_features)", but it wants the n_samples to be bigger than one
I tried putting my array on a np.zeros() matrix and run KMeans, but then is putting all the non-null values on class 1 and the rest on class 0.
Can anyone help in running this algorithm on a one-dimensional array?
You have many samples of 1 feature, so you can reshape the array to (13,876, 1) using numpy's reshape:
from sklearn.cluster import KMeans
import numpy as np
x = np.random.random(13876)
km = KMeans()
km.fit(x.reshape(-1,1)) # -1 will be calculated to be 13876 here
Read about Jenks Natural Breaks. Function in Python found the link from the article:
def get_jenks_breaks(data_list, number_class):
data_list.sort()
mat1 = []
for i in range(len(data_list) + 1):
temp = []
for j in range(number_class + 1):
temp.append(0)
mat1.append(temp)
mat2 = []
for i in range(len(data_list) + 1):
temp = []
for j in range(number_class + 1):
temp.append(0)
mat2.append(temp)
for i in range(1, number_class + 1):
mat1[1][i] = 1
mat2[1][i] = 0
for j in range(2, len(data_list) + 1):
mat2[j][i] = float('inf')
v = 0.0
for l in range(2, len(data_list) + 1):
s1 = 0.0
s2 = 0.0
w = 0.0
for m in range(1, l + 1):
i3 = l - m + 1
val = float(data_list[i3 - 1])
s2 += val * val
s1 += val
w += 1
v = s2 - (s1 * s1) / w
i4 = i3 - 1
if i4 != 0:
for j in range(2, number_class + 1):
if mat2[l][j] >= (v + mat2[i4][j - 1]):
mat1[l][j] = i3
mat2[l][j] = v + mat2[i4][j - 1]
mat1[l][1] = 1
mat2[l][1] = v
k = len(data_list)
kclass = []
for i in range(number_class + 1):
kclass.append(min(data_list))
kclass[number_class] = float(data_list[len(data_list) - 1])
count_num = number_class
while count_num >= 2: # print "rank = " + str(mat1[k][count_num])
idx = int((mat1[k][count_num]) - 2)
# print "val = " + str(data_list[idx])
kclass[count_num - 1] = data_list[idx]
k = int((mat1[k][count_num] - 1))
count_num -= 1
return kclass
Use and visualization:
import numpy as np
import matplotlib.pyplot as plt
def get_jenks_breaks(...):...
x = np.random.random(30)
breaks = get_jenks_breaks(x, 5)
for line in breaks:
plt.plot([line for _ in range(len(x))], 'k--')
plt.plot(x)
plt.grid(True)
plt.show()
Result:

Categories