I am implementing the Francis double-step QR iteration algorithm using the notes and pseudocode from the lecture notes https://people.inf.ethz.ch/arbenz/ewp/Lnotes/chapter4.pdf (Algorithm 4.5).
The pseudocode is given in MATLAB, I believe.
Below is my implementation.
import numpy as np
import scipy.linalg

# compute upper Hessenberg form of matrix
def hessenberg(A):
    m, n = A.shape
    H = A.astype(np.float64)
    for k in range(n - 2):
        x = H[k+1:, k]
        v = np.concatenate([np.array([np.sign(x[0]) * np.linalg.norm(x)]), x[1:]])
        v = v / np.linalg.norm(v)
        H[k+1:, k:] -= 2 * np.outer(v, np.dot(v, H[k+1:, k:]))
        H[:, k+1:] -= 2 * np.outer(np.dot(H[:, k+1:], v), v)
    return H
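A quick sanity check for the reduction alone: it is a similarity transform, so it must preserve the eigenvalues of the input (the test matrix here is an arbitrary example; one could equally compare against scipy.linalg.hessenberg):

A_test = np.random.rand(5, 5)
H_test = hessenberg(A_test)
print(np.allclose(np.sort_complex(np.linalg.eigvals(A_test)),
                  np.sort_complex(np.linalg.eigvals(H_test))))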
# compute first three elements of M
def first_three_M(T, s, t):
    x = T[0, 0]**2 + T[0, 1] * T[1, 0] - s * T[0, 0] + t
    y = T[1, 0] * (T[0, 0] + T[1, 1] - s)
    z = T[1, 0] * T[2, 1]
    return x, y, z
# householder reflection
def householder_reflection_step(x_1):
    v = x_1[0] + np.sign(x_1[0]) * np.linalg.norm(x_1)
    v = v / np.linalg.norm(v)
    P = np.eye(3) - 2 * np.outer(v, v)
    return P
# update elements of M
def update_M(T, k, p):
    x = T[k+1, k]
    y = T[k+2, k]
    if k < p - 3:
        z = T[k+3, k]
    else:
        z = 0
    return x, y, z
# givens rotation
def givens_step(T, x_2, x, y, p, q, n):
    # calculate c and s
    c = x / np.sqrt(x**2 + y**2)
    s = -y / np.sqrt(x**2 + y**2)
    P = np.array([[c, s], [-s, c]])
    T[q-1:p, p-3:n] = P.T @ T[q-1:p, p-3:n]
    T[0:p, p-2:p] = T[0:p, p-2:p] @ P
    return T
# deflation step
def deflation_step(T, p, q, epsilon):
    if abs(T[p-1, p-2]) < epsilon * (abs(T[p-2, p-2]) + abs(T[p-1, p-1])):
        T[p-1, p-2] = 0
        p = p - 1
        q = p - 1
    elif abs(T[p-2, p-3]) < epsilon * (abs(T[p-3, p-3]) + abs(T[p-2, p-2])):
        T[p-2, p-3] = 0
        p = p - 2
        q = p - 1
    return T, p, q
# francis qr step
def francis_step(H, epsilon=0.90):
    n = H.shape[0]
    T = H.copy().astype(np.float64)
    p = n - 1
    while p > 2:
        q = p - 1
        s = T[q, q] + T[p, p]
        t = T[q, q] * T[p, p] - T[q, p] * T[p, q]
        # Compute M
        x, y, z = first_three_M(T, s, t)
        x_1 = np.transpose([[x], [y], [z]])
        # Bulge chasing
        for k in range(p - 3):
            # Compute Householder reflector
            P = householder_reflection_step(x_1)
            r = max(1, k - 1)
            T[k:k+3, r:] = P.T @ T[k:k+3, r:]
            r = min(k + 3, p)
            T[0:r, k:k+3] = T[0:r, k:k+3] @ P
            # Update M
            x, y, z = update_M(T, k, p)
            x_2 = np.transpose([[x], [y]])
        # Compute Givens rotation
        T = givens_step(T, x_2, x, y, p, q, n)
        # Check for convergence
        T, p, q = deflation_step(T, p, q, epsilon)
    return T
# francis qr iteration
def francis_qr_iteration(A):
    m, n = A.shape
    H = hessenberg(A)
    eigvals = []
    iters = 0
    max_iters = 100
    while iters < max_iters:
        # Perform Francis step
        T = francis_step(H)
        eigvals.append(np.diag(T))
        iters += 1
    return eigvals
# for quick testing
A = np.array([[2, 2, 3, 4, 2],
              [1, 2, 4, 2, 3],
              [4, 1, 2, 1, 5],
              [5, 2, 5, 2, 1],
              [3, 6, 3, 1, 4]])
eigenvals = francis_qr_iteration(A)
# comparing our method to scipy - final eigvals obtained
print(len(eigenvals))
print(sorted(eigenvals[-1]))
print(sorted(scipy.linalg.eig(A)[0].real))
And this is the output I am getting.
100
[-4.421235127393854, -0.909209110641351, -0.8342390091346807, 3.7552499102751575, 8.215454029003958]
[-3.0411228516834217, -1.143605409373778, -1.143605409373778, 3.325396565009845, 14.002937105421134]
The matrix T is not changing, so it does not converge to the Schur form from which I could read off the eigenvalues with np.diag(T). I believe the error comes from either the Givens rotation step or the Householder reflection step. It could also be an indexing issue, since I translated MATLAB pseudocode into Python. Please let me know where I am going wrong so I can fix the code and make it converge.
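For context on the indexing point: MATLAB slices are 1-based with inclusive ends, while NumPy slices are 0-based with exclusive ends, so a MATLAB expression like T(q:p, p-2:n) corresponds to T[q-1:p, p-3:n] in NumPy. A tiny illustration on a throwaway array:

A = np.arange(25).reshape(5, 5)
print(A[1:4, 2])  # MATLAB A(2:4, 3): rows 2..4 of column 3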
I have the following function:
def h_Y1(X, theta):
    EF = X[0]
    FF = X[1]
    j = X[-2]
    k = X[-1]
    W = X[2:-2]
    Sigma = theta[0]
    sigma_xi2 = theta[1]
    gamma_alpha = theta[2]
    gamma_z = np.array(theta[3:])
    gW = gamma_z @ W
    eps1 = EF - gamma_alpha * gW
    if j == k:
        eps2 = FF - (gamma_alpha**2)*gW - gW*sigma_xi2 - Sigma
        eps3 = 0
    else:
        eps2 = 0
        eps3 = FF - (gamma_alpha**2)*gW
    h1 = [eps1 * Wk for Wk in W]
    h2 = [eps2 * Wk for Wk in W]
    h3 = [eps3 * Wk for Wk in W]
    return np.concatenate([h1, h2, h3])
I need to execute the function for a range of values, which are stored in gmmarray. Specifically, I'd like the function to be run for each row of gmmarray as the function argument X, for a fixed theta.
I'm currently doing this using the following code:
import numpy as np
theta = [0.01, 1, 1, 0, 0]
gmmarray = np.random.random((1120451, 6))
test = np.apply_along_axis(h_Y1, 1, gmmarray, theta=theta)
However, this is slow - it takes around 19 seconds. I tried vectorizing the function as follows:
Vh_Y1 = np.vectorize(h_Y1, signature='(n),(j)->(i)')
test1 = Vh_Y1(gmmarray, theta)
However, this still takes 16 seconds. Am I doing something wrong here or is there a way to speed things up further?
Thanks so much!
You can pass the full gmmarray as the X parameter. Then, instead of looping through each row of gmmarray, you can use vectorized operations on its columns.
Something like this:
def h_Y1_vectorized(X, theta):
    EF, FF, W, j, k = np.hsplit(X, [1, 2, 4, 5])  # column vectors (except W)
    Sigma, sigma_xi2, gamma_alpha, *gamma_z = theta
    gW = (W @ gamma_z)[:, None]  # ensure column vector
    ga_gW = gamma_alpha * gW
    FF_ga2gW = FF - gamma_alpha * ga_gW
    eps1 = EF - ga_gW
    j_equal_k = j == k
    eps2 = np.where(j_equal_k, FF_ga2gW - gW * sigma_xi2 - Sigma, 0)
    eps3 = np.where(j_equal_k, 0, FF_ga2gW)
    h1 = eps1 * W
    h2 = eps2 * W
    h3 = eps3 * W
    return np.hstack([h1, h2, h3])
Calling
>>> h_Y1_vectorized(gmmarray, theta)
produces the same result with roughly a 100x speed increase.
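A quick way to confirm the rewrite matches the row-wise version (same names as above; exact timings will vary by machine):

test = np.apply_along_axis(h_Y1, 1, gmmarray, theta=theta)
test1 = h_Y1_vectorized(gmmarray, theta)
print(np.allclose(test, test1))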
The DTAIDistance package can be used to find the k best matches of an input query, but it cannot be used for a multi-dimensional input query. Moreover, I want to find the k best matches of many input queries in one run.
I modified the DTAIDistance function so that it can search subsequences of multiple dimensions for multiple queries. I use njit with parallel to speed up the process, i.e. the p_calc function, which applies Numba parallelism across the input queries. But I find that the parallel calculation does not speed things up compared to simply looping over the input queries one by one, i.e. the calc function.
import time
from tqdm import tqdm
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
@njit(fastmath=True, nogil=True, error_model="numpy", cache=True, parallel=False)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    n_series = s1.shape[1]
    ndim = s1.shape[2]
    # s1 = np.ascontiguousarray(s1)#.shape
    # s2 = np.ascontiguousarray(s2)#.shape
    # dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=s1.dtype)  # cmath.inf
    # d = np.full((n_series), np.inf, dtype=s1.dtype)  # cmath.inf
    for i in range(psi_2b + 1):
        dtw[:, 0, i] = 0
    for i in range(psi_1b + 1):
        dtw[:, i, 0] = 0
    for nn in prange(n_series):
        print('im alive...')
        i0 = 1
        i1 = 0
        sc = 0
        ec = 0
        smaller_found = False
        ec_next = 0
        for i in range(r):
            i0 = i
            i1 = i + 1
            j_start = max(0, i - max(0, r - c) - window + 1)
            j_end = min(c, i + max(0, c - r) + window)
            if sc > j_start:
                j_start = sc
            smaller_found = False
            ec_next = i
            for j in range(j_start, j_end):
                val = 0
                tmp = ((s1[i, nn] - s2[j]) ** 2)
                # tmp = (np.abs(s1[i, nn] - s2[j, 0]))
                for nd in range(ndim):
                    val += tmp[nd]
                d[nn] = val
                # d = np.sum(np.abs(s1[i] - s2[j]))  # multi-d
                if max_step is not None and d[nn] > max_step:
                    continue
                # print(i, j + 1 - skip, j - skipp, j + 1 - skipp, j - skip)
                dtw[nn, i1, j + 1] = d[nn] + min(dtw[nn, i0, j],
                                                 dtw[nn, i0, j + 1] + penalty,
                                                 dtw[nn, i1, j] + penalty)
                # dtw[i + 1, j + 1 - skip] = d + min(dtw[i + 1, j + 1 - skip], dtw[i + 1, j - skip])
                if dtw[nn, i1, j + 1] > max_dist:
                    if not smaller_found:
                        sc = j + 1
                    if j >= ec:
                        break
                else:
                    smaller_found = True
                    ec_next = j + 1
            ec = ec_next
        # Decide which d to return
        dtw[nn] = np.sqrt(dtw[nn])
        if psi_1e == 0 and psi_2e == 0:
            d[nn] = dtw[nn, i1, min(c, c + window - 1)]
        else:
            ir = i1
            ic = min(c, c + window - 1)
            if psi_1e != 0:
                vr = dtw[nn, ir:max(0, ir - psi_1e - 1):-1, ic]
                mir = np.argmin(vr)
                vr_mir = vr[mir]
            else:
                mir = ir
                vr_mir = inf
            if psi_2e != 0:
                vc = dtw[nn, ir, ic:max(0, ic - psi_2e - 1):-1]
                mic = np.argmin(vc)
                vc_mic = vc[mic]
            else:
                mic = ic
                vc_mic = inf
            if vr_mir < vc_mic:
                if psi_neg:
                    dtw[nn, ir:ir - mir:-1, ic] = -1
                d[nn] = vr_mir
            else:
                if psi_neg:
                    dtw[nn, ir, ic:ic - mic:-1] = -1
                d[nn] = vc_mic
        if max_dist and d[nn] ** 2 > max_dist:
            # if max_dist and d[nn] > max_dist:
            d[nn] = inf
    return d, dtw
@njit(fastmath=True, nogil=True)  # Set "nopython" mode for best performance, equivalent to @njit
def calc(s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    dtw = np.full((r + 1, c + 1), np.inf)  # cmath.inf
    for i in range(psi_2b + 1):
        dtw[0, i] = 0
    for i in range(psi_1b + 1):
        dtw[i, 0] = 0
    i0 = 1
    i1 = 0
    sc = 0
    ec = 0
    smaller_found = False
    ec_next = 0
    for i in range(r):
        i0 = i
        i1 = i + 1
        j_start = max(0, i - max(0, r - c) - window + 1)
        j_end = min(c, i + max(0, c - r) + window)
        if sc > j_start:
            j_start = sc
        smaller_found = False
        ec_next = i
        for j in range(j_start, j_end):
            # d = (s1[i] - s2[j]) ** 2  # 1-d
            d = np.sum((s1[i] - s2[j]) ** 2)  # multi-d
            # d = np.sum(np.abs(s1[i] - s2[j]))  # multi-d
            if max_step is not None and d > max_step:
                continue
            dtw[i1, j + 1] = d + min(dtw[i0, j],
                                     dtw[i0, j + 1] + penalty,
                                     dtw[i1, j] + penalty)
            if dtw[i1, j + 1] > max_dist:
                if not smaller_found:
                    sc = j + 1
                if j >= ec:
                    break
            else:
                smaller_found = True
                ec_next = j + 1
        ec = ec_next
    # Decide which d to return
    dtw = np.sqrt(dtw)
    if psi_1e == 0 and psi_2e == 0:
        d = dtw[i1, min(c, c + window - 1)]
    else:
        ir = i1
        ic = min(c, c + window - 1)
        if psi_1e != 0:
            vr = dtw[ir:max(0, ir - psi_1e - 1):-1, ic]
            mir = argmin(vr)
            vr_mir = vr[mir]
        else:
            mir = ir
            vr_mir = inf
        if psi_2e != 0:
            vc = dtw[ir, ic:max(0, ic - psi_2e - 1):-1]
            mic = argmin(vc)
            vc_mic = vc[mic]
        else:
            mic = ic
            vc_mic = inf
        if vr_mir < vc_mic:
            if psi_neg:
                dtw[ir:ir - mir:-1, ic] = -1
            d = vr_mir
        else:
            if psi_neg:
                dtw[ir, ic:ic - mic:-1] = -1
            d = vc_mic
    if max_dist and d * d > max_dist:
        d = inf
    return d, dtw
mydtype = np.float32
series1 = np.random.random((16, 30, 2)).astype(mydtype)
series2 = np.random.random((100000, 2)).astype(mydtype)
n_series = series1.shape[1]
r = series1.shape[0]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype) # cmath.inf
d = np.full((n_series), np.inf, dtype=mydtype) # cmath.inf
time1 = time.time()
d, dtw1 = p_calc(d, dtw, series1, series2, series1.shape[0], series2.shape[0], 0, 0,
                 series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)
time1 = time.time()
for ii in tqdm(range(series1.shape[1])):
    d, dtw1 = calc(series1[:, ii, :], series2, series1.shape[0], series2.shape[0], 0, 0,
                   series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster
How can I speed up the calc function or the p_calc function so that I can compute the dynamic time warping paths for multi-dimensional, multi-query inputs?
Thanks for the answer. I then modified the code for simplification: deleting the np.sum part in favor of an explicit loop gives another speedup. Any suggestions for further speedups?
import time
from numba import njit, prange
import numpy as np
inf = np.inf
argmin=np.argmin
@njit(fastmath=True, nogil=True, error_model="numpy", cache=False, parallel=True)
def p_calc(d, dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    n_series = s1.shape[1]
    ndim = s1.shape[2]
    for nn in prange(n_series):
        for i in range(r):
            j_start = 0
            j_end = c
            for j in range(j_start, j_end):
                val = 0
                # tmp = ((s1[i, nn] - s2[j]) ** 2)
                # tmp = (np.abs(s1[i, nn] - s2[j, 0]))
                for nd in range(ndim):
                    tmp = ((s1[i, nn, nd] - s2[j, nd]) ** 2)
                    val += tmp
                d[nn] = val
    return d, dtw
@njit(fastmath=True, nogil=True)  # Set "nopython" mode for best performance, equivalent to @njit
def calc(dtw, s1, s2, r, c, psi_1b, psi_1e, psi_2b, psi_2e, window, max_step, max_dist, penalty, psi_neg):
    ndim = s1.shape[-1]
    for i in range(r):
        j_start = 0
        j_end = c
        for j in range(j_start, j_end):
            d = 0
            for kk in range(ndim):
                d += (s1[i, kk] - s2[j, kk]) ** 2
    return d, dtw
mydtype = np.float32
series1 = np.random.random((16, 300, 2)).astype(mydtype)
series2 = np.random.random((1000000, 2)).astype(mydtype)
n_series = series1.shape[1]
r = series1.shape[0]
c = series2.shape[0]
dtw = np.full((n_series, r + 1, c + 1), np.inf, dtype=mydtype) # cmath.inf
d = np.full((n_series), np.inf, dtype=mydtype) # cmath.inf
time1 = time.time()
# assert 1==2
# dtw[:, series2.shape[0]]
d1, dtw1 = p_calc(d, dtw, series1, series2, series1.shape[0], series2.shape[0], 0, 0,
                  series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)
# assert 1==2
time1 = time.time()
dtw = np.full((r + 1, c + 1), np.inf, dtype=mydtype)  # cmath.inf
for ii in range(series1.shape[1]):
    d2, dtw2 = calc(dtw, series1[:, ii, :], series2, series1.shape[0], series2.shape[0], 0, 0,
                    series2.shape[0], series2.shape[0], series2.shape[0], np.inf, np.inf, 0.01, False)
print(time.time() - time1)  # this one is faster
np.allclose(dtw1[-1], dtw2)
np.allclose(d1[-1], d2)
EDIT:
I found that the following code's performance is very different depending on whether I use pass or break. I don't understand why:
@njit(fastmath=True, nogil=True)
def kbest_matches(matching, k=4000):
    ki = 0
    while ki < k:
        best_idx = np.argmin(matching)  # np.argmin(np.arange(10000000))
        if best_idx == 0:
            # pass
            break
        ki += 1
    return 0
ss= np.random.random((1575822,))
time1 = time.time()
pp = kbest_matches(ss)
print(time.time() - time1)
I assume the code of both implementations is correct and has been carefully checked (otherwise the benchmark would be pointless).
The issue likely comes from the compilation time of the function. Indeed, the first call is significantly slower than subsequent calls, even with cache=True. This is especially important for the parallel implementation, since compiling parallel Numba code is often slower (it is more complex). The best solution to avoid this is to compile Numba functions ahead of time by providing explicit types to Numba.
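For example, giving Numba an explicit signature makes it compile the function eagerly at definition time rather than on the first call. A minimal sketch with a toy kernel (row_sq_dists is only an illustration, not the actual p_calc signature):

from numba import njit, prange
import numpy as np

@njit("float32[:](float32[:,:], float32[:])", fastmath=True, nogil=True, parallel=True)
def row_sq_dists(a, b):
    # compiled as soon as the module is loaded, so the first call is fast
    out = np.empty(a.shape[0], dtype=np.float32)
    for i in prange(a.shape[0]):
        s = np.float32(0.0)
        for j in range(a.shape[1]):
            diff = a[i, j] - b[j]
            s += diff * diff
        out[i] = s
    return out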
Besides this, benchmarking a computation only once is usually considered bad practice. Good benchmarks perform multiple iterations and discard the first ones (or consider them separately). Indeed, several other problems can appear when code is executed for the first time: CPU caches (and the TLB) are cold, the CPU frequency can change during the execution and is often lower when the program has just started, page faults may need to be served, etc.
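A minimal benchmarking pattern along those lines (warm-up call first, then keep the best of several runs):

import time

def bench(fn, *args, repeat=5):
    fn(*args)  # warm-up: triggers JIT compilation and warms the caches
    best = float('inf')
    for _ in range(repeat):
        t0 = time.perf_counter()
        fn(*args)
        best = min(best, time.perf_counter() - t0)
    return best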
In practice, I cannot reproduce the issue. Actually, p_calc is 3.3 times faster on my 6-core machine. When the benchmark is done in a loop of 5 iterations, the measured speedup of the parallel implementation is much larger: about 13 times (which is actually suspicious for a parallel implementation using 6 threads on a 6-core machine).
I am currently working on implementing the Adams-Bashforth-Moulton method for solving a pendulum problem.
My current code is as follows:
import numpy as np
#####################################################################################
# DEF func
#####################################################################################
def func(y, t):
    n = y.size
    f = np.zeros(6)
    xp = np.array([y[3], y[4], y[5]])[np.newaxis]
    mass = 1.
    F1 = 0.
    F2 = 0.
    F3 = -mass*9.8
    F = np.array([F1, F2, F3])[np.newaxis]
    phix = 2.*y[0]
    phiy = 2.*y[1]
    phiz = 2.*y[2]
    G = np.array([phix, phiy, phiz])[np.newaxis]
    H = 2.*np.eye(3)
    lambd = (mass*np.dot(xp, np.dot(H, xp.T)) + np.dot(F, G.T)) / np.dot(G, G.T)
    f[0] = y[3]
    f[1] = y[4]
    f[2] = y[5]
    for k in range(0, 3):
        f[k+3] = (F[0, k] - lambd*G[0, k]) / mass
    return f
def dF_matrix(y):
    n = y.size
    dF = np.zeros((6, 6))
    xp = np.array([y[3], y[4], y[5]])[np.newaxis]
    mass = 1.
    F1 = 0.
    F2 = 0.
    F3 = -mass*9.8
    F = np.array([F1, F2, F3])[np.newaxis]
    phix = 2.*y[0]
    phiy = 2.*y[1]
    phiz = 2.*y[2]
    G = np.array([phix, phiy, phiz])[np.newaxis]
    H = 2.*np.eye(3)
    lambd = (mass*np.dot(xp, np.dot(H, xp.T)) + np.dot(F, G.T)) / np.dot(G, G.T)
    dF[0, 3] = 1
    dF[1, 4] = 1
    dF[2, 5] = 1
    dF[3, 0] = (y[0]*F1 + 2*lambd)/mass
    dF[3, 1] = (y[0]*F2)/mass
    dF[3, 2] = (y[0]*F3)/mass
    dF[3, 3] = phix*y[3]
    dF[3, 4] = phix*y[4]
    dF[3, 5] = phix*y[5]
    dF[4, 0] = (y[1]*F1)/mass
    dF[4, 1] = (y[1]*F2 + 2*lambd)/mass
    dF[4, 2] = (y[1]*F3)/mass
    dF[4, 3] = phiy*y[3]
    dF[4, 4] = phiy*y[4]
    dF[4, 5] = phiy*y[5]
    dF[5, 0] = (y[2]*F1)/mass
    dF[5, 1] = (y[2]*F2)/mass
    dF[5, 2] = (y[2]*F3 + 2*lambd)/mass
    dF[5, 3] = phiz*y[3]
    dF[5, 4] = phiz*y[4]
    dF[5, 5] = phiz*y[5]
    return dF
#####################################################################################
# PYTHON script:
# Solve ODE for pendulum problem
#####################################################################################
from scipy.integrate import odeint
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import time
### Forward Euler Method
def forward_Euler(function, y_matrix, time):
    y = np.zeros((np.size(time), np.size(y_matrix)))
    y[0, :] = y_matrix
    for i in range(len(time) - 1):
        dt = time[i + 1] - time[i]
        y[i + 1, :] = y[i, :] + np.asarray(function(y[i, :], time[i])) * dt
    return y
### Modified Euler Method
def modified_Euler(function, y_matrix, time):
    y = np.zeros((np.size(time), np.size(y_matrix)))
    y[0, :] = y_matrix
    for i in range(len(time) - 1):
        dt = time[i + 1] - time[i]
        k1 = np.asarray(function(y[i, :], time[i]))
        k2 = np.asarray(function(y[i, :] + k1, time[i]))
        y[i + 1, :] = y[i, :] + (k1 + k2) / 2
    return y
### Adams-Bashforth 2nd order
def Adams_Bash_2nd(function, y_matrix, time):
    y = np.zeros((np.size(time), np.size(y_matrix)))
    y[0, :] = y_matrix
    for i in range(len(time) - 1):
        dt = time[i + 1] - time[i]
        f_1 = function(y[i, :], time[i])
        f_2 = function(f_1, time[i])
        y[i + 1, :] = y[i, :] + dt * f_1 + ((dt**2)/2) * f_2
    return y
### Adams-Bashforth-Moulton Method
def Adams_Moulton(function, y_matrix, time):
    y = np.zeros((np.size(time), np.size(y_matrix)))
    y[0, :] = y_matrix
    ### predictor formula
    for i in range(len(time) - 1):
        dt = time[i + 1] - time[i]
        f_1 = function(y[i, :], time[i])
        f_2 = function(f_1, time[i])
        y[i + 1, :] = y[i, :] + dt * f_1 + ((dt**2)/2) * f_2
    ### corrector formula
    for i in range(len(time-1)):
        dt = time[i + 1] - time[i]
        k_1 = 9 * (function(y[i, :], time[i+1]))
        k_2 = 19 * (function(y[i, :], time[i]))
        k_3 = 5 * (function(y[i, :], time[i-1]))
        k_4 = (function(y[i, :], time[i-2]))
        # === ERROR HAPPENS HERE ===
        y[i + 1, :] = y + (dt/24) * (k_1 + k_2 - k_3 + k_4)
    return y
# initial condition
y0=np.array([0.0,1.0,0.0,0.8,0.0,1.2])
print('ODE Python ..')
time_start = time.clock()
nt = 500
# time points
t = np.linspace(0,25,nt)
# solve ODE
y1 = odeint(func,y0,t)
time_elapsed = (time.clock() - time_start)
print('elapsed time',time_elapsed)
# compute residual:
r=y1[:,0]**2+y1[:,1]**2+y1[:,2]**2-1
rmax1=np.max(np.abs(r))
print('error',rmax1)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(y1[:,0],y1[:,1],y1[:,2], 'gray')
plt.show()
# print ODE Euler Method
time_start = time.clock()
nt = 500
# time points
t = np.linspace(0, 25, nt)
# solve ODE
y1 = forward_Euler(func, y0, t)
print(y1)
time_elapsed = (time.clock() - time_start)
print('elapsed time', time_elapsed)
# compute residual:
r = y1[:, 0] ** 2 + y1[:, 1] ** 2 + y1[:, 2] ** 2 - 1
rmax1 = np.max(np.abs(r))
print('error', rmax1)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(y1[:, 0], y1[:, 1], y1[:, 2], 'gray')
plt.show()
### Modified Euler method
time_start = time.clock()
nt = 500
# time points
t = np.linspace(0, 25, nt)
# solve ODE
y1 = modified_Euler(func, y0, t)
print(y1)
time_elapsed = (time.clock() - time_start)
print('elapsed time', time_elapsed)
# compute residual:
r = y1[:, 0] ** 2 + y1[:, 1] ** 2 + y1[:, 2] ** 2 - 1
rmax1 = np.max(np.abs(r))
print('error', rmax1)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(y1[:, 0], y1[:, 1], y1[:, 2], 'gray')
plt.show()
### Adams-Bashforth 2nd order
time_start = time.clock()
nt = 500
# time points
t = np.linspace(0, 25, nt)
# solve ODE
y1 = Adams_Bash_2nd(func, y0, t)
print(y1)
time_elapsed = (time.clock() - time_start)
print('elapsed time', time_elapsed)
# compute residual:
r = y1[:, 0] ** 2 + y1[:, 1] ** 2 + y1[:, 2] ** 2 - 1
rmax1 = np.max(np.abs(r))
print('error', rmax1)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(y1[:, 0], y1[:, 1], y1[:, 2], 'gray')
plt.show()
### Adams-Moulton 1st order
# time_start = time.clock()
nt = 500
# time points
t = np.linspace(0, 25, nt)
# solve ODE
y1 = Adams_Moulton(func, y0, t)
print(y1)
# time_elapsed = (time.clock() - time_start)
# print('elapsed time', time_elapsed)
# compute residual:
r = y1[:, 0] ** 2 + y1[:, 1] ** 2 + y1[:, 2] ** 2 - 1
rmax1 = np.max(np.abs(r))
print('error', rmax1)
fig = plt.figure()
ax = plt.axes(projection='3d')
ax.plot3D(y1[:, 0], y1[:, 1], y1[:, 2], 'gray')
plt.show()
exit()
My function Adams_Moulton() keeps returning the following error:
Traceback (most recent call last):
File "C:/Users/Andrew/Documents/solve_pendulum.py", line 249, in <module>
y1 = Adams_Moulton(func, y0, t)
File "C:/Users/Andrew/Documents/solve_pendulum.py", line 148, in Adams_Moulton
y[i + 1, :] = y + (dt/24) * (k_1 + k_2 - k_3 + k_4)
ValueError: could not broadcast input array from shape (500,6) into shape (6)
What is wrong in my thought process for implementing this multistep method?
Thank you all for the help!
The function definition for the pendulum in Cartesian coordinates, a particle moving under gravity and the constraint |x|^2 = L^2 = const., can be written much more compactly as
def func(y, t):
    x, v = y[:3], y[3:6]
    grav = np.array([0., 0., -9.8])
    lambd = (grav.dot(x) + v.dot(v)) / x.dot(x)
    return np.concatenate([v, grav - lambd*x])

# initial condition
y0 = np.array([0.0, 1.0, 0.0, 0.8, 0.0, 1.2])
As long as no friction term is included, the mass cancels out and need not be considered in the equations.
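For completeness, the lambd used here follows from differentiating the constraint twice: x·x = L^2 gives x·v = 0 and then x·x'' + v·v = 0; substituting x'' = grav - lambd*x yields lambd = (grav·x + v·v)/(x·x), which is exactly the expression in the code.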
An inefficient implementation (no use of Jacobian and Newton-like steps) of the 4th order Adams-Moulton PECE method is
def Adams_Moulton_4th(function, y_matrix, time):
    y = np.zeros((np.size(time), np.size(y_matrix)))
    y[0] = y_matrix
    ### bootstrap steps with 4th order one-step method
    dt = time[1] - time[0]
    y[1] = RK4_step(function, y[0], time[0], dt, N=4)
    y[2] = RK4_step(function, y[1], time[1], dt, N=4)
    y[3] = RK4_step(function, y[2], time[2], dt, N=4)
    f_m2 = function(y[0], time[0])
    f_m1 = function(y[1], time[1])
    f_0 = function(y[2], time[2])
    f_1 = function(y[3], time[3])
    for i in range(3, len(time) - 1):
        ### first shift the "virtual" function value array so that
        ### [f_m3, f_m2, f_m1, f_0] corresponds to [f[i-3], f[i-2], f[i-1], f[i]]
        f_m3, f_m2, f_m1, f_0 = f_m2, f_m1, f_0, f_1
        ### predictor formula, 4th order [55/24, -59/24, 37/24, -3/8]
        y[i+1] = y[i] + (dt/24) * (55*f_0 - 59*f_m1 + 37*f_m2 - 9*f_m3)
        f_1 = function(y[i+1], time[i+1])
        ### corrector formula, 4th order [3/8, 19/24, -5/24, 1/24]
        y[i+1] = y[i] + (dt/24) * (9*f_1 + 19*f_0 - 5*f_m1 + f_m2)
        f_1 = function(y[i+1], time[i+1])
    return y
and with nt=2500 steps it results in a radius error of 2.39e-05, with a plot that is close to the reference solution by odeint.
The classical RK4 method used in the bootstrapping process was implemented as
def RK4_step(f, y, t, dt, N=1):
    dt /= N
    for k in range(N):
        k1 = f(y, t)*dt
        k2 = f(y + k1/2, t + dt/2)*dt
        k3 = f(y + k2/2, t + dt/2)*dt
        k4 = f(y + k3, t + dt)*dt
        y, t = y + (k1 + 2*(k2 + k3) + k4)/6, t + dt
    return y
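A possible driver for the method, mirroring the residual check used for the other integrators above (nt=2500 as mentioned):

nt = 2500
t = np.linspace(0, 25, nt)
y1 = Adams_Moulton_4th(func, y0, t)
r = y1[:, 0]**2 + y1[:, 1]**2 + y1[:, 2]**2 - 1
print('error', np.max(np.abs(r)))  # about 2.39e-05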
There is some code that I have translated from its original MATLAB version to Python, but there is a part I don't understand. I have one triangle mesh that I want to transform. To this end, a transformation is computed for each triangle face. After that, the edges of the transformed triangle are computed. What happens next, however, is unclear to me. If you can share any mathematical or geometric background on the following steps, I would appreciate it very much!
import numpy as np
import scipy.sparse
import scipy.sparse.linalg

weight = np.ones(1)
fixvertex = 1
fixto = np.zeros((3))
M = scipy.sparse.lil_matrix((len(template) + fixvertex, len(template)))
dx = scipy.sparse.lil_matrix((len(template) + fixvertex, 3))
for i in range(len(faces)):
    v = faces[i, :]
    ### ... what happens in between is irrelevant
    # Transform triangle
    x = T @ np.array([template[v[0], :], template[v[1], :], template[v[2], :]]).reshape(3, 3).T
    p12 = x[:, 0] - x[:, 1]
    p13 = x[:, 0] - x[:, 2]
    p23 = x[:, 1] - x[:, 2]
    p12 = p12 / np.linalg.norm(p12)
    p13 = p13 / np.linalg.norm(p13)
    p23 = p23 / np.linalg.norm(p23)
    # I do not get the point of the following lines
    wts = [(p13.T).dot(p23), -(p12.T).dot(p23), (p12.T).dot(p13)]
    wij3 = [[0, wts[0] / np.sqrt(1 - wts[0] ** 2), wts[1] / np.sqrt(1 - wts[1] ** 2)],
            [0, 0, wts[2] / np.sqrt(1 - wts[2] ** 2)],
            [0, 0, 0]]
    wij3 = np.asarray(wij3)
    wij3 = wij3 + wij3.T
    WIJ = wij3 - np.diag([sum(x) for x in zip(*wij3)])
    M[np.ix_(v, v)] = M[np.ix_(v, v)] + WIJ
    dx[v, :] = dx[v, :] + (WIJ @ x.T)
weight = np.ones((fixvertex)) * weight
for i in range(fixvertex):
    M[len(template) + i, fixvertex - 1] = weight[i]
dx[len(template):len(template) + fixvertex, :] = np.multiply(fixto, np.tile(weight, (3)))
M = np.real(M)
dx = np.real(dx)
Mt = M.T
model = scipy.sparse.linalg.spsolve(Mt @ M, Mt @ dx)