I am trying to speed up a multi-camera system that relies on calculating fundamental matrices between each camera pair.
Please note the following is pseudocode: # means matrix multiplication, | means concatenation.
I have code to calculate F for each pair, calculate_f(camera_matrix1_3x4, camera_matrix2_3x4), and the naive solution is
for c1 in cameras:
    for c2 in cameras:
        if c1 != c2:
            f = calculate_f(c1.proj_matrix, c2.proj_matrix)
This is slow, and I would like to speed it up. I have ~5000 cameras.
I have precalculated all rotations and translations (in world coordinates) between every pair of cameras, as well as the intrinsic parameters k, such that for each camera c it holds that c.matrix = c.k # (c.rot | c.t)
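In numpy terms that relation is just (a direct translation of the pseudocode; names are illustrative):

import numpy as np

def proj_from_krt(k_3x3, rot_3x3, t_3x1):
    # c.matrix = c.k # (c.rot | c.t): intrinsics times the stacked [R | t]
    return k_3x3 @ np.hstack([rot_3x3, t_3x1.reshape(3, 1)])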
Can I use the parameters r, t to help speed up the following calculations of F?
In mathematical form, for 3 different cameras c1, c2, c3 I have
f12 = calculate_f(c1.proj_matrix, c2.proj_matrix), and I want f23 = calculate_f(c2.proj_matrix, c3.proj_matrix) and f13 = calculate_f(c1.proj_matrix, c3.proj_matrix) from some function f23, f13 = fast_f(f12, c1.r, c1.t, c2.r, c2.t, c3.r, c3.t)?
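For reference, I am aware of the textbook closed form that builds F directly from one relative pose, which is why I suspect the precalculated r, t should help (a sketch: k1, k2 are intrinsics, and (r_rel, t_rel) maps camera 1's frame to camera 2's, i.e. P1 = K1 [I | 0], P2 = K2 [R | t]; standard result, e.g. Hartley & Zisserman):

import numpy as np

def skew(t):
    # cross-product matrix [t]_x, such that skew(t) @ v == np.cross(t, v)
    return np.array([[0.0, -t[2], t[1]],
                     [t[2], 0.0, -t[0]],
                     [-t[1], t[0], 0.0]])

def fundamental_from_pose(k1, k2, r_rel, t_rel):
    # F = K2^-T [t]_x R K1^-1
    return np.linalg.inv(k2).T @ skew(t_rel) @ r_rel @ np.linalg.inv(k1)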
A working function for calculating the fundamental matrix in numpy:
def fundamental_3x3_from_projections(p_left_3x4: np.ndarray, p_right_3x4: np.ndarray) -> np.ndarray:
    # The following is based on OpenCV-contrib's C++ implementation.
    # see https://github.com/opencv/opencv_contrib/blob/master/modules/sfm/src/fundamental.cpp#L109
    # see https://sourishghosh.com/2016/fundamental-matrix-from-camera-matrices/
    # see https://answers.opencv.org/question/131017/how-do-i-compute-the-fundamental-matrix-from-2-projection-matrices/
    f_3x3 = np.zeros((3, 3))
    p1, p2 = p_left_3x4, p_right_3x4
    x = np.empty((3, 2, 4), dtype=np.float64)
    x[0, :, :] = np.vstack([p1[1, :], p1[2, :]])
    x[1, :, :] = np.vstack([p1[2, :], p1[0, :]])
    x[2, :, :] = np.vstack([p1[0, :], p1[1, :]])
    y = np.empty((3, 2, 4), dtype=np.float64)
    y[0, :, :] = np.vstack([p2[1, :], p2[2, :]])
    y[1, :, :] = np.vstack([p2[2, :], p2[0, :]])
    y[2, :, :] = np.vstack([p2[0, :], p2[1, :]])
    for i in range(3):
        for j in range(3):
            xy = np.vstack([x[j, :], y[i, :]])
            f_3x3[i, j] = np.linalg.det(xy)
    return f_3x3
Numpy is clearly not optimized for working on small matrices. The parsing of CPython input objects, internal checks, and function calls introduce significant overhead, far bigger than the execution time needed to perform the actual computation, and the creation of many temporary arrays is also expensive. One solution to this problem is to use Numba or Cython.
Moreover, the computation of the determinant can be optimized a lot, since you know the exact size of the matrix and part of the matrix does not always change. Indeed, using a basic algebraic expression for the 4x4 determinant helps compilers optimize the overall computation substantially, thanks to common sub-expression elimination (not performed by the CPython interpreter) and the removal of the complex loops/conditionals inside np.linalg.det.
Here is the resulting code:
import numba as nb
import numpy as np

@nb.njit('float64(float64[:,::1])')
def det_4x4(mat):
    a, b, c, d = mat[0,0], mat[0,1], mat[0,2], mat[0,3]
    e, f, g, h = mat[1,0], mat[1,1], mat[1,2], mat[1,3]
    i, j, k, l = mat[2,0], mat[2,1], mat[2,2], mat[2,3]
    m, n, o, p = mat[3,0], mat[3,1], mat[3,2], mat[3,3]
    return a * (f * (k*p - l*o) + g * (l*n - j*p) + h * (j*o - k*n)) + \
           b * (e * (l*o - k*p) + g * (i*p - l*m) + h * (k*m - i*o)) + \
           c * (e * (j*p - l*n) + f * (l*m - i*p) + h * (i*n - j*m)) + \
           d * (e * (k*n - j*o) + f * (i*o - k*m) + g * (j*m - i*n))
@nb.njit('float64[:,::1](float64[:,::1], float64[:,::1])')
def fundamental_3x3_from_projections(p_left_3x4, p_right_3x4):
    f_3x3 = np.empty((3, 3))
    p1, p2 = p_left_3x4, p_right_3x4
    x = np.empty((3, 2, 4), dtype=np.float64)
    x[0, 0, :] = p1[1, :]
    x[0, 1, :] = p1[2, :]
    x[1, 0, :] = p1[2, :]
    x[1, 1, :] = p1[0, :]
    x[2, 0, :] = p1[0, :]
    x[2, 1, :] = p1[1, :]
    y = np.empty((3, 2, 4), dtype=np.float64)
    y[0, 0, :] = p2[1, :]
    y[0, 1, :] = p2[2, :]
    y[1, 0, :] = p2[2, :]
    y[1, 1, :] = p2[0, :]
    y[2, 0, :] = p2[0, :]
    y[2, 1, :] = p2[1, :]
    xy = np.empty((4, 4), dtype=np.float64)
    for i in range(3):
        xy[2:4, :] = y[i, :, :]
        for j in range(3):
            xy[0:2, :] = x[j, :, :]
            f_3x3[i, j] = det_4x4(xy)
    return f_3x3
This is 130 times faster on my machine (85.6 µs vs. 0.66 µs).
You can speed up the process by another factor of two if the applied function is commutative (i.e. f(c1, c2) == f(c2, c1)); in that case you could compute only the upper triangular part. It turns out that your function has an interesting property, since f(c1, c2) == f(c2, c1).T appears to always hold. Another possible optimization is to run the loop in parallel, as sketched below.
With all these optimizations, the resulting program should be about three orders of magnitude faster.
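Here is a minimal sketch of such a pairwise loop (my illustration, not benchmarked): it fills only the upper triangle in parallel and mirrors the transpose, assuming the Numba-compiled fundamental_3x3_from_projections above and a C-contiguous (n, 3, 4) array of projection matrices. Be aware that the full (n, n, 3, 3) result for ~5000 cameras is about 1.8 GB.

@nb.njit(parallel=True)
def all_fundamental_matrices(proj_matrices):
    # proj_matrices: C-contiguous (n, 3, 4) float64 array of camera matrices
    n = proj_matrices.shape[0]
    fs = np.empty((n, n, 3, 3))
    for i1 in nb.prange(n):
        for i2 in range(i1 + 1, n):
            f = fundamental_3x3_from_projections(proj_matrices[i1], proj_matrices[i2])
            fs[i1, i2] = f
            fs[i2, i1] = f.T  # mirror using f(c1, c2) == f(c2, c1).T
    return fs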
Analysis of the accuracy of the approach
The precision appears to be similar to that of the original implementation. Depending on the input matrix, results are sometimes more accurate and sometimes less accurate than with the NumPy method. This is specifically due to the computation of the determinant. With 24-digit decimals there is no visible error compared to the reliable result from Wolfram Alpha. This shows that the method is correct; the results differ only because of numerical-stability details. Here is the code used to test the accuracy of the methods:
# Imports
from decimal import Decimal
import numpy as np
import numba as nb

# Definitions
def det_4x4(mat):
    a, b, c, d = mat[0,0], mat[0,1], mat[0,2], mat[0,3]
    e, f, g, h = mat[1,0], mat[1,1], mat[1,2], mat[1,3]
    i, j, k, l = mat[2,0], mat[2,1], mat[2,2], mat[2,3]
    m, n, o, p = mat[3,0], mat[3,1], mat[3,2], mat[3,3]
    return a * (f * (k*p - l*o) + g * (l*n - j*p) + h * (j*o - k*n)) + \
           b * (e * (l*o - k*p) + g * (i*p - l*m) + h * (k*m - i*o)) + \
           c * (e * (j*p - l*n) + f * (l*m - i*p) + h * (i*n - j*m)) + \
           d * (e * (k*n - j*o) + f * (i*o - k*m) + g * (j*m - i*n))

@nb.njit('float64(float64[:,::1])')
def det_4x4_numba(mat):
    a, b, c, d = mat[0,0], mat[0,1], mat[0,2], mat[0,3]
    e, f, g, h = mat[1,0], mat[1,1], mat[1,2], mat[1,3]
    i, j, k, l = mat[2,0], mat[2,1], mat[2,2], mat[2,3]
    m, n, o, p = mat[3,0], mat[3,1], mat[3,2], mat[3,3]
    return a * (f * (k*p - l*o) + g * (l*n - j*p) + h * (j*o - k*n)) + \
           b * (e * (l*o - k*p) + g * (i*p - l*m) + h * (k*m - i*o)) + \
           c * (e * (j*p - l*n) + f * (l*m - i*p) + h * (i*n - j*m)) + \
           d * (e * (k*n - j*o) + f * (i*o - k*m) + g * (j*m - i*n))

# Example matrix
precise_xy = np.array(
    [[Decimal('42'), Decimal('-6248'), Decimal('4060'), Decimal('845')],
     [Decimal('-0.00992'), Decimal('-0.704'), Decimal('-0.71173298417'), Decimal('300.532')],
     [Decimal('-8.94274'), Decimal('-7554.39'), Decimal('604.57'), Decimal('706282')],
     [Decimal('-0.0132'), Decimal('-0.2757'), Decimal('-0.961'), Decimal('247.65')]]
)
xy = precise_xy.astype(np.float64)

res_numpy = Decimal(np.linalg.det(xy))
res_numba = Decimal(det_4x4_numba(xy))
res_precise = det_4x4(precise_xy)

# The Wolfram Alpha expression used is:
# det({{42,-6248,4060,845},
#      {-0.00992,-0.704,-0.71173298417,300.532},
#      {-8.94274,-7554.39,604.57,706282},
#      {-0.0132,-0.2757,-0.961,247.65}})
res_wolframalpha = Decimal('-323312.2164828991329828243')

# The result obtained from Wolfram Alpha has 25-digit precision and is
# exactly the same as the one of det_4x4 using 24-digit decimals.
assert res_precise == res_wolframalpha

print(abs((res_numpy - res_precise) / res_precise))  # 1.7E-14
print(abs((res_numba - res_precise) / res_precise))  # 3.1E-14
# => Similar relative error (Numba is slightly less accurate here,
#    but neither is close to the 1e-16 relative epsilon)
Say I have the following expression which I would like to integrate over the variable z from 0 to L.
import sympy as sp
mdot, D, R, alpha, beta, xi, mu0, q, cp, Tin, L = sp.symbols(r"\dot{m}, D, R, alpha, beta, xi, mu_0, q, c_p, T_in, L", real=True, positive=True, constant=True)
z = sp.symbols("z", real=True, positive=True)
n = sp.Symbol("n", real=True)
firstexpr = 8 * mdot**2 * R / (sp.pi**2 * D**5) * (alpha + beta * (sp.pi * D * mu0 / (4 * mdot))**xi * (q * z / (mdot * cp) + Tin)**(n * xi)) * (q * z / (mdot * cp) + Tin)
res1 = sp.integrate(firstexpr, (z, 0, L), conds="none")
This will take forever: I had to stop the computation after 10 minutes on my PC without getting an answer.
The situation improves dramatically if I rewrite my expression so that it contains only the minimum number of constant symbols, integrate it, and finally substitute the original expressions back in:
a = 8 * mdot**2 * R / (sp.pi**2 * D**5)
b = beta * (sp.pi * D * mu0 / (4 * mdot))**xi
c = q / (mdot * cp)
_a, _b, _c = sp.symbols("a, b, c", real=True, positive=True, constant=True)
secondexpr = _a * (alpha + _b * (_c * z + Tin)**(n * xi)) * (_c * z + Tin)
res2 = sp.integrate(secondexpr, (z, 0, L), conds="none")
sp.simplify(res2.subs([(_a, a), (_b, b), (_c, c)]))
Why is sympy taking extremely long time in the first case? Did I miss some assumption in the creation of my symbols?
I'm working in Jupyter Notebook. When I try to compile a .pyx file with Cython, it throws an error similar to this:
%run -i setup.py build_ext --inplace
unable to find vcvarsall.bat
The setup.py file looks like this:
from distutils.core import setup
from Cython.Build import cythonize
setup(
    ext_modules=cythonize("hh_vers_vector.pyx"),
)
This only happens, however, on my computer at work; on the one at home it works just fine.
It is probably an issue with Visual Studio, as explained here. The thing is, I installed the very same version of Visual Studio 2017 Community on both computers. The latest Anaconda 3 version is installed on both computers, both use Python 3.6.2 and IPython 6.1.0, and both run Windows 10. So how can that be? I'll also show you my .pyx file; if you need more information I will edit my post.
from math import exp
import numpy as np
import time
def hhModel(*params, Iext, float dt, int Vref):
    ## Unwrap params argument: these variables are going to be optimized
    cdef float ENa = params[0]
    cdef float EK  = params[1]
    cdef float EL  = params[2]
    cdef float GNa = params[3]
    cdef float GK  = params[4]
    cdef float GL  = params[5]

    ## Input parameters
    # Iext : a list containing external current steps, your stimulus vector [nA]
    # dt   : a crazy time parameter [ms]
    # Vref : reference potential [mV]

    def alphaM(float v, float vr): return 0.1 * (v-vr-25) / ( 1 - exp(-(v-vr-25)/10) )
    def betaM(float v, float vr):  return 4 * exp(-(v-vr)/18)
    def alphaH(float v, float vr): return 0.07 * exp(-(v-vr)/20)
    def betaH(float v, float vr):  return 1 / ( 1 + exp( -(v-vr-30)/10 ) )
    def alphaN(float v, float vr): return 0.01 * (v-vr-10) / ( 1 - exp(-(v-vr-10)/10) )
    def betaN(float v, float vr):  return 0.125 * exp(-(v-vr)/80)

    ## steady-state values and time constants of m,h,n
    def m_infty(float v, float vr): return alphaM(v,vr) / ( alphaM(v,vr) + betaM(v,vr) )
    def h_infty(float v, float vr): return alphaH(v,vr) / ( alphaH(v,vr) + betaH(v,vr) )
    def n_infty(float v, float vr): return alphaN(v,vr) / ( alphaN(v,vr) + betaN(v,vr) )

    ## parameters
    cdef float Cm, gNa, gK, gL, INa, IK, IL, dv_dt, dm_dt, dh_dt, dn_dt, aM, bM, aH, bH, aN, bN
    cdef float Smemb = 4000  # [um^2] surface area of the membrane
    cdef float Cmemb = 1     # [uF/cm^2] membrane capacitance density
    Cm  = Cmemb * Smemb * 1e-8  # [uF] membrane capacitance
    gNa = GNa * Smemb * 1e-8    # Na conductance [mS]
    gK  = GK * Smemb * 1e-8     # K conductance [mS]
    gL  = GL * Smemb * 1e-8     # leak conductance [mS]

    # numSamples = int(T/dt);
    cdef int numSamples = len(Iext);
    # DEF numSamples = 200000

    # initial values (float32 buffers, to match the cdef float[:] memoryviews)
    cdef float[:] v = np.empty(numSamples, dtype=np.float32)
    cdef float[:] m = np.empty(numSamples, dtype=np.float32)
    cdef float[:] h = np.empty(numSamples, dtype=np.float32)
    cdef float[:] n = np.empty(numSamples, dtype=np.float32)
    #cdef float v[numSamples]
    #cdef float m[numSamples]
    #cdef float h[numSamples]
    #cdef float n[numSamples]

    v[0] = Vref                 # initial membrane potential
    m[0] = m_infty(v[0], Vref)  # initial m
    h[0] = h_infty(v[0], Vref)  # initial h
    n[0] = n_infty(v[0], Vref)  # initial n

    ## calculate membrane response step-by-step
    for j in range(0, numSamples-1):
        # ionic currents: g[mS] * V[mV] = I[uA]
        INa = gNa * m[j]*m[j]*m[j] * h[j] * (ENa-v[j])
        IK  = gK * n[j]*n[j]*n[j]*n[j] * (EK-v[j])
        IL  = gL * (EL-v[j])
        # derivatives
        # I[uA] / C[uF] * dt[ms] = dv[mV]
        dv_dt = ( INa + IK + IL + Iext[j]*1e-3) / Cm;
        aM = 0.1 * (v[j]-Vref-25) / ( 1 - exp(-(v[j]-Vref-25)/10))
        bM = 4 * exp(-(v[j]-Vref)/18)
        aH = 0.07 * exp(-(v[j]-Vref)/20)
        bH = 1 / ( 1 + exp( -(v[j]-Vref-30)/10 ) )
        aN = 0.01 * (v[j]-Vref-10) / ( 1 - exp(-(v[j]-Vref-10)/10) )
        bN = 0.125 * exp(-(v[j]-Vref)/80)
        dm_dt = (1-m[j])* aM - m[j]*bM
        dh_dt = (1-h[j])* aH - h[j]*bH
        dn_dt = (1-n[j])* aN - n[j]*bN
        # calculate next step
        v[j+1] = (v[j] + dv_dt * dt)
        m[j+1] = (m[j] + dm_dt * dt)
        h[j+1] = (h[j] + dh_dt * dt)
        n[j+1] = (n[j] + dn_dt * dt)

    return v
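One thing I still plan to try (an unverified guess on my part): building through setuptools instead of bare distutils, since newer setuptools releases reportedly locate the VS2017 compiler themselves rather than through the legacy vcvarsall.bat lookup:

# setup.py, hypothetical variant using setuptools instead of distutils.core
from setuptools import setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize("hh_vers_vector.pyx"),
)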
Edit: It's been five years; has scipy.integrate.odeint learned to stop yet?
The script below integrates magnetic field lines around closed paths and stops when the path returns to its original value within some tolerance, using a Runge-Kutta RK4 implementation in Python. I would like to use scipy.integrate.odeint, but I cannot see how to tell it to stop when the path is approximately closed.
Of course odeint may be much faster than integrating in pure Python; I could just let it go around blindly and look for closure in the results, but in the future I'll be doing much larger problems.
Is there a way that I can implement an "OK, that's close enough, you can stop now!" check inside odeint? Or should I just integrate for a while, check, integrate more, check...
This discussion seems relevant, and seems to suggest that "you can't from within SciPy" might be the answer.
Note: I usually use RK45 (Runge-Kutta-Fehlberg), which is more accurate at a given step size, to speed things up, but I kept it simple here. It also makes variable step sizes possible.
Update: But sometimes I need a fixed step size. I've found that scipy.integrate.ode does provide a testing/stopping callback, solout(t, y) (set via ode.set_solout), but it doesn't seem to be able to evaluate at fixed points of t. odeint allows evaluation at fixed points of t, but doesn't seem to have a testing/stopping method.
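For completeness, this is the kind of stopping hook I mean, a minimal sketch using scipy.integrate.ode with the dopri5 integrator and the get_BrBztanVec defined in the script below (start point, horizon, and tolerance are illustrative; note that set_solout must be called before set_initial_value):

import numpy as np
from scipy.integrate import ode

start = np.array([0.5, 0.0])  # illustrative (r, z) start point

def solout(t, y):
    # Called after every accepted internal step; returning -1 stops the solver.
    # The t > 1.0 guard keeps it from firing at the start point itself.
    if t > 1.0 and np.sqrt(((y - start)**2).sum()) < 0.01:
        return -1
    return 0

solver = ode(lambda t, y: get_BrBztanVec(y)).set_integrator('dopri5')
solver.set_solout(solout)             # must come before set_initial_value
solver.set_initial_value(start, 0.0)
y_final = solver.integrate(100.0)     # returns early once solout says stop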
def rk4Bds_stops(x, h, n, F, fclose=0.1):
    h_over_two, h_over_six = h/2.0, h/6.0
    watching = False
    distance_max = 0.0
    distance_old = -1.0
    i = 0
    while i < n and not (watching and greater):
        k1 = F( x[i] )
        k2 = F( x[i] + k1*h_over_two)
        k3 = F( x[i] + k2*h_over_two)
        k4 = F( x[i] + k3*h )
        x[i+1] = x[i] + h_over_six * (k1 + 2.*(k2 + k3) + k4)
        distance = np.sqrt(((x[i+1] - x[0])**2).sum())
        distance_max = max(distance, distance_max)
        getting_closer = distance < distance_old
        if getting_closer and distance < fclose*distance_max:
            watching = True
        greater = distance > distance_old
        distance_old = distance
        i += 1
    return i
def get_BrBztanVec(rz):
    Brz = np.zeros(2)
    B_zero = 0.5 * i * mu0 / a
    zz = rz[1] - h
    alpha = rz[0] / a
    beta = zz / a
    gamma = zz / rz[0]
    Q = ((1.0 + alpha)**2 + beta**2)
    k = np.sqrt(4. * alpha / Q)
    C1 = 1.0 / (pi * np.sqrt(Q))
    C2 = gamma / (pi * np.sqrt(Q))
    C3 = (1.0 - alpha**2 - beta**2) / (Q - 4.0*alpha)
    C4 = (1.0 + alpha**2 + beta**2) / (Q - 4.0*alpha)
    E, K = spe.ellipe(k**2), spe.ellipk(k**2)
    Brz[0] += B_zero * C2 * (C4*E - K)
    Brz[1] += B_zero * C1 * (C3*E + K)
    Bmag = np.sqrt((Brz**2).sum())
    return Brz/Bmag
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as spe
from scipy.integrate import odeint as ODEint
pi = np.pi
mu0 = 4.0 * pi * 1.0E-07
i = 1.0 # amperes
a = 1.0 # meters
h = 0.0 # meters
ds = 0.04 # step distance (meters)
r_list, z_list, n_list = [], [], []
dr_list, dz_list = [], []
r_try = np.linspace(0.15, 0.95, 17)
x = np.zeros((1000, 2))
nsteps = 500
for rt in r_try:
    x[:] = np.nan
    x[0] = np.array([rt, 0.0])
    n = rk4Bds_stops(x, ds, nsteps, get_BrBztanVec)
    n_list.append(n)
    r, z = x[:n+1].T.copy()  # making a copy is necessary
    dr, dz = r[1:] - r[:-1], z[1:] - z[:-1]
    r_list.append(r)
    z_list.append(z)
    dr_list.append(dr)
    dz_list.append(dz)
plt.figure(figsize=[14, 8])
fs = 20
plt.subplot(2,3,1)
for r in r_list:
    plt.plot(r)
plt.title("r", fontsize=fs)
plt.subplot(2,3,2)
for z in z_list:
    plt.plot(z)
plt.title("z", fontsize=fs)
plt.subplot(2,3,3)
for r, z in zip(r_list, z_list):
    plt.plot(r, z)
plt.title("r, z", fontsize=fs)
plt.subplot(2,3,4)
for dr, dz in zip(dr_list, dz_list):
    plt.plot(dr, dz)
plt.title("dr, dz", fontsize=fs)
plt.subplot(2, 3, 5)
plt.plot(n_list)
plt.title("n", fontsize=fs)
plt.show()
What you need is 'event handling'. scipy.integrate.odeint cannot do this yet. But you could use SUNDIALS (see https://pypi.python.org/pypi/python-sundials/0.5), which supports event handling.
The other option, keeping speed as a priority, is to simply code up RKF in Cython. I have an implementation lying around which should be easy to change to stop after some criterion:
cythoncode.pyx
# cython: boundscheck=False
# cython: wraparound=False
import numpy as np
cimport numpy as np
import cython
cdef double a2 = 2.500000000000000e-01 # 1/4
cdef double a3 = 3.750000000000000e-01 # 3/8
cdef double a4 = 9.230769230769231e-01 # 12/13
cdef double a5 = 1.000000000000000e+00 # 1
cdef double a6 = 5.000000000000000e-01 # 1/2
cdef double b21 = 2.500000000000000e-01 # 1/4
cdef double b31 = 9.375000000000000e-02 # 3/32
cdef double b32 = 2.812500000000000e-01 # 9/32
cdef double b41 = 8.793809740555303e-01 # 1932/2197
cdef double b42 = -3.277196176604461e+00 # -7200/2197
cdef double b43 = 3.320892125625853e+00 # 7296/2197
cdef double b51 = 2.032407407407407e+00 # 439/216
cdef double b52 = -8.000000000000000e+00 # -8
cdef double b53 = 7.173489278752436e+00 # 3680/513
cdef double b54 = -2.058966861598441e-01 # -845/4104
cdef double b61 = -2.962962962962963e-01 # -8/27
cdef double b62 = 2.000000000000000e+00 # 2
cdef double b63 = -1.381676413255361e+00 # -3544/2565
cdef double b64 = 4.529727095516569e-01 # 1859/4104
cdef double b65 = -2.750000000000000e-01 # -11/40
cdef double r1 = 2.777777777777778e-03 # 1/360
cdef double r3 = -2.994152046783626e-02 # -128/4275
cdef double r4 = -2.919989367357789e-02 # -2197/75240
cdef double r5 = 2.000000000000000e-02 # 1/50
cdef double r6 = 3.636363636363636e-02 # 2/55
cdef double c1 = 1.157407407407407e-01 # 25/216
cdef double c3 = 5.489278752436647e-01 # 1408/2565
cdef double c4 = 5.353313840155945e-01 # 2197/4104
cdef double c5 = -2.000000000000000e-01 # -1/5
cdef class cyfunc:
    cdef double dy[2]

    cdef double* f(self, double* y):
        return self.dy

    def __cinit__(self):
        pass
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef rkf(cyfunc f, np.ndarray[double, ndim=1] times,
          np.ndarray[double, ndim=1] x0,
          double tol=1e-7, double dt_max=-1.0, double dt_min=1e-8):

    # Initialize
    cdef double t = times[0]
    cdef int times_index = 1
    cdef int add = 0
    cdef double end_time = times[len(times) - 1]
    cdef np.ndarray[double, ndim=1] res = np.empty_like(times)
    res[0] = x0[1]  # Only storing second variable
    cdef double x[2]
    x[:] = x0

    cdef double k1[2]
    cdef double k2[2]
    cdef double k3[2]
    cdef double k4[2]
    cdef double k5[2]
    cdef double k6[2]
    cdef double r[2]

    while abs(t - times[times_index]) < tol:  # in case t == times[0] occurs multiple times
        res[times_index] = res[0]
        t = times[times_index]
        times_index += 1

    if dt_max == -1.0:
        dt_max = 5. * (times[times_index] - times[0])
    cdef double dt = dt_max/10.0
    cdef double tolh = tol*dt

    while t < end_time:
        # If possible, step to next time to save
        if t + dt >= times[times_index]:
            dt = times[times_index] - t;
            add = 1

        # Calculate Runge-Kutta variables
        k1 = f.f(x)
        k1[0] *= dt; k1[1] *= dt;
        r[0] = x[0] + b21 * k1[0]
        r[1] = x[1] + b21 * k1[1]

        k2 = f.f(r)
        k2[0] *= dt; k2[1] *= dt;
        r[0] = x[0] + b31 * k1[0] + b32 * k2[0]
        r[1] = x[1] + b31 * k1[1] + b32 * k2[1]

        k3 = f.f(r)
        k3[0] *= dt; k3[1] *= dt;
        r[0] = x[0] + b41 * k1[0] + b42 * k2[0] + b43 * k3[0]
        r[1] = x[1] + b41 * k1[1] + b42 * k2[1] + b43 * k3[1]

        k4 = f.f(r)
        k4[0] *= dt; k4[1] *= dt;
        r[0] = x[0] + b51 * k1[0] + b52 * k2[0] + b53 * k3[0] + b54 * k4[0]
        r[1] = x[1] + b51 * k1[1] + b52 * k2[1] + b53 * k3[1] + b54 * k4[1]

        k5 = f.f(r)
        k5[0] *= dt; k5[1] *= dt;
        r[0] = x[0] + b61 * k1[0] + b62 * k2[0] + b63 * k3[0] + b64 * k4[0] + b65 * k5[0]
        r[1] = x[1] + b61 * k1[1] + b62 * k2[1] + b63 * k3[1] + b64 * k4[1] + b65 * k5[1]

        k6 = f.f(r)
        k6[0] *= dt; k6[1] *= dt;

        # Find largest error
        r[0] = abs(r1 * k1[0] + r3 * k3[0] + r4 * k4[0] + r5 * k5[0] + r6 * k6[0])
        r[1] = abs(r1 * k1[1] + r3 * k3[1] + r4 * k4[1] + r5 * k5[1] + r6 * k6[1])
        if r[1] > r[0]:
            r[0] = r[1]

        # If error is smaller than tolerance, take step
        tolh = tol*dt
        if r[0] <= tolh:
            t = t + dt
            x[0] = x[0] + c1 * k1[0] + c3 * k3[0] + c4 * k4[0] + c5 * k5[0]
            x[1] = x[1] + c1 * k1[1] + c3 * k3[1] + c4 * k4[1] + c5 * k5[1]
            # Save if at a save time index
            if add:
                while abs(t - times[times_index]) < tol:
                    res[times_index] = x[1]
                    t = times[times_index]
                    times_index += 1
                add = 0

        # Update time stepping
        dt = dt * min(max(0.84 * ( tolh / r[0] )**0.25, 0.1), 4.0)
        if dt > dt_max:
            dt = dt_max
        elif dt < dt_min:  # Equations are too stiff
            return res*0 - 100  # or something

        # ADD STOPPING CONDITION HERE...
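        # For example, a hypothetical closure test in the spirit of the
        # field-line question (x_start would have to be saved from x0
        # before the main loop):
        # if (x[0] - x_start[0])**2 + (x[1] - x_start[1])**2 < tol:
        #     return res[:times_index]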
    return res
cdef class F(cyfunc):
    cdef double a

    def __init__(self, double a):
        self.a = a

    cdef double* f(self, double y[2]):
        self.dy[0] = self.a*y[1] - y[0]
        self.dy[1] = y[0] - y[1]**2
        return self.dy
The code can be run with:
test.py
import numpy as np
import matplotlib.pyplot as plt
import pyximport
pyximport.install(setup_args={'include_dirs': np.get_include()})
from cythoncode import rkf, F
x0 = np.array([1, 0], dtype=np.float64)
f = F(a=0.1)
t = np.linspace(0, 30, 100)
y = rkf(f, t, x0)
plt.plot(t, y)
plt.show()