How to get SciPy.integrate.odeint to stop when path is closed? - python

edit: It's been five years, has SciPy.integrate.odeint learned to stop yet?
The script below integrates magnetic field lines around closed paths and stops when it returns to original value within some tolerance, using Runge-Kutta RK4 in Python. I would like to use SciPy.integrate.odeint, but I can not see how I can tell it to stop when the path is approximately closed.
Of course odeint may be much faster than integrating in Python, I could just let it go around blindly and look for closure in the results, but in the future I'll do much larger problems.
Is there a way that I can implement a "OK that's close enough - you can stop now!" method into odeint? Or should I just integrate for a while, check, integrate more, check...
This discussion seems relevant, and seems to suggest that "you can't from within SciPy" might be the answer.
Note: I usually use RK45 (Runge-Kutta-Fehlberg), which is more accurate at a given step size and therefore faster for a given accuracy, but I kept it simple here. It also makes variable step size possible.
Update: But sometimes I need fixed step size. I've found that Scipy.integrate.ode does provide a testing/stopping method ode.solout(t, y) but doesn't seem to have the ability to evaluate at fixed points of t. odeint allows evaluation at fixed points of t, but doesn't seem to have a testing/stopping method.
def rk4Bds_stops(x, h, n, F, fclose=0.1):
    """Integrate dx/ds = F(x) with fixed-step RK4 until the path closes.

    The trajectory is written in place: ``x[0]`` must already hold the
    starting point, and ``x[i + 1]`` receives the point reached after step
    ``i``.

    Parameters
    ----------
    x : numpy.ndarray
        Pre-allocated array of points, one row per point.
    h : float
        Step size.
    n : int
        Maximum number of steps. It is clamped to ``len(x) - 1`` so the
        integration can never write past the end of ``x``.
    F : callable
        Right-hand side; ``F(point)`` returns the derivative at ``point``.
    fclose : float, optional
        Closure threshold as a fraction of the largest distance from
        ``x[0]`` seen so far.

    Returns
    -------
    int
        Number of steps taken; the last point written is ``x[result]``.

    Notes
    -----
    Closure detection has two phases. Once the distance to ``x[0]`` is
    decreasing and below ``fclose * distance_max`` the integrator starts
    "watching"; it then stops after the first step on which the distance
    grows again, i.e. just past the point of closest approach.
    """
    h_over_two, h_over_six = h / 2.0, h / 6.0

    # Never step beyond the storage the caller provided.
    n = min(n, len(x) - 1)

    watching = False
    greater = False  # explicit initial value; only consulted once watching
    distance_max = 0.0
    distance_old = -1.0  # negative so the first step never counts as "closer"

    i = 0
    while i < n and not (watching and greater):
        k1 = F(x[i])
        k2 = F(x[i] + k1 * h_over_two)
        k3 = F(x[i] + k2 * h_over_two)
        k4 = F(x[i] + k3 * h)
        x[i + 1] = x[i] + h_over_six * (k1 + 2.0 * (k2 + k3) + k4)

        distance = np.sqrt(((x[i + 1] - x[0]) ** 2).sum())
        distance_max = max(distance, distance_max)

        getting_closer = distance < distance_old
        if getting_closer and distance < fclose * distance_max:
            watching = True

        greater = distance > distance_old
        distance_old = distance
        i += 1

    return i
def get_BrBztanVec(rz):
    """Return the unit vector along the (Br, Bz) field at the point ``rz``.

    Parameters
    ----------
    rz : sequence of two floats
        ``rz[0]`` is the r coordinate and ``rz[1]`` the z coordinate.

    Returns
    -------
    numpy.ndarray
        Length-2 array ``[Br, Bz] / |B|``.

    Notes
    -----
    The module-level names ``i``, ``mu0``, ``a``, ``h`` and ``pi`` are read
    at call time, so they must be defined before the first call.
    ``rz[0] == 0`` is not handled (``gamma = zz / rz[0]``).
    """
    Brz = np.zeros(2)

    B_zero = 0.5 * i * mu0 / a
    zz = rz[1] - h  # z measured from the offset h

    # Coordinates scaled by a.
    alpha = rz[0] / a
    beta = zz / a
    gamma = zz / rz[0]

    Q = (1.0 + alpha) ** 2 + beta ** 2
    k = np.sqrt(4.0 * alpha / Q)

    C1 = 1.0 / (pi * np.sqrt(Q))
    C2 = gamma / (pi * np.sqrt(Q))
    C3 = (1.0 - alpha ** 2 - beta ** 2) / (Q - 4.0 * alpha)
    C4 = (1.0 + alpha ** 2 + beta ** 2) / (Q - 4.0 * alpha)

    # scipy's ellipe/ellipk take the parameter m = k**2, not the modulus k.
    E, K = spe.ellipe(k ** 2), spe.ellipk(k ** 2)

    Brz[0] += B_zero * C2 * (C4 * E - K)
    Brz[1] += B_zero * C1 * (C3 * E + K)

    Bmag = np.sqrt((Brz ** 2).sum())
    return Brz / Bmag
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as spe
from scipy.integrate import odeint as ODEint
# Constants read as globals by get_BrBztanVec.
pi = np.pi
mu0 = 4.0 * pi * 1.0E-07
i = 1.0 # amperes
a = 1.0 # meters
h = 0.0 # meters

ds = 0.04 # step distance (meters)

r_list, z_list, n_list = [], [], []
dr_list, dz_list = [], []

r_try = np.linspace(0.15, 0.95, 17)  # starting r values, all at z = 0

x = np.zeros((1000, 2))  # scratch trajectory buffer, reused for every start
nsteps = 500

for rt in r_try:
    x[:] = np.nan
    x[0] = np.array([rt, 0.0])
    n = rk4Bds_stops(x, ds, nsteps, get_BrBztanVec)
    n_list.append(n)
    # The copy is required: x is overwritten on the next iteration.
    r, z = x[:n+1].T.copy()
    dr, dz = r[1:] - r[:-1], z[1:] - z[:-1]
    r_list.append(r)
    z_list.append(z)
    dr_list.append(dr)
    dz_list.append(dz)

plt.figure(figsize=[14, 8])
fs = 20

plt.subplot(2,3,1)
for r in r_list:
    plt.plot(r)
plt.title("r", fontsize=fs)

plt.subplot(2,3,2)
for z in z_list:
    plt.plot(z)
plt.title("z", fontsize=fs)

plt.subplot(2,3,3)
for r, z in zip(r_list, z_list):
    plt.plot(r, z)
plt.title("r, z", fontsize=fs)

plt.subplot(2,3,4)
for dr, dz in zip(dr_list, dz_list):
    plt.plot(dr, dz)
plt.title("dr, dz", fontsize=fs)

plt.subplot(2, 3, 5)
plt.plot(n_list)
plt.title("n", fontsize=fs)

plt.show()

What you need is 'event handling'. The scipy.integrate.odeint cannot do this yet. But you could use sundials (see https://pypi.python.org/pypi/python-sundials/0.5), which can do event handling.
The other option, keeping speed as a priority, is to simply code up rkf in cython. I have an implementation lying around which should be easy to change to stop after some criteria:
cythoncode.pyx
import numpy as np
cimport numpy as np
import cython
#cython: boundscheck=False
#cython: wraparound=False
# Coefficient table for the embedded Runge-Kutta-Fehlberg step used by rkf.
#   a*  : node coefficients (not referenced by rkf, whose right-hand side
#         takes no time argument)
#   b*  : stage coefficients combining earlier k's into each stage input
#   r*  : weights of the per-step error estimate
#   c*  : weights used to advance the state on an accepted step
# The trailing comment on each line gives the value as a fraction.
cdef double a2 = 2.500000000000000e-01 # 1/4
cdef double a3 = 3.750000000000000e-01 # 3/8
cdef double a4 = 9.230769230769231e-01 # 12/13
cdef double a5 = 1.000000000000000e+00 # 1
cdef double a6 = 5.000000000000000e-01 # 1/2
cdef double b21 = 2.500000000000000e-01 # 1/4
cdef double b31 = 9.375000000000000e-02 # 3/32
cdef double b32 = 2.812500000000000e-01 # 9/32
cdef double b41 = 8.793809740555303e-01 # 1932/2197
cdef double b42 = -3.277196176604461e+00 # -7200/2197
cdef double b43 = 3.320892125625853e+00 # 7296/2197
cdef double b51 = 2.032407407407407e+00 # 439/216
cdef double b52 = -8.000000000000000e+00 # -8
cdef double b53 = 7.173489278752436e+00 # 3680/513
cdef double b54 = -2.058966861598441e-01 # -845/4104
cdef double b61 = -2.962962962962963e-01 # -8/27
cdef double b62 = 2.000000000000000e+00 # 2
cdef double b63 = -1.381676413255361e+00 # -3544/2565
cdef double b64 = 4.529727095516569e-01 # 1859/4104
cdef double b65 = -2.750000000000000e-01 # -11/40
cdef double r1 = 2.777777777777778e-03 # 1/360
cdef double r3 = -2.994152046783626e-02 # -128/4275
cdef double r4 = -2.919989367357789e-02 # -2197/75240
cdef double r5 = 2.000000000000000e-02 # 1/50
cdef double r6 = 3.636363636363636e-02 # 2/55
cdef double c1 = 1.157407407407407e-01 # 25/216
cdef double c3 = 5.489278752436647e-01 # 1408/2565
cdef double c4 = 5.353313840155945e-01 # 2197/4104
cdef double c5 = -2.000000000000000e-01 # -1/5
cdef class cyfunc:
    """Base class for a two-component ODE right-hand side callable from C.

    Subclasses override ``f`` to fill ``self.dy`` with the derivative at
    ``y`` and return a pointer to it. The base implementation returns the
    buffer without writing to it.
    """
    # Output buffer shared by every call to f; callers must copy the values
    # out before calling f again.
    cdef double dy[2]

    cdef double* f(self, double* y):
        return self.dy

    def __cinit__(self):
        pass
#cython.cdivision(True)
#cython.boundscheck(False)
#cython.wraparound(False)
cpdef rkf(cyfunc f, np.ndarray[double, ndim=1] times,
          np.ndarray[double, ndim=1] x0,
          double tol=1e-7, double dt_max=-1.0, double dt_min=1e-8):
    """Adaptive Runge-Kutta-Fehlberg integration of a two-component system.

    Parameters
    ----------
    f : cyfunc
        Right-hand side; ``f.f(y)`` returns a pointer to the two derivatives.
    times : 1-D double array
        Times at which the solution is saved. ``times[0]`` is the start time.
    x0 : 1-D double array
        Initial state (two components).
    tol : double
        Error tolerance per unit step; also the slack used when matching
        the current time against ``times``.
    dt_max : double
        Largest step allowed. The default ``-1.0`` selects five times the
        first non-zero save interval.
    dt_min : double
        Smallest step allowed.

    Returns
    -------
    1-D double array, same shape as ``times``
        The second state component at each save time. If the step size
        drops below ``dt_min`` an array filled with ``-100`` is returned.
    """
    # Initialize
    cdef double t = times[0]
    cdef int times_index = 1
    cdef int add = 0
    cdef int n_times = len(times)
    cdef double end_time = times[n_times - 1]
    cdef np.ndarray[double, ndim=1] res = np.empty_like(times)
    res[0] = x0[1]  # Only storing second variable
    cdef double x[2]
    x[:] = x0
    cdef double k1[2]
    cdef double k2[2]
    cdef double k3[2]
    cdef double k4[2]
    cdef double k5[2]
    cdef double k6[2]
    cdef double r[2]

    # Leading save times equal to the start time all get the initial value.
    # The index is bounds-checked so a `times` made only of repeats of the
    # start time does not read past the end.
    while times_index < n_times and abs(t - times[times_index]) < tol:
        res[times_index] = res[0]
        t = times[times_index]
        times_index += 1
    if times_index >= n_times:
        return res

    if dt_max == -1.0:
        dt_max = 5. * (times[times_index] - times[0])
    cdef double dt = dt_max/10.0
    cdef double tolh = tol*dt

    while t < end_time:
        # If possible, step to next time to save
        if t + dt >= times[times_index]:
            dt = times[times_index] - t
            add = 1

        # Calculate Runge-Kutta variables. Each k is copied out of f's
        # shared buffer and scaled by dt before the next call to f.f.
        k1 = f.f(x)
        k1[0] *= dt; k1[1] *= dt
        r[0] = x[0] + b21 * k1[0]
        r[1] = x[1] + b21 * k1[1]
        k2 = f.f(r)
        k2[0] *= dt; k2[1] *= dt
        r[0] = x[0] + b31 * k1[0] + b32 * k2[0]
        r[1] = x[1] + b31 * k1[1] + b32 * k2[1]
        k3 = f.f(r)
        k3[0] *= dt; k3[1] *= dt
        r[0] = x[0] + b41 * k1[0] + b42 * k2[0] + b43 * k3[0]
        r[1] = x[1] + b41 * k1[1] + b42 * k2[1] + b43 * k3[1]
        k4 = f.f(r)
        k4[0] *= dt; k4[1] *= dt
        r[0] = x[0] + b51 * k1[0] + b52 * k2[0] + b53 * k3[0] + b54 * k4[0]
        r[1] = x[1] + b51 * k1[1] + b52 * k2[1] + b53 * k3[1] + b54 * k4[1]
        k5 = f.f(r)
        k5[0] *= dt; k5[1] *= dt
        r[0] = x[0] + b61 * k1[0] + b62 * k2[0] + b63 * k3[0] + b64 * k4[0] + b65 * k5[0]
        r[1] = x[1] + b61 * k1[1] + b62 * k2[1] + b63 * k3[1] + b64 * k4[1] + b65 * k5[1]
        k6 = f.f(r)
        k6[0] *= dt; k6[1] *= dt

        # Find largest error (r is reused to hold the error estimates)
        r[0] = abs(r1 * k1[0] + r3 * k3[0] + r4 * k4[0] + r5 * k5[0] + r6 * k6[0])
        r[1] = abs(r1 * k1[1] + r3 * k3[1] + r4 * k4[1] + r5 * k5[1] + r6 * k6[1])
        if r[1] > r[0]:
            r[0] = r[1]

        # If error is smaller than tolerance, take step
        tolh = tol*dt
        if r[0] <= tolh:
            t = t + dt
            x[0] = x[0] + c1 * k1[0] + c3 * k3[0] + c4 * k4[0] + c5 * k5[0]
            x[1] = x[1] + c1 * k1[1] + c3 * k3[1] + c4 * k4[1] + c5 * k5[1]
            # Save if at a save time index
            if add:
                # Bounds check first: after the last sample is stored,
                # times_index equals n_times and must not be dereferenced.
                while times_index < n_times and abs(t - times[times_index]) < tol:
                    res[times_index] = x[1]
                    t = times[times_index]
                    times_index += 1
                add = 0

        # Update time stepping. A zero error estimate would divide by zero,
        # so it takes the maximum growth factor directly.
        if r[0] > 0.0:
            dt = dt * min(max(0.84 * ( tolh / r[0] )**0.25, 0.1), 4.0)
        else:
            dt = dt * 4.0
        if dt > dt_max:
            dt = dt_max
        elif dt < dt_min:  # Equations are too stiff
            return res*0 - 100  # or something

        # ADD STOPPING CONDITION HERE...

    return res
cdef class F(cyfunc):
    """Example right-hand side with one parameter ``a``.

    Computes ``dy0 = a*y1 - y0`` and ``dy1 = y0 - y1**2``.
    """
    cdef double a

    def __init__(self, double a):
        self.a = a

    cdef double* f(self, double y[2]):
        # Results go into the buffer inherited from cyfunc.
        self.dy[0] = self.a*y[1] - y[0]
        self.dy[1] = y[0] - y[1]**2
        return self.dy
The code can be run by
test.py
import numpy as np
import matplotlib.pyplot as plt
import pyximport
pyximport.install(setup_args={'include_dirs': np.get_include()})
from cythoncode import rkf, F
# Integrate the example system F(a=0.1) from x0 and plot the saved component
# (rkf stores only the second state variable) at 100 times in [0, 30].
x0 = np.array([1, 0], dtype=np.float64)
f = F(a=0.1)
t = np.linspace(0, 30, 100)
y = rkf(f, t, x0)
plt.plot(t, y)
plt.show()

Related

Solver Issue No Algorithm Found

I am trying to replace the symbols in the function U2(x, y, z) with specified values of x, y and z. I'm not sure how to do that with sympy, because the values are NumPy ranges such as "x = np.arange(-h, h, 0.001)", as seen in the code below.
Below you will find my implementation with sympy. Additionally I am using PyCharm.
This implementation is based on Dr. Annabestani and Dr. Naghavis' paper: A 3D analytical ion transport model for ionic polymer metal composite actuators in large bending deformations
import sympy as sp
# --- Geometry and material parameters ---------------------------------------
h = 0.1 # [mm] half of thickness
W: float = 6 # [mm] width
L: float = 28 # [mm] length
F: float = 96458 # [C/mol] Faraday's constant
k_e = 1.34E-6 # [F/m]
Y = 5.71E8 # [Pa]
# NOTE(review): this evaluates to -9.97 (1.03 minus 11); 1.03E-11 was
# presumably intended -- confirm against the paper.
d = 1.03 - 11 # [m^2/s] diffusivity coefficient
T = 293 # [K]
C_minus = 1200 # [mol/m^3] Cation concentration
C_plus = 1200 # [mol/m^3] anion concentration
R = 8.3143 # [J/mol*K] Gas constant
Vol = 2*h*W*L
#dVol = diff(Vol,x) + diff(Vol, y) + diff(Vol, z) # change in Volume
theta = 1 / W
x, y, z, m, n, p, t = sp.symbols('x y z m n p t')
V_1 = 0.5 * sp.sin(2 * sp.pi * t) # Voltage as a function of time
k_f = 0.5
t_f = 44
k_g = 4.5
t_g = 0.07
B_mnp = 0.003
b_mnp: float = B_mnp
gamma_hat_2 = 0.04
gamma_hat_5 = 0.03
gamma_hat_6 = 5E-3
r_M = 0.15 # membrane resistance
r_ew = 0.175 # transverse resistance of electrode
r_el = 0.11 # longitudinal resistance of electrode
mu = 2.4
sigma_not = 0.1
a_L: float = 1.0 # distributed surface attenuation
r_hat = sp.sqrt(r_M ** 2 + r_ew ** 2 + r_el ** 2)
lambda_1 = 0.0001
dVol = 1
K = (F ** 2 * C_minus * d * (1 - C_minus * dVol)) / (R * T * k_e) # also K = a
K_hat = (K-lambda_1)/d
gamma_1 = 1.0
gamma_2 = 1.0
gamma_3 = 1.0
gamma_4 = 1.0
gamma_5 = 1.0
gamma_6 = 1.0
gamma_7 = 1.0
small_gamma_1 = 1.0
small_gamma_2 = 1.0
small_gamma_3 = 1.0
# --- Symbolic model ----------------------------------------------------------
psi = gamma_1*x + gamma_2*y + gamma_3*z + gamma_4*x*y + gamma_5*x*z + gamma_6*y*z + gamma_7*x*y*z + (small_gamma_1/2)*x**2 + (small_gamma_2/2)*y**2 + (small_gamma_3/2)*x*z**2
# NOTE(review): "/ 2 * h" divides by 2 and then multiplies by h, and each
# coordinate multiplies its sine instead of sitting inside the argument --
# confirm against Eqn. 19.
psi_hat_part = ((sp.sin(((m + 1) * sp.pi) / 2 * h)) * x) * ((sp.sin(((n + 1) * sp.pi) / W)) * y) * ((sp.sin(((p + 1) * sp.pi) / L)) * z)
psi_hat = psi * psi_hat_part # Eqn. 19
print(psi_hat)
# Integration limits: x over the thickness, y over the width, z over the length.
x1: float = -h
x2: float = h
y1: float = 0
y2: float = W
z1: float = 0
z2: float = L
I = psi_hat.integrate((x, x1, x2), (y, y1, y2), (z, z1, z2)) # Integration for a_mnp Eqn. 18
A_mnp = ((8 * K_hat) / (2 * h * W * L)) * I
Partial = A_mnp * ((sp.sin(((m + 1) * sp.pi) / 2 * h)) * x) * ((sp.sin(((n + 1) * sp.pi) / W)) * y) * ((sp.sin(((p + 1) * sp.pi) / L)) * z)
start = Partial.integrate((p, 0 , 10E9), (n, 0, 10E9), (m, 0, 10E9)) #when using infinity it goes weird, also integrating leads to higher thresholds than summation
a_mnp_denom = (((sp.sin(((m + 1) * sp.pi) / 2 * h)) ** 2) * ((sp.sin(((n + 1) * sp.pi) / W)) ** 2) * (
(sp.sin(((p + 1) * sp.pi) / L)) ** 2) + K_hat)
a_mnp = A_mnp / a_mnp_denom # Eqn. 18
# The Function object created here is immediately replaced by the expression
# assigned on the next statement.
U2 = sp.Function("U2")
U2 = a_mnp * ((sp.sin(((m + 1) * sp.pi) / 2 * h)) * x) * ((sp.sin(((n + 1) * sp.pi) / W)) * y) * (
(sp.sin(((p + 1) * sp.pi) / L)) * z) # Eqn. 13
# From here on x, y and z are rebound from Symbols to NumPy arrays.
# NOTE(review): only sympy is imported in this snippet; `np` must come from
# an import not shown here.
x = np.arange(-h, h, 0.001)
y = np.arange(-h, h, 0.001)
z = np.arange(-h, h, 0.001)
# This is the call that fails for the author.
f= sp.subs((U2), (x ,y ,z))
I currently get the error message: ValueError: subs accepts either 1 or 2 arguments. So that means I can't use the subs() method and replace() also doesn't work too well. Are there any other methods one can use?
Any help would be greatly appreciated, thank you!
Oscar is right: you are trying to deal with too much of the problem at once. That aside, Numpy and SymPy do not work like you think they do. What were you hoping to see when you replaced 3 variables, each with a range?
You cannot replace a SymPy variable/Symbol with a Numpy arange object, but you can replace a Symbol with a single value:
>>> from sympy.abc import x, y
>>> a = 1.0
>>> u = x + y + a
>>> u.subs(x, 1)
y + 2.0
>>> u.subs([(x,1), (y,2)])
4.0
You might iterate over the arange values, creating values of f and then doing something with each value:
# Substitute one (x, y) pair at a time: zip pairs the Symbols with the values
# and subs receives the resulting {symbol: value} mapping.
f = lambda v: u.subs(dict(zip((x,y),v)))
for xi in range(1,3): # replace range with your arange call
    for yi in range(-4,-2):
        fi = f((xi,yi))
        print(xi,yi,fi)
Be careful about iterating and using x or y as your loop variable, however, since that will then lose the assignment of the Symbol to that variable,
# Deliberate demonstration of the pitfall: the loop rebinds the name x.
for x in range(2):
    print(u.subs(x, x)) # no change and x is no longer a Symbol, it is now an int

Why Won't This Python Code match the Formula for a European Call Option?

import math
import numpy as np
# Monte Carlo valuation of a European call under geometric Brownian motion.
S0 = 100.; K = 100.; T = 1.0; r = 0.05; sigma = 0.2
M = 100; dt = T / M; I = 500000  # M time steps, I simulated paths

# S[t] holds the value of every path at time step t.
S = np.zeros((M + 1, I))
S[0] = S0
for t in range(1, M + 1):
    z = np.random.standard_normal(I)
    # Exact log-normal update over one step of length dt.
    S[t] = S[t - 1] * np.exp((r - 0.5 * sigma ** 2) * dt + sigma *
                             math.sqrt(dt) * z)

# Discounted mean payoff at maturity.
# NOTE(review): for these inputs the Black-Scholes closed form has d1 = 0.35
# and d2 = 0.15, which gives a call value of about 10.45 -- verify the
# reference value the result is being compared against.
C0 = math.exp(-r * T) * np.sum(np.maximum(S[-1] - K, 0)) / I
print ("European Option Value is ", C0)
It gives a value of around 10.45 as you increase the number of simulations, but using the B-S formula the value should be around 10.09. Anybody know why the code isn't giving a number closer to the formula?

Odeint Error - Excess work done on this call

I am writing a code to solve coupled harmonic oscillator equations using odeint from scipy. I want to add a random number to one of the equations at every time step of the ODESolver. To do this, I have written two time dependent constants, and used them. However, this gives me the following error.
ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run
with full_output = 1 to get quantitative information.
warnings.warn(warning_msg, ODEintWarning)
My code is given below.
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint
import scipy.stats as stats
from scipy.stats import beta
# Parameters of the two coupled oscillators: m1, m2, k1, k2, k12 and g are
# read as globals by f.
m1 = 1.1
m2 = 1.0
k1 = 1000.0
k2 = 1000.0
k12 = 100
g = 0.0
# A `global` statement at module level has no effect; Steps is not referenced
# again in this snippet.
global Steps
Steps = 0
# Initial positions.
x10 = 1
x20 = 0
# Read as globals by c(t) and d(t): beta(a, b) shape parameters and scale.
alpha = 1
a = 2
b = 3
# Initial velocities.
v10 = 0
v20 = 0
#A = np.random.beta(a,b, 10) * alpha
#B = np.random.beta(a,b, 10) * alpha
def c(t):
    """Return a fresh ``alpha * Beta(a, b)`` random draw.

    ``t`` is ignored, so two calls with the same ``t`` return different
    values. ``a``, ``b`` and ``alpha`` are module globals read at call time.
    """
    return np.random.beta(a,b) * alpha
def d(t):
    """Return a fresh ``alpha * Beta(a, b)`` random draw.

    ``t`` is ignored, so two calls with the same ``t`` return different
    values. ``a``, ``b`` and ``alpha`` are module globals read at call time.
    """
    return np.random.beta(a,b) * alpha
def f(x, t, c, d):
    """Right-hand side of the coupled-oscillator system passed to odeint.

    Parameters
    ----------
    x : sequence of four floats
        State ``[x1, v1, x2, v2]``.
    t : float
        Current time, forwarded to ``c`` and ``d``.
    c, d : callable
        Time-dependent coefficients for the first and second oscillator.
        Each is called twice per evaluation.

    Returns
    -------
    list of four floats
        Time derivative of the state.

    Notes
    -----
    ``k1``, ``k2``, ``k12``, ``m1``, ``m2`` and ``g`` are module globals.
    """
    y = []
    y.append(x[1] - c(t) * x[0])
    #print(c(t))
    y.append(-(k1 + k12) / m1 * x[0] + k12 / m1 * x[2] - 2 * g * x[1] - c(t) * x[1])
    y.append(x[3] - d(t) * x[2])
    y.append(-(k2 + k12) / m2 * x[2] + k12 / m2 * x[0] - 2 * g * x[3] - d(t) * x[3])
    return y
# Initial state [x1, v1, x2, v2] and the extra arguments forwarded to f.
b0 = [x10, v10, x20, v20]
b0 = np.array(b0)
args = (c, d)
t = np.linspace(0, 1, 1000 )
t = np.array(t)

X1, infodict = odeint(f, b0, t, args, full_output = 1)
X1 = X1.T
Q1 = X1[0]  # position of the first oscillator
Q2 = X1[2]  # position of the second oscillator

plt.plot(t, Q1, 'g-')
plt.plot(t, Q2, 'b-')
plt.show()

# Roots of a*w**4 + b*w**2 + c = 0 in w**2. This rebinds a, b and c, which is
# harmless only because odeint (the sole user of the functions c and d and of
# the beta parameters a and b) has already finished.
a = m1*m2
b = -(m1*(k2 + k12) + m2*(k1 + k12))
c = k1*k2 + k12*(k1 + k2)
wp = np.sqrt((-b + np.sqrt(b**2 - 4*a*c))/(2*a))
wm = np.sqrt((-b - np.sqrt(b**2 - 4*a*c))/(2*a))
print(wp)
print(wm)

# One "t,Q1,Q2" row per sample; the with-block closes the file even if a
# write fails.
with open('simdata.csv', mode='w') as f:
    for ti, q1, q2 in zip(t, Q1, Q2):
        f.write(str(ti) + ',' + str(q1) + ',' + str(q2) + '\n')

Error "unable to find vcvarsall.bat" despite newest visual studio version

I'm working in Jupyter Notebook. When I want to compile a .pyx in cython, it throws an error similar to this:
%run -i setup.py build_ext --inplace
unable to find vcvarsall.bat
The setup.py file looks like this:
from distutils.core import setup
from Cython.Build import cythonize
# Build hh_vers_vector.pyx as an extension module via cythonize.
setup(
ext_modules=cythonize("hh_vers_vector.pyx"),
)
This only happens, however, on my computer at work. At the one at home, it works just fine.
It is probably an issue with Visual Studio as explained here. The thing is, I installed the very same version of Visual Studio 2017 Community on both computers. The latest Anaconda 3 version is installed on both computers. Both use Python 3.6.2 and IPython 6.1.0. So how can that be? Both run with Windows 10. I'll also show you my .pyx file. If you need more information I will edit my post.
from math import exp
import numpy as np
import time
def hhModel(*params, Iext, float dt, int Vref):
    """Step the membrane model forward with explicit Euler updates.

    Parameters
    ----------
    *params : six floats
        ``ENa, EK, EL, GNa, GK, GL`` in that order; these variables are
        going to be optimized.
    Iext : sequence of float
        External current steps, the stimulus vector [nA]. Its length sets
        the number of samples.
    dt : float
        Time step [ms].
    Vref : int
        Reference potential [mV]; also the initial membrane potential.

    Returns
    -------
    float[:] memoryview
        Membrane potential at every sample.
    """
    ## Unwrap params argument: these variables are going to be optimized
    cdef float ENa = params[0]
    cdef float EK = params[1]
    cdef float EL = params[2]
    cdef float GNa = params[3]
    cdef float GK = params[4]
    cdef float GL = params[5]

    ## rate functions of the gating variables
    def alphaM(float v, float vr): return 0.1 * (v-vr-25) / ( 1 - exp(-(v-vr-25)/10) )
    def betaM(float v, float vr): return 4 * exp(-(v-vr)/18)
    def alphaH(float v, float vr): return 0.07 * exp(-(v-vr)/20)
    def betaH(float v, float vr): return 1 / ( 1 + exp( -(v-vr-30)/10 ) )
    def alphaN(float v, float vr): return 0.01 * (v-vr-10) / ( 1 - exp(-(v-vr-10)/10) )
    def betaN(float v, float vr): return 0.125 * exp(-(v-vr)/80)

    ## steady-state values of m,h,n
    def m_infty(float v, float vr): return alphaM(v,vr) / ( alphaM(v,vr) + betaM(v,vr) )
    def h_infty(float v, float vr): return alphaH(v,vr) / ( alphaH(v,vr) + betaH(v,vr) )
    def n_infty(float v, float vr): return alphaN(v,vr) / ( alphaN(v,vr) + betaN(v,vr) )

    ## parameters
    cdef float Cm, gK, gL, INa, IK, IL, dv_dt, dm_dt, dh_dt, dn_dt, aM, bM, aH, bH, aN, bN
    cdef float Smemb = 4000 # [um^2] surface area of the membrane
    cdef float Cmemb = 1 # [uF/cm^2] membrane capacitance density
    Cm = Cmemb * Smemb * 1e-8 # [uF] membrane capacitance
    # NOTE(review): gNa is not in the cdef list above, so unlike gK and gL it
    # is an untyped Python object -- confirm whether that is intended.
    gNa = GNa * Smemb * 1e-8 # Na conductance [mS]
    gK = GK * Smemb * 1e-8 # K conductance [mS]
    gL = GL * Smemb * 1e-8 # leak conductance [mS]

    # numSamples = int(T/dt);
    cdef int numSamples = len(Iext)
    # DEF numSamples = 200000

    # State arrays. A float[:] memoryview is single precision, so the backing
    # arrays must be float32; np.float was an alias of the 64-bit Python
    # float (and no longer exists in current NumPy), which does not match.
    cdef float[:] v = np.empty(numSamples, dtype=np.float32)
    cdef float[:] m = np.empty(numSamples, dtype=np.float32)
    cdef float[:] h = np.empty(numSamples, dtype=np.float32)
    cdef float[:] n = np.empty(numSamples, dtype=np.float32)

    # initial values
    v[0] = Vref # initial membrane potential
    m[0] = m_infty(v[0], Vref) # initial m
    h[0] = h_infty(v[0], Vref) # initial h
    n[0] = n_infty(v[0], Vref) # initial n

    ## calculate membrane response step-by-step
    for j in range(0, numSamples-1):
        # ionic currents: g[mS] * V[mV] = I[uA]
        INa = gNa * m[j]*m[j]*m[j] * h[j] * (ENa-v[j])
        IK = gK * n[j]*n[j]*n[j]*n[j] * (EK-v[j])
        IL = gL * (EL-v[j])

        # derivatives
        # I[uA] / C[uF] * dt[ms] = dv[mV]
        dv_dt = ( INa + IK + IL + Iext[j]*1e-3) / Cm

        # Same rate expressions as the helper functions above, written
        # inline here.
        aM = 0.1 * (v[j]-Vref-25) / ( 1 - exp(-(v[j]-Vref-25)/10))
        bM = 4 * exp(-(v[j]-Vref)/18)
        aH = 0.07 * exp(-(v[j]-Vref)/20)
        bH = 1 / ( 1 + exp( -(v[j]-Vref-30)/10 ) )
        aN = 0.01 * (v[j]-Vref-10) / ( 1 - exp(-(v[j]-Vref-10)/10) )
        bN = 0.125 * exp(-(v[j]-Vref)/80)

        dm_dt = (1-m[j])* aM - m[j]*bM
        dh_dt = (1-h[j])* aH - h[j]*bH
        dn_dt = (1-n[j])* aN - n[j]*bN

        # calculate next step
        v[j+1] = (v[j] + dv_dt * dt)
        m[j+1] = (m[j] + dm_dt * dt)
        h[j+1] = (h[j] + dh_dt * dt)
        n[j+1] = (n[j] + dn_dt * dt)

    return v

Cython code 3x slower than corresponding NumPy version

I'm currently writing my thesis on the use of particle filters for filtering out latent states in stochastic volatility models. To improve the filtering results I've added option prices as an observed process. This means that for a given time series, I have to calculate the option prices at each time step - a "normal" time series is 100-200 points.
Without going too deep into the algorithm, I'm having a serious problem with performance. The last for-loop loops over all of the particles that I use, which is somewhere around a 1,000 (as determined by M). Running this code for only one particle takes 0.25 seconds - which means that it takes around 4 minutes per time step to run using 1,000 particles (which is rather infeasible).
from __future__ import division
import numpy as np
import numexpr as ne
from fftInC import fft
import time
import math
import pyfftw
def HestonCallPrice(M, N, S, V, t, T, strikes, r, param, b, NFFT, inp, v, alphaC, eta, k, weights):
    """
    This will be the pricing function for the European call option. Since we found the
    quadrature procedure to be too slow we shall move on to use FFT instead.

    The prices are computed twice, first through the compiled ``fft`` routine
    and then through the original NumPy/numexpr loop, and the wall-clock time
    of each is printed. The array returned is the one from the second
    (non-cythonized) computation, of shape ``(M * N, len(strikes))``.

    NOTE: the numexpr expression strings look up local variables by name, so
    the local names in this function must not be changed.
    """
    # So, we begin defining all of the constants etc.
    vT, weightsT, inpJ, vJT = v.T, weights.T, inp * 1j, v.T * 1j
    p1, p2, p3_2, p3, p4 = param[1,:], param[2,:], param[3,:], np.sqrt(param[3,:]), param[4,:]

    # Next we move on to the calculations. These have been found to be rather
    # fast, and hence do not need any Cythonization.
    gamma = p3_2 / 2
    beta = ne.evaluate("p1 - p4 * p3 * 1j * inp")
    alpha = ne.evaluate("(-inp**2 - inpJ)/2")
    d = ne.evaluate("sqrt(beta**2 - 4 * alpha * gamma)")
    r_pos, r_neg = ne.evaluate("(beta + d)/(2 * gamma)"), ne.evaluate("(beta - d)/(2 * gamma)")
    g, inpJT = ne.evaluate("r_neg / r_pos"), inpJ.T
    D = ne.evaluate("r_neg * (1 - exp( -d * (T - t) ) ) / (1 - g * exp( -d * (T - t) ) )" )
    C = ne.evaluate("p1 * (r_neg*(T - t) - 2 / p3_2 * log( (1 - g*exp(-d*(T - t)))/(1 - g) ) )")
    A = 1j * inp.T * (math.log(S) + r * (T - t))
    C_tmp = (C * p2).T

    # The matrices and vectors that are sent into the Cython version of the
    # code are
    #   A = (1, 2048)
    #   C_tmp = (4, 2048)
    #   D.T = (4, 2048)
    #   V = (4, 1000)
    #   vJT[0, :] = (2048,)
    #   k[:, 0] = (2048,)
    #   weights.T[0, :] = (2048,)
    # This is now where we call the Cython script.
    start = time.time()
    prices = fft(A, float(r), float(t), float(T), C_tmp, D.T, V, float(alphaC), vJT[0, :], k[:, 0],
                 float(b), strikes, float(eta), weights.T[0, :])
    # A single %-formatted argument prints the same text under Python 2 and 3.
    print('Cythonized version:  %s  seconds' % (time.time() - start))

    # The below code is the original code which has been "cythonized".
    start = time.time()
    outPrices = np.empty( (M, N) )
    prices = np.empty( (M * N, len(strikes)) )  # replaces the Cython result

    # Regularly I use pyFFTW since it's a bit faster, but I couldn't figure
    # out how to use the C version of this, so to be fair when comparing
    # speeds I disable pyFFTW. However, turning this on using the below
    # settings it's 20-30% faster.
    # fftIn = pyfftw.n_byte_align_empty((N, NFFT), 16, 'complex128')
    #
    # fftOut = fftIn.copy()
    #
    # fft_object = pyfftw.FFTW(fftIn, fftOut, nthreads=8)
    for j in range( len(strikes) ):
        # Fractional grid index of log(strike); the two neighbouring grid
        # points are used for linear interpolation.
        position = (np.log(strikes[j]) + b) / ( 2 * b / NFFT)
        x_1 = np.exp( k[ int(math.floor(position)) ] )
        x_2 = np.exp( k[ int(math.ceil(position)) ] )
        for m in range(M):
            C_m, D_m, V_m = C_tmp[m, :], D[:, m].T, V[m, :][:, np.newaxis]
            F_cT = ne.evaluate("exp( -r*(T - t) ) * exp(C_m + D_m * V_m + A) / "
                               "( (alphaC + vJT) * (alphaC + 1 + vJT) )")
            toFFT = ne.evaluate("exp( b * vJT ) * F_cT * eta / 3 * weightsT")
            price = np.exp( -alphaC * k.T ) / math.pi * np.real ( np.fft.fft(toFFT) )
            y_1 = price[ :, int(math.floor(position)) ]
            y_2 = price[ :, int(math.ceil(position)) ]
            dydx = (y_2 - y_1)/(x_2 - x_1)
            outPrices[m, :] = dydx * (strikes[j] - x_1) + y_1
        prices[:, j] = outPrices.reshape(M * N)
    print('Non-cythonized version:  %s  seconds' % (time.time() - start))
    return prices
" ------ Defining constants etc, nothing to say really ----- "
# Problem size and model inputs for a single timing run of HestonCallPrice.
M, N, S, t, T, r, NFFT, alphaC = 1, 1000, 1000, 0, 1, 0, 2048, 1.5
strikes = np.array([900, 1100])
# V is an (M, N) array of normal draws; c is only used to derive eta.
c, V = 600, np.random.normal(loc=0.2, scale=0.05, size=(M, N))
# Five parameter rows, each repeated M times.
param = np.repeat(np.array([0.05, 0.5, 0.15, 0.15**2, 0]), M).reshape((5, M))
eta = c / NFFT
b = np.pi / eta
# Column vector 1..NFFT from which the v and k grids are built.
j = np.arange(1, NFFT+1)[:, np.newaxis]
v, k = eta * (j - 1), -b + 2 * b/ NFFT*(j - 1)
inp = v - (alphaC + 1)*1j
# 3 + (-1)**j, with 1 subtracted from the first entry only.
weights = 3 + (-1)**j - np.array([1] + [0]*(NFFT-1))[:, np.newaxis]
" ------------------------------------------------------------- "
HestonCallPrice(M, N, S, V, t, T, strikes, r, param, b, NFFT, inp, v, alphaC, eta, k, weights)
I found that the bottleneck is the last for-loop. I got a tip to rewrite the for-loop in Cython instead, see below
" --------------------------------- C IMPORTED PACKAGES ------------------------------------------ "
from __future__ import division
import cython
cimport cython
import math
cimport numpy as np
import numpy as np
import pyfftw
" ------------------------------------------------------------------------------------------------ "
"""
I heard that the boundscheck and wraparound functions could improve the performance, but I didn't
notice any performance gain whatsoever.
"""
#cython.profile(False)
#cython.boundscheck(False)
#cython.wraparound(False)
def fft(np.ndarray[double complex, ndim=2] A, float r, float t, float T,
        np.ndarray[double complex, ndim=2] C, np.ndarray[double complex, ndim=2] D,
        np.ndarray[double, ndim=2] V, float alphaC, np.ndarray[double complex, ndim=1] vJT,
        np.ndarray[double, ndim=1] k, float b,
        np.ndarray[long, ndim=1] strikes, float eta,
        np.ndarray[long, ndim=1] weightsT):
    """Price every (m, n) entry of ``V`` at every strike via an FFT.

    For each strike and each row ``m``, one integrand row per column ``n`` of
    ``V`` is built, all rows are transformed with ``np.fft.fft``, and the
    result is linearly interpolated between the two grid points of ``k``
    that bracket ``log(strike)``.

    Returns
    -------
    double array of shape ``(M * N, len(strikes))``
        Rows ``m * N`` to ``(m + 1) * N - 1`` hold the prices for row ``m``
        of ``V``.

    Notes
    -----
    Only the first row of ``A`` is used. The arithmetic inside the loops is
    still whole-array NumPy calls; the declared types do not turn it into
    C-level loops.
    """
    cdef int M = V.shape[0]
    cdef int N = V.shape[1]
    cdef int NFFT = D.shape[1]
    cdef np.ndarray[double complex, ndim=1] F_cT
    cdef np.ndarray[double complex, ndim=2] toFFT = np.empty( (N, NFFT), dtype=complex)
    cdef np.ndarray[double, ndim=2] prices
    cdef float x_1, x_2, position
    cdef np.ndarray[double, ndim=1] y_1
    cdef np.ndarray[double, ndim=1] y_2
    cdef np.ndarray[double, ndim=1] dydx
    cdef int m, j, n
    cdef np.ndarray[double, ndim=2] price = np.empty( (M * N, len(strikes)) )
    cdef np.ndarray[double complex, ndim=1] A_inp = A[0, :]

    for j in range( len(strikes) ):
        # Fractional grid index of log(strike) and its two neighbours.
        position = (math.log(strikes[j]) + b) / ( 2 * b / NFFT)
        x_1 = math.exp ( k[ int(math.floor(position)) ] )
        x_2 = math.exp ( k[ int(math.ceil(position)) ] )
        # M is the number of rows we have in A, C, D and V, so we need to
        # loop over all of those.
        for m in range(M):
            # Next we loop over all of the elements for each row in V,
            # corresponding to N. Each of the rows of A, C and D are NFFT in
            # length; looping over those element by element as well made the
            # code 4 times slower.
            for n in range(N):
                F_cT = math.exp( -r*(T - t) ) * np.exp (A_inp + C[m, :] + D[m, :] * V[m, n]) / \
                    ( (alphaC + vJT) * (alphaC + 1 + vJT) )
                toFFT[n, :] = np.exp (b * vJT) * F_cT * eta / 3 * weightsT
            # One FFT per row of toFFT, then interpolate at the strike.
            prices = np.exp ( -alphaC * k ) / math.pi * np.real ( np.fft.fft(toFFT) )
            y_1 = prices[ :, int(math.floor(position)) ]
            y_2 = prices[ :, int(math.ceil(position)) ]
            dydx = (y_2 - y_1)/(x_2 - x_1)
            price[m * N:(m + 1) * N, j] = dydx * (strikes[j] - x_1) + y_1
    return price
I'm compiling the code as
from distutils.core import setup, Extension
from Cython.Distutils import build_ext
import numpy.distutils.misc_util
include_dirs = numpy.distutils.misc_util.get_numpy_include_dirs()
# Build fftInC.pyx as the extension module "fftInC"; include_dirs supplies the
# NumPy headers needed by the `cimport numpy` in the .pyx file.
setup(
name = 'fftInC',
ext_modules = [Extension('fftInC', ['fftInC.pyx'], include_dirs=include_dirs)],
cmdclass = {'build_ext':build_ext}
)
But to my surprise, the Cython version is about 3x slower than the original one. And I can't really figure out where I'm going wrong. I think I've defined the input types correctly (which I understand should give a considerable performance boost).
My question is therefore: Can you identify where I'm going wrong? Is it the type definition, for-loops or FFT'ing (or something else)?

Categories