I am having some trouble translating my MATLAB code into Python with SciPy and NumPy. I am stuck on how to find optimal parameter values (k0 and k1) for my system of ODEs so that it fits my ten observed data points. I currently have an initial guess for k0 and k1. In MATLAB, I can use a function called fminsearch, which takes the system of ODEs, the observed data points, and the initial values of the system. It then calculates a new pair of parameters k0 and k1 that fit the observed data. I have included my code below; can you help me implement some kind of fminsearch to find the optimal parameter values k0 and k1 that fit my data? I want to add whatever code does this to my lsqtest.py file.
I have three .py files - ode.py, lsq.py, and lsqtest.py
ode.py:
def f(y, t, k):
    return (-k[0]*y[0],
            k[0]*y[0] - k[1]*y[1],
            k[1]*y[1])
lsq.py:
import pylab as py
import numpy as np
from scipy import integrate
from scipy import optimize
import ode
def lsq(teta, y0, data):
    # INPUT:  teta - the unknowns k0, k1
    #         y0   - initial values needed by the ODE
    #         data - observed data points
    # OUTPUT: least-squares value
    t = np.linspace(0, 9, 10)
    y_obs = data                  # observed data points
    k = [teta[0], teta[1]]
    # call the ODE solver to get the states
    # (the ODE system is defined in ode.py);
    # at each row (time point), r has the values
    # of the components [A, B, C]
    r = integrate.odeint(ode.f, y0, t, args=(k,))
    y_cal = r[:, 1]               # separate the measured B
    # compute the expression to be minimized:
    return sum((y_obs - y_cal)**2)
lsqtest.py:
import pylab as py
import numpy as np
from scipy import integrate
from scipy import optimize
import lsq
if __name__ == '__main__':
    teta = [0.2, 0.3]   # guess for parameter values k0 and k1
    y0 = [1, 0, 0]      # initial conditions for the system
    # observed data points
    data = [0.000, 0.416, 0.489, 0.595, 0.506, 0.493, 0.458, 0.394, 0.335, 0.309]
    resid = lsq.lsq(teta, y0, data)
    print(resid)
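For the fminsearch question itself: scipy.optimize.fmin implements the same Nelder-Mead simplex search as MATLAB's fminsearch, and lsq.lsq already returns the scalar sum of squares, so a minimal sketch of what could be appended inside the main block of lsqtest.py (reusing the names defined above) is:
from scipy import optimize   # already imported at the top of lsqtest.py

# Nelder-Mead simplex search, the algorithm behind MATLAB's fminsearch;
# the extra arguments y0 and data are forwarded to lsq.lsq(teta, y0, data)
teta_opt = optimize.fmin(lsq.lsq, teta, args=(y0, data))
print("fitted parameters k0, k1:", teta_opt)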
For this kind of fitting task you could use the package lmfit; the resulting fit reproduces the data very well. For now, I fixed the initial concentrations; you could also let them vary if you like (just remove vary=False in the code below). The parameters you obtain are:
[[Variables]]
x10: 5 (fixed)
x20: 0 (fixed)
x30: 0 (fixed)
k0: 0.12183301 +/- 0.005909 (4.85%) (init= 0.2)
k1: 0.77583946 +/- 0.026639 (3.43%) (init= 0.3)
[[Correlations]] (unreported correlations are < 0.100)
C(k0, k1) = 0.809
The code that reproduces the plot looks like this (some explanation can be found in the inline comments):
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from lmfit import minimize, Parameters, report_fit

def f(y, t, paras):
    """Your system of differential equations."""
    x1, x2, x3 = y

    try:
        k0 = paras['k0'].value
        k1 = paras['k1'].value
    except KeyError:
        k0, k1 = paras

    # the model equations
    f0 = -k0 * x1
    f1 = k0 * x1 - k1 * x2
    f2 = k1 * x2
    return [f0, f1, f2]

def g(t, x0, paras):
    """Solution to the ODE x'(t) = f(t,x,k) with initial condition x(0) = x0."""
    x = odeint(f, x0, t, args=(paras,))
    return x

def residual(paras, t, data):
    """Compute the residual between actual data and fitted data."""
    x0 = paras['x10'].value, paras['x20'].value, paras['x30'].value
    model = g(t, x0, paras)
    # you only have data for one of your variables
    x2_model = model[:, 1]
    return (x2_model - data).ravel()
# initial conditions
x10 = 5.
x20 = 0
x30 = 0
y0 = [x10, x20, x30]
# measured data
t_measured = np.linspace(0, 9, 10)
x2_measured = np.array([0.000, 0.416, 0.489, 0.595, 0.506, 0.493, 0.458, 0.394, 0.335, 0.309])
plt.figure()
plt.scatter(t_measured, x2_measured, marker='o', color='b', label='measured data', s=75)
# set parameters including bounds; you can also fix parameters (use vary=False)
params = Parameters()
params.add('x10', value=x10, vary=False)
params.add('x20', value=x20, vary=False)
params.add('x30', value=x30, vary=False)
params.add('k0', value=0.2, min=0.0001, max=2.)
params.add('k1', value=0.3, min=0.0001, max=2.)
# fit model
result = minimize(residual, params, args=(t_measured, x2_measured), method='leastsq')  # 'leastsq' is the default; 'nelder' also works
# check results of the fit
data_fitted = g(np.linspace(0., 9., 100), y0, result.params)
# plot fitted data
plt.plot(np.linspace(0., 9., 100), data_fitted[:, 1], '-', linewidth=2, color='red', label='fitted data')
plt.legend()
plt.xlim([0, max(t_measured)])
plt.ylim([0, 1.1 * max(data_fitted[:, 1])])
# display fitted statistics
report_fit(result)
plt.show()
If you have data for additional variables, you can simply update the function residual.
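For instance, a hedged sketch of that change, assuming you had also measured the third component in an array x3_measured (a hypothetical name, not in the original post), and reusing g and the params from above:
def residual(paras, t, data_x2, data_x3):
    """Residual over two observed components (x2 and x3)."""
    x0 = paras['x10'].value, paras['x20'].value, paras['x30'].value
    model = g(t, x0, paras)
    # stack the residuals of all observed variables into one flat array
    res_x2 = model[:, 1] - data_x2
    res_x3 = model[:, 2] - data_x3
    return np.concatenate([res_x2, res_x3]).ravel()

# result = minimize(residual, params, args=(t_measured, x2_measured, x3_measured))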
The following worked for me:
import pylab as pp
import numpy as np
from scipy import integrate, interpolate
from scipy import optimize
##initialize the data
x_data = np.linspace(0,9,10)
y_data = np.array([0.000,0.416,0.489,0.595,0.506,0.493,0.458,0.394,0.335,0.309])
def f(y, t, k):
    """Define the ODE system in terms of
       dependent variable y,
       independent variable t, and
       optional parameters, in this case a single variable k."""
    return (-k[0]*y[0],
            k[0]*y[0] - k[1]*y[1],
            k[1]*y[1])

def my_ls_func(x, teta):
    """Definition of the function for the LS fit:
       x gives the evaluation points,
       teta is an array of parameters to be varied for the fit."""
    # create an alias to f which passes the optional params
    f2 = lambda y, t: f(y, t, teta)
    # calculate ode solution, return values for each entry of "x"
    r = integrate.odeint(f2, y0, x)
    # in this case, we only need one of the dependent variable values
    return r[:, 1]

def f_resid(p):
    """Function to pass to optimize.leastsq;
       the routine will square and sum the values returned by this function."""
    return y_data - my_ls_func(x_data, p)

# solve the system - the solution is in variable c
guess = [0.2, 0.3]   # initial guess for params
y0 = [1, 0, 0]       # initial conditions for the ODEs
(c, kvg) = optimize.leastsq(f_resid, guess)   # get params
print("parameter values are", c)
# fit ODE results to interpolating spline just for fun
xeval=np.linspace(min(x_data), max(x_data),30)
gls = interpolate.UnivariateSpline(xeval, my_ls_func(xeval,c), k=3, s=0)
#pick a few more points for a very smooth curve, then plot
# data and curve fit
xeval=np.linspace(min(x_data), max(x_data),200)
#Plot of the data as red dots and fit as blue line
pp.plot(x_data, y_data,'.r',xeval,gls(xeval),'-b')
pp.xlabel('xlabel',{"fontsize":16})
pp.ylabel("ylabel",{"fontsize":16})
pp.legend(('data','fit'),loc=0)
pp.show()
Look at the scipy.optimize module. The minimize function is fairly similar to fminsearch: with method='Nelder-Mead' it uses the same downhill simplex algorithm that fminsearch uses.
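A minimal sketch of that approach (self-contained, with the same model and data as the answers above; the objective returns the scalar sum of squared errors that fminsearch would minimize):
import numpy as np
from scipy import integrate, optimize

x_data = np.linspace(0, 9, 10)
y_data = np.array([0.000, 0.416, 0.489, 0.595, 0.506,
                   0.493, 0.458, 0.394, 0.335, 0.309])
y0 = [1, 0, 0]

def f(y, t, k):
    return (-k[0]*y[0], k[0]*y[0] - k[1]*y[1], k[1]*y[1])

def sse(teta):
    # scalar objective: sum of squared errors between data and model
    r = integrate.odeint(f, y0, x_data, args=(teta,))
    return np.sum((y_data - r[:, 1])**2)

res = optimize.minimize(sse, [0.2, 0.3], method='Nelder-Mead')
print(res.x)   # fitted k0 and k1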
# cleaned up a bit to get my head around it - thanks for sharing
import pylab as pp
import numpy as np
from scipy import integrate, optimize
class Parameterize_ODE():
    def __init__(self):
        self.X = np.linspace(0, 9, 10)
        self.y = np.array([0.000, 0.416, 0.489, 0.595, 0.506,
                           0.493, 0.458, 0.394, 0.335, 0.309])
        self.y0 = [1, 0, 0]   # initial conditions for the ODEs

    def ode(self, y, X, p):
        return (-p[0]*y[0],
                p[0]*y[0] - p[1]*y[1],
                p[1]*y[1])

    def model(self, X, p):
        return integrate.odeint(self.ode, self.y0, X, args=(p,))

    def f_resid(self, p):
        return self.y - self.model(self.X, p)[:, 1]

    def optim(self, p_guess):
        return optimize.leastsq(self.f_resid, p_guess)   # fit params

po = Parameterize_ODE()
p_guess = [0.2, 0.3]
c, kvg = po.optim(p_guess)

# --- show ---
print("parameter values are", c, kvg)
x = np.linspace(min(po.X), max(po.X), 2000)
pp.plot(po.X, po.y, '.r', x, po.model(x, c)[:, 1], '-b')
pp.xlabel('X', {"fontsize": 16})
pp.ylabel("y", {"fontsize": 16})
pp.legend(('data', 'fit'), loc=0)
pp.show()
I have an ODE which I need to solve and plot. This is my code so far:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

##############################
# Constants
# α = 1, β = 12, μ = 0.338, Vb = 0.01, ω = 0.5
alpha = 1       # p1
beta = 12       # p2
mu = 0.338      # p3
omega = 0.5     # p5
gamma = 0.25    # p6
Acoef = 0.5
tmax = 1000
t = np.arange(0.0, tmax, 0.001)

################################
# Initial conditions
X0 = 0
I0 = 0
Z0 = 0.06
H0 = 0.04
E0 = 0
O0 = 0.02

# The model differential equations.
def deriv(y, t):
    X, I, Z, H, E, O = y
    dXdt = I
    dIdt = (-alpha*X - beta*X**3 - mu*I
            + gamma/(1 - X)**2 - gamma/(1 + X)**2
            + Acoef*np.sin(omega*t)/(1 - X)**2)
    dZdt = H
    dHdt = (-alpha*Z - beta*Z**3 - mu*I
            + gamma/(1 - Z)**2 - gamma/(1 + Z)**2
            + Acoef*np.sin(omega*t)/(1 - Z)**2)
    dEdt = O
    dOdt = dIdt - dHdt   # O follows the difference of the two subsystems
    return dXdt, dIdt, dZdt, dHdt, dEdt, dOdt

# Initial conditions vector
y0 = X0, I0, Z0, H0, E0, O0

# Integrate the model equations over the time grid, t.
ret = odeint(deriv, y0, t)
X, I, Z, H, E, O = ret.T

# Plot E(t) against O(t)
plt.xlim([-10, 10])
plt.plot(ret[:, 4], ret[:, 5])
plt.show()

e1 = ret[:, 4]
e2 = ret[:, 5]
Unfortunately, it produces the following warning:
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\site-packages\scipy\integrate\odepack.py", line 247
warnings.warn(warning_msg, ODEintWarning)
ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.
Also, the result is not as expected (I can't really post the graphical result here).
Does anyone know how I can fix it, please?
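As a first diagnostic step (a sketch, not a guaranteed fix: the 1/(1-X)**2 terms genuinely blow up if X approaches 1), you can do what the warning message suggests and rerun odeint with full_output=1, and optionally give lsoda more internal steps:
# rerun with diagnostics, as the warning suggests
ret, info = odeint(deriv, y0, t, full_output=1)
print(info['message'])    # termination message from lsoda
print(info['nst'][-1])    # cumulative number of internal steps

# allow more internal steps per output interval; this helps if the
# system is merely stiff, but not if the solution truly diverges
ret = odeint(deriv, y0, t, mxstep=5000)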
I'm fairly new to Python, and I'm trying to fit data from a college lab using lmfit. The Y variable has an error of 3%. How do I add that error to the fitting process? I'm switching over from scipy's curve_fit, where this was really easy: you just create an array of error values and pass it via sigma=[yourarray] when fitting.
This is my current code:
import numpy as np
from lmfit import Minimizer, Parameters, report_fit
import matplotlib.pyplot as plt

w1, V1, phi1, scal1 = np.loadtxt("./FiltroPasaBajo_1.txt", delimiter="\t", unpack=True)
t = w1
eV = V1*0.03 + 0.01

def funcion(parametros, x, y):
    R = parametros['R'].value
    C = parametros['C'].value
    # note: **0.5 for the square root; **1/2 in Python parses as (**1)/2
    modelo = 4/(1 + (x**2)*(R**2)*(C**2))**0.5
    return modelo - y

parametros = Parameters()
parametros.add('R', value=1000, min=900, max=1100)
parametros.add('C', value=1E-6, min=1E-7, max=1E-5)

fit = Minimizer(funcion, parametros, fcn_args=(t, V1))
resultado = fit.minimize()
final = V1 + resultado.residual
report_fit(resultado)

try:
    plt.plot(t, V1, 'k+')
    plt.plot(t, final, 'r')
    plt.show()
except ImportError:
    pass
V1 are the values I measured, and eV would be the array of errors. t is the x coordinate.
Thank you for your time
The minimize() function minimizes an array in the least-square sense, adjusting the variable parameters in order to minimize (resid**2).sum() for the resid array returned by your objective function. It really does not know anything about the uncertainties in your data or even about your data. To use the uncertainties in your fit, you need to pass in your array eV just as you pass in t and V1 and then use that in your calculation of the array to be minimized.
One typically wants to minimize Sum[ (data-model)^2/epsilon^2 ], where epsilon is the uncertainty in the data (your eV), so the residual array should be altered from data-model to (data-model)/epsilon. For your fit, you would want
def funcion(parametros, x, y, eps):
    R = parametros['R'].value
    C = parametros['C'].value
    modelo = 4/(1 + (x**2)*(R**2)*(C**2))**0.5
    return (modelo - y)/eps
and then use this with
fit = Minimizer(funcion, parametros, fcn_args=(t, V1, eV))
resultado = fit.minimize()
...
If you use the lmfit.Model interface (designed for curve-fitting), you could instead pass in a weights array that multiplies data - model; it would be 1.0/eV to represent weighting by the uncertainties (equivalent to the minimize approach above). Using the lmfit.Model interface and providing uncertainties would then look like this:
from lmfit import Model

# model function, to model the data
def func(t, r, c):
    return 4/(1 + (t**2)*(r**2)*(c**2))**0.5

model = Model(func)

parametros = model.make_params(r=1000, c=1.e-6)
parametros['r'].set(min=900, max=1100)
parametros['c'].set(min=1.e-7, max=1.e-5)

resultado = model.fit(V1, parametros, t=t, weights=1.0/eV)
print(resultado.fit_report())

plt.errorbar(t, V1, yerr=eV, fmt='k+', label='data')
plt.plot(t, resultado.best_fit, 'r', label='fit')
plt.legend()
plt.show()
hope that helps....
I don't think you can provide sigma to fit.minimize() directly. However, fit.minimize() uses scipy's leastsq method (by default), which is the same method used by scipy's curve_fit. If you look into scipy's curve_fit source, it does the following with sigma (for the 1-D case):
transform = 1.0 / sigma
jac = _wrap_jac(jac, xdata, transform)
res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
Since fit.minimize() allows you to pass keyword arguments (such as Dfun) through to leastsq, you can pass in a jac wrapped the same way scipy's curve_fit does it.
I am totally new to Python, and I am trying to integrate the following ODE system:
$\dot{x} = -2x - y^2$
$\dot{y} = -y - x^2$
This results in an array of all zeros, though. What am I doing wrong? It is mostly copied code, and it worked fine with another, uncoupled ODE.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import ode
def fun(t, z):
    """
    Right-hand side of the differential equations
    dx/dt = -2x - y^2
    dy/dt = -y - x^2
    """
    x, y = z
    f = [-2*x - y**2, -y - x**2]
    return f
# Create an `ode` instance to solve the system of differential
# equations defined by `fun`, and set the solver method to 'dopri5'.
solver = ode(fun)
solver.set_integrator('dopri5')
# Set the initial value z(0) = z0.
t0 = 0.0
z0 = [0, 0]
solver.set_initial_value(z0, t0)
# Create the array `t` of time values at which to compute
# the solution, and create an array to hold the solution.
# Put the initial value in the solution array.
t1 = 2.5
N = 75
t = np.linspace(t0, t1, N)
sol = np.empty((N, 2))
sol[0] = z0
# Repeatedly call the `integrate` method to advance the
# solution to time t[k], and save the solution in sol[k].
k = 1
while solver.successful() and solver.t < t1:
    solver.integrate(t[k])
    sol[k] = solver.y
    k += 1
# Plot the solution...
plt.plot(t, sol[:,0], label='x')
plt.plot(t, sol[:,1], label='y')
plt.xlabel('t')
plt.grid(True)
plt.legend()
plt.show()
Your initial state (z0) is [0,0]. The time derivative (fun) for this initial state is also [0,0]. Hence, for this initial condition, [0,0] is the correct solution for all times.
If you change your initial condition to some other value, you should observe a more interesting result.
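For example (an arbitrary nonzero starting point, not from the original post), replacing the initial state is enough to get a nontrivial solution:
z0 = [1.0, 0.5]               # any nonzero initial state
solver.set_initial_value(z0, t0)
# re-running the integration loop as before now yields a trajectory
# that decays toward the stable equilibrium at the origin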
I solve a differential equation with vector inputs
y' = f(t,y), y(t_0) = y_0
where y0 = y(x)
using the explicit Euler method, which says that
y_(i+1) = y_i + h*f(t_i, y_i)
where t is a time vector, h is the step size, and f is the right-hand side of the differential equation.
The python code for the method looks like this:
for i in np.arange(0, n-1):
    y[i+1, ...] = y[i, ...] + dt*myode(t[i], y[i, ...])
The result is a k,m matrix y, where k is the size of the t dimension, and m is the size of y.
The vectors y and t are returned.
t, x, and y are passed to scipy.interpolate.RectBivariateSpline(t, x, y, kx=1, ky=1):
g = scipy.interpolate.RectBivariateSpline(t, x, y, kx=1, ky=1)
The resulting object g takes new vectors ti,xi ( g(p,q) ) to give y_int, which is y interpolated at the points defined by ti and xi.
Here is my problem:
The documentation for RectBivariateSpline describes the __call__ method in terms of x and y:
__call__(x, y[, mth]) Evaluate spline at the grid points defined by the coordinate arrays
The matplotlib documentation for plot_surface uses similar notation:
Axes3D.plot_surface(X, Y, Z, *args, **kwargs)
with the important difference that X and Y are 2D arrays which are generated by numpy.meshgrid().
When I compute simple examples, the input order is the same in both and the result is exactly what I would expect. In my explicit Euler example, however, the initial order is ti,xi, yet the surface plot of the interpolant output only makes sense if I reverse the order of the inputs, like so:
ax2.plot_surface(xi, ti, u, cmap=cm.coolwarm)
While I am glad that it works, I'm not satisfied because I cannot explain why, nor why (apart from the array geometry) it is necessary to swap the inputs. Ideally, I would like to restructure the code so that the input order is consistent.
Here is a working code example to illustrate what I mean:
# Heat equation example with explicit Euler method
import numpy as np
import matplotlib.pyplot as mplot
import matplotlib.cm as cm
import scipy.sparse as sp
import scipy.interpolate as interp
from mpl_toolkits.mplot3d import Axes3D
import pdb
# explicit Euler method
def eev(myode, tspan, y0, dt):
    # Preprocessing
    # Time steps
    tspan[1] = tspan[1] + dt
    t = np.arange(tspan[0], tspan[1], dt, dtype=float)
    n = t.size
    m = y0.shape[0]
    y = np.zeros((n, m), dtype=float)
    y[0, :] = y0

    # explicit Euler recurrence relation
    for i in np.arange(0, n-1):
        y[i+1, ...] = y[i, ...] + dt*myode(t[i], y[i, ...])

    return y, t

# generate matrix A
# u'(t) = A*u(t) + g*u(t)
def a_matrix(n):
    aa = sp.diags([1, -2, 1], [-1, 0, 1], (n, n))
    return aa

# System of ODEs with finite differences
def f(t, u):
    dydt = np.divide(1, h**2)*A.dot(u)
    return dydt

# homogeneous Dirichlet boundary conditions
def rbd(t):
    ul = np.zeros((t, 1))
    return ul

# Initial value problem -----------
def main():
    # Metal rod
    # spatial discretization
    # number of inner nodes
    m = 20
    x0 = 0
    xn = 1
    x = np.linspace(x0, xn, m+2)

    # Step size
    global h
    h = x[1] - x[0]

    # Initial values
    u0 = np.sin(np.pi*x)

    # A matrix
    global A
    A = a_matrix(m)

    # Time
    t0 = 0
    tend = 0.2
    # Time step width
    dt = 0.0001
    tspan = [t0, tend]

    # Test r for stability
    r = np.divide(dt, h**2)
    if r <= 0.5:
        u, t = eev(f, tspan, u0[1:-1], dt)
    else:
        print('r = ', r)
        print('r > 0.5. Explicit Euler method will not be stable.')

    # Add boundary values back
    rb = rbd(t.size)
    u = np.hstack((rb, u, rb))

    # Interpolate heat values
    # Create interpolant. Note the parameter order
    fi = interp.RectBivariateSpline(t, x, u, kx=1, ky=1)

    # Create vectors for interpolant
    xi = np.linspace(x[0], x[-1], 100)
    ti = np.linspace(t0, tend, 100)

    # Compute function values from interpolant
    u_int = fi(ti, xi)

    # Change xi, ti into 2D arrays
    xi, ti = np.meshgrid(xi, ti)

    # Create figure and axes objects
    fig3 = mplot.figure(1)
    ax3 = fig3.gca(projection='3d')
    print('xi.shape =', xi.shape, 'ti.shape =', ti.shape, 'u_int.shape =', u_int.shape)

    # Plot surface. Note the parameter order, compare with interpolant!
    ax3.plot_surface(xi, ti, u_int, cmap=cm.coolwarm)
    ax3.set_xlabel('xi')
    ax3.set_ylabel('ti')

main()
mplot.show()
As far as I can see, you define:
# Change xi, ti in to 2D arrays
xi,ti = np.meshgrid(xi,ti)
Change this to:
ti,xi = np.meshgrid(ti,xi)
and
ax3.plot_surface(xi, ti, u_int, cmap=cm.coolwarm)
to
ax3.plot_surface(ti, xi, u_int, cmap=cm.coolwarm)
and it works fine (if I understood you correctly).
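Why the swap is needed (my reading of the shape conventions, not part of the original answer): fi(ti, xi) returns an array of shape (len(ti), len(xi)), with t along the rows, whereas np.meshgrid(a, b) defaults to 'xy' indexing and returns arrays of shape (len(b), len(a)). If you would rather keep the plotting call in the same ti, xi order as the interpolant, meshgrid's 'ij' indexing makes the mesh shapes line up with u_int directly:
# 'ij' indexing: TI[i, j] = ti[i], XI[i, j] = xi[j], both of shape
# (len(ti), len(xi)) - exactly the shape of u_int = fi(ti, xi)
TI, XI = np.meshgrid(ti, xi, indexing='ij')
ax3.plot_surface(TI, XI, u_int, cmap=cm.coolwarm)
ax3.set_xlabel('ti')
ax3.set_ylabel('xi')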
I have a data surface that I'm fitting using SciPy's leastsq function.
I would like to have some estimate of the quality of the fit after leastsq returns. I'd expected that this would be included as a return from the function, but, if so, it doesn't seem to be clearly documented.
Is there such a return or, barring that, some function I can pass my data and the returned parameter values and fit function to that will give me an estimate of fit quality (R^2 or some such)?
Thanks!
If you call leastsq like this:
import scipy.optimize as optimize
p, cov, infodict, mesg, ier = optimize.leastsq(
    residuals, a_guess, args=(x, y), full_output=True)
where
def residuals(a, x, y):
    return y - f(x, a)
then, using the definition of R^2 given here,
ss_err=(infodict['fvec']**2).sum()
ss_tot=((y-y.mean())**2).sum()
rsquared=1-(ss_err/ss_tot)
What is infodict['fvec'] you ask? It's the array of residuals:
In [48]: optimize.leastsq?
...
infodict -- a dictionary of optional outputs with the keys:
'fvec' : the function evaluated at the output
For example:
import scipy.optimize as optimize
import numpy as np
import collections
import matplotlib.pyplot as plt
x = np.array([821,576,473,377,326])
y = np.array([255,235,208,166,157])
def sigmoid(p, x):
    x0, y0, c, k = p
    y = c / (1 + np.exp(-k*(x - x0))) + y0
    return y

def residuals(p, x, y):
    return y - sigmoid(p, x)
Param=collections.namedtuple('Param','x0 y0 c k')
p_guess=Param(x0=600,y0=200,c=100,k=0.01)
p, cov, infodict, mesg, ier = optimize.leastsq(
    residuals, p_guess, args=(x, y), full_output=True)
p=Param(*p)
xp = np.linspace(100, 1600, 1500)
print('''\
x0 = {p.x0}
y0 = {p.y0}
c = {p.c}
k = {p.k}
'''.format(p=p))
pxp=sigmoid(p,xp)
# You could compute the residuals this way:
resid=residuals(p,x,y)
print(resid)
# [ 0.76205302 -2.010142 2.60265297 -3.02849144 1.6739274 ]
# But you don't have to compute `resid` -- `infodict['fvec']` already
# contains the info.
print(infodict['fvec'])
# [ 0.76205302 -2.010142 2.60265297 -3.02849144 1.6739274 ]
ss_err=(infodict['fvec']**2).sum()
ss_tot=((y-y.mean())**2).sum()
rsquared=1-(ss_err/ss_tot)
print(rsquared)
# 0.996768131959
plt.plot(x, y, '.', xp, pxp, '-')
plt.xlim(100,1000)
plt.ylim(130,270)
plt.xlabel('x')
plt.ylabel('y',rotation='horizontal')
plt.grid(True)
plt.show()