I am trying to fit some data that are distributed in the time following a rising gaussian curve, and then decaying exponentially.
I have found this example on the web, that is very similar to my case, but I just started to fit with python, and the example seems quite confusing to me.
Nonetheless, I have tryied to adapt the example to my script and data, and in the following is my progress:
#!/usr/bin/env python
import pyfits, os, re, glob, sys
from scipy.optimize import leastsq
from numpy import *
from pylab import *
from scipy import *
from scipy import optimize
import numpy as N
import pylab as P
data=pyfits.open('http://heasarc.gsfc.nasa.gov/docs/swift/results/transients/weak/GX304-1.orbit.lc.fits')
time = data[1].data.field(0)/86400. + data[1].header['MJDREFF'] + data[1].header['MJDREFI']
rate = data[1].data.field(1)
error = data[1].data.field(2)
data.close()
cond = ((time > 56200) & (time < 56220))
time=time[cond]
rate=rate[cond]
error=error[cond]
def expGauss(x, pos, wid, tConst, expMod = 0.5, amp = 1):
expMod *= 1.0
gNorm = amp * N.exp(-0.5*((x-pos)/(wid))**2)
g = expBroaden(gNorm, tConst, expMod)
return g, gNorm
def expBroaden(y, t, expMod):
fy = F.fft(y)
a = N.exp(-1*expMod*time/t)
fa = F.fft(a)
fy1 = fy*fa
yb = (F.ifft(fy1).real)/N.sum(a)
return yb
if __name__ == '__main__':
# Fit the first set
#p[0] -- amplitude, p[1] -- position, p[2] -- width
fitfuncG = lambda p, x: p[0]*N.exp(-0.5*(x-p[1])**2/p[2]**2) # Target function
errfuncG = lambda p, x, y: fitfuncG(p, x) - y # Distance to the target function
p0 = [0.20, 56210, 2.0] # Initial guess for the parameters
p1, success = optimize.leastsq(errfuncG, p0[:], args=(time, rate))
p1G = fitfuncG(p1, time)
# P.plot(rate, 'ro', alpha = 0.4, label = "Gaussian")
# P.plot(p1G, label = 'G-Fit')
def expGauss(x, pos, wid, tConst, expMod = 0.5, amp = 1):
#p[0] -- amplitude, p[1] -- position, p[2] -- width, p[3]--tConst, p[4] -- expMod
fitfuncExpG = lambda p, x: expGauss(x, p[1], p[2], p[3], p[4], p[0])[0]
errfuncExpG = lambda p, x, y: fitfuncExpG(p, x) - y # Distance to the target function
p0a = [0.20, 56210, 2.0] # Initial guess for the parameters
p1a, success = optimize.leastsq(errfuncExpG, p0a[:], args=(time, rate))
p1aG = fitfuncExpG(p1a, time)
print type(rate), type(time), len(rate), len(time)
P.plot(rate, 'go', alpha = 0.4, label = "ExpGaussian")
P.plot(p1aG, label = 'ExpG-Fit')
P.legend()
P.show()
I am sure to have confused the whole thing, so sorry in advance for that, but at this point I don't know how to go further...
The code take the data from the web, so it is directly executable.
At the moment the code runs without any error, but it doesn't produce any plot.
Again, my goal is to fit the data with those two functions, how can I improve my code to do that?
Any suggestion is really appreciated.
Similarly to your other question, here also I would use a trigonometric function to fit this peaK:
The following code works if pasted after your code:
import numpy as np
from scipy.optimize import curve_fit
x = time
den = x.max() - x.min()
x -= x.min()
y_points = rate
def func(x, a1, a2, a3):
return a1*sin(1*pi*x/den)+\
a2*sin(2*pi*x/den)+\
a3*sin(3*pi*x/den)
popt, pcov = curve_fit(func, x, y_points)
y = func(x, *popt)
plot(time,rate)
plot(x,y, color='r', linewidth=2.)
show()
Related
I have a original curve. I am developing a model curve matching closely the original curve. Everything is working fine but not matching. How to control the curvature of my model curve? Below code is based on answer here.
My code:
def curve_line(point1, point2):
a = (point2[1] - point1[1])/(np.cosh(point2[0]) - np.cosh(point1[0]))
b = point1[1] - a*np.sinh(point1[0])
x = np.linspace(point1[0], point2[0],100).tolist()
y = (a*np.cosh(x) + b).tolist()
return x,y
###### A sample of my code is given below
point1 = [10,100]
point2 = [20,50]
x,y = curve_line(point1, point2)
plt.plot(point1[0], point1[1], 'o')
plt.plot(point2[0], point2[1], 'o')
plt.plot(x,y) ## len(x)
My present output:
I tried following function as well:
y = (50*np.exp(-x/10) +2.5)
The output is:
Instead of just guessing the right parameters of your model function, you can fit a model curve to your data using curve_fit.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
x = np.array([ 1.92, 14.35, 21.50, 25.27, 27.34, 30.32, 32.31, 34.09, 34.21])
y = np.array([8.30, 8.26, 8.13, 7.49, 6.66, 4.59, 2.66, 0.60, 0.06])
def fun(x, a, b, c):
return a * np.cosh(b * x) + c
coef,_ = curve_fit(fun, x, y)
plt.plot(x, y, label='Original curve')
plt.plot(x, fun(x, *coef), label=f'Model: %5.3f cosh(%4.2f x + %4.2f)' % tuple(coef) )
plt.legend()
plt.show()
If it is important that the start and end points are closely fitted, you can pass uncertainties to curve_fit, adjusting them to lower values towards the ends, e.g. by
s = np.ones(len(x))
s[1:-1] = s[1:-1] * 3
coef,_ = curve_fit(fun, x, y, sigma=s)
Your other approach a * np.exp(b * x) + c will also work and gives -0.006 exp(0.21 x + 8.49).
In some cases you'll have to provide an educated guess for the initial values of the coefficients to curve_fit (it uses 1 as default).
I recently started with Python because I have an enormous amount of data where I want to automatically fit a Gaussian to the peaks in spectra. Below is an example of three peaks that I want to fit with three individual peaks.
I have found a question where someone is looking for something very similar, How can I fit multiple Gaussian curved to mass spectrometry data in Python?, and adopted it to my script.
I have added my code at the bottom and when I run the last section I get the error "RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 800." What am I missing?
The data can be downloaded at https://www.dropbox.com/s/zowawljcjco70yh/data_so.h5?dl=0
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
from scipy.sparse.linalg import spsolve
from scipy.optimize import curve_fit
#%% Read data
path = 'D:/Python/data_so.h5'
f = pd.read_hdf(path, mode = 'r')
t = f.loc[:, 'Time stamp']
d = f.drop(['Time stamp', 'Name spectrum'], axis = 1)
#%% Extract desired wavenumber range
wn_st=2000
wn_ed=2500
ix_st=np.argmin(abs(d.columns.values-wn_st))
ix_ed=np.argmin(abs(d.columns.values-wn_ed))
d = d.iloc[:, ix_st:ix_ed+1]
#%% AsLS baseline correction
spectrum = 230
y = d.iloc[spectrum]
niter = 10
lam = 200000
p = 0.005
L = len(y)
D = sparse.diags([1,-2,1],[0,-1,-2], shape=(L,L-2))
w = np.ones(L)
for i in range(niter):
W = sparse.spdiags(w, 0, L, L)
Z = W + lam * D.dot(D.transpose())
z = spsolve(Z, w*y)
w = p * (y > z) + (1-p) * (y < z)
corr = d.iloc[spectrum,:] - z
#%% Plot spectrum, baseline and corrected spectrum
plt.clf()
plt.plot(d.columns, d.iloc[spectrum,:])
plt.plot(d.columns, z)
plt.plot(d.columns, corr)
plt.gca().invert_xaxis()
plt.show()
#%%
x = d.columns.values
def gauss(x, a, mu, sig):
return a*np.exp(-(x.astype(float)-mu)**2/(2*sig**2))
fitx = x[(x>2232)*(x<2252)]
fity = y[(x>2232)*(x<2252)]
mu=np.sum(fitx*fity)/np.sum(fity)
sig=np.sqrt(np.sum(fity*(fitx-mu)**2)/np.sum(fity))
popt, pcov = curve_fit(gauss, fitx, fity, p0=[max(fity),mu, sig])
plt.plot(x, gauss(x, popt[0],popt[1],popt[2]), 'r-', label='fit')
I'm trying to find a numerical solution and eventually graph, the Gyllenberg-Webb model (cancer cell growth model). This model looks like:
Where β is the reproduction rate of proliferating cells, µp is the death rate of proliferating cells, µq is the death rate of quiescent cells, and r0 and ri are functions (transition rates) of N(t). Also N(t) = P(t)+Q(t).
For my purposes here I defined r_0(N) = bN and r_i(N) = aN to make things more simple.
My problem is when I try and plot my solution with pyplot I get
ValueError: x and y must have same first dimension
which I guess is self-explanatory, but I'm not sure how to go about fixing it without breaking everything else.
My code, which I've done only for the first equation so far, is:
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate
def fun(P,t, params):
beta, mp,b,N,Q = params
return(beta-mp-(b*N))*P+(a*N)*Q
params = (0.5,0.6,0.7,0.8,0.9)
tvec = np.arange(0,6,0.1)
s1 = scipy.integrate.odeint(
fun,
y0 = 1,
t = tvec,
args = (params,))
#print(s1)
plt.plot(fun,tvec)
In the end you will want to solve the coupled system. This is not complicated, just make the state object a vector and return the derivatives in the correct order.
def fun(state,t, params):
P, Q = state
beta, mp, mq, a, b = params
N = P+Q
r0N, riN = b*N, a*N
return [ (beta-mp-r0N)*P + riN*Q, r0N*P - (riN+mq)*Q ]
params = (0.5,0.6,0.7,0.8,0.9)
tsol = np.arange(0,6,0.1)
sol = odeint( fun, y0 = [ 1, 0], t = tsol, args = (params,))
Psol, Qsol = sol.T; plt.plot(tsol, Psol, tsol, Qsol)
You are currently plotting fun vs. tvec. What you actually want is to plot tvec vs s1. You will also have to define the parameter a in fun; I set it to 1 in the code below:
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate
def fun(P, t, params):
beta, mp, b, N, Q = params
return (beta-mp-(b*N))*P + (1.0 * N)*Q
params = (0.5, 0.6, 0.7, 0.8, 0.9)
tvec = np.arange(0, 6, 0.1)
s1 = scipy.integrate.odeint(
fun,
y0=1.,
t=tvec,
args=(params,))
plt.plot(tvec, s1)
plt.show()
This will plot:
I am trying to deblend the emission lines of low resolution spectrum in order to get the gaussian components. This plot represents the kind of data I am using:
After searching a bit, the only option I found was the application of the gauest function from the kmpfit package (http://www.astro.rug.nl/software/kapteyn/kmpfittutorial.html#gauest). I have copied their example but I cannot make it work.
I wonder if anyone could please offer me any alternative to do this or how to correct my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
def CurveData():
x = np.array([3963.67285156, 3964.49560547, 3965.31835938, 3966.14111328, 3966.96362305,
3967.78637695, 3968.60913086, 3969.43188477, 3970.25463867, 3971.07714844,
3971.89990234, 3972.72265625, 3973.54541016, 3974.36791992, 3975.19067383])
y = np.array([1.75001533e-16, 2.15520995e-16, 2.85030769e-16, 4.10072843e-16, 7.17558032e-16,
1.27759917e-15, 1.57074192e-15, 1.40802933e-15, 1.45038722e-15, 1.55195653e-15,
1.09280316e-15, 4.96611341e-16, 2.68777266e-16, 1.87075114e-16, 1.64335999e-16])
return x, y
def FindMaxima(xval, yval):
xval = np.asarray(xval)
yval = np.asarray(yval)
sort_idx = np.argsort(xval)
yval = yval[sort_idx]
gradient = np.diff(yval)
maxima = np.diff((gradient > 0).view(np.int8))
ListIndeces = np.concatenate((([0],) if gradient[0] < 0 else ()) + (np.where(maxima == -1)[0] + 1,) + (([len(yval)-1],) if gradient[-1] > 0 else ()))
X_Maxima, Y_Maxima = [], []
for index in ListIndeces:
X_Maxima.append(xval[index])
Y_Maxima.append(yval[index])
return X_Maxima, Y_Maxima
def GaussianMixture_Model(p, x, ZeroLevel):
y = 0.0
N_Comps = int(len(p) / 3)
for i in range(N_Comps):
A, mu, sigma = p[i*3:(i+1)*3]
y += A * np.exp(-(x-mu)*(x-mu)/(2.0*sigma*sigma))
Output = y + ZeroLevel
return Output
def Residuals_GaussianMixture(p, x, y, ZeroLevel):
return GaussianMixture_Model(p, x, ZeroLevel) - y
Wave, Flux = CurveData()
Wave_Maxima, Flux_Maxima = FindMaxima(Wave, Flux)
EmLines_Number = len(Wave_Maxima)
ContinuumLevel = 1.64191e-16
# Define initial values
p_0 = []
for i in range(EmLines_Number):
p_0.append(Flux_Maxima[i])
p_0.append(Wave_Maxima[i])
p_0.append(2.0)
p1, conv = optimize.leastsq(Residuals_GaussianMixture, p_0[:],args=(Wave, Flux, ContinuumLevel))
Fig = plt.figure(figsize = (16, 10))
Axis1 = Fig.add_subplot(111)
Axis1.plot(Wave, Flux, label='Emission line')
Axis1.plot(Wave, GaussianMixture_Model(p1, Wave, ContinuumLevel), 'r', label='Fit with optimize.leastsq')
print p1
Axis1.plot(Wave, GaussianMixture_Model([p1[0],p1[1],p1[2]], Wave, ContinuumLevel), 'g:', label='Gaussian components')
Axis1.plot(Wave, GaussianMixture_Model([p1[3],p1[4],p1[5]], Wave, ContinuumLevel), 'g:')
Axis1.set_xlabel( r'Wavelength $(\AA)$',)
Axis1.set_ylabel('Flux' + r'$(erg\,cm^{-2} s^{-1} \AA^{-1})$')
plt.legend()
plt.show()
A typical simplistic way to fit:
def model(p,x):
A,x1,sig1,B,x2,sig2 = p
return A*np.exp(-(x-x1)**2/sig1**2) + B*np.exp(-(x-x2)**2/sig2**2)
def res(p,x,y):
return model(p,x) - y
from scipy import optimize
p0 = [1e-15,3968,2,1e-15,3972,2]
p1,conv = optimize.leastsq(res,p0[:],args=(x,y))
plot(x,y,'+') # data
#fitted function
plot(arange(3962,3976,0.1),model(p1,arange(3962,3976,0.1)),'-')
Where p0 is your initial guess. By the looks of things, you might want to use Lorentzian functions...
If you use full_output=True, you get all kind of info about the fitting. Also check out curve_fit and the fmin* functions in scipy.optimize. There are plenty of wrappers around these around, but often, like here, it's easier to use them directly.
I am having some trouble translating my MATLAB code into Python via Scipy & Numpy. I am stuck on how to find optimal parameter values (k0 and k1) for my system of ODEs to fit to my ten observed data points. I currently have an initial guess for k0 and k1. In MATLAB, I can using something called 'fminsearch' which is a function that takes the system of ODEs, the observed data points, and the initial values of the system of ODEs. It will then calculate a new pair of parameters k0 and k1 that will fit the observed data. I have included my code to see if you can help me implement some kind of 'fminsearch' to find the optimal parameter values k0 and k1 that will fit my data. I want to add whatever code to do this to my lsqtest.py file.
I have three .py files - ode.py, lsq.py, and lsqtest.py
ode.py:
def f(y, t, k):
return (-k[0]*y[0],
k[0]*y[0]-k[1]*y[1],
k[1]*y[1])
lsq.py:
import pylab as py
import numpy as np
from scipy import integrate
from scipy import optimize
import ode
def lsq(teta,y0,data):
#INPUT teta, the unknowns k0,k1
# data, observed
# y0 initial values needed by the ODE
#OUTPUT lsq value
t = np.linspace(0,9,10)
y_obs = data #data points
k = [0,0]
k[0] = teta[0]
k[1] = teta[1]
#call the ODE solver to get the states:
r = integrate.odeint(ode.f,y0,t,args=(k,))
#the ODE system in ode.py
#at each row (time point), y_cal has
#the values of the components [A,B,C]
y_cal = r[:,1] #separate the measured B
#compute the expression to be minimized:
return sum((y_obs-y_cal)**2)
lsqtest.py:
import pylab as py
import numpy as np
from scipy import integrate
from scipy import optimize
import lsq
if __name__ == '__main__':
teta = [0.2,0.3] #guess for parameter values k0 and k1
y0 = [1,0,0] #initial conditions for system
y = [0.000,0.416,0.489,0.595,0.506,0.493,0.458,0.394,0.335,0.309] #observed data points
data = y
resid = lsq.lsq(teta,y0,data)
print resid
For these kind of fitting tasks you could use the package lmfit. The outcome of the fit would look like this; as you can see, the data are reproduced very well:
For now, I fixed the initial concentrations, you could also set them as variables if you like (just remove the vary=False in the code below). The parameters you obtain are:
[[Variables]]
x10: 5 (fixed)
x20: 0 (fixed)
x30: 0 (fixed)
k0: 0.12183301 +/- 0.005909 (4.85%) (init= 0.2)
k1: 0.77583946 +/- 0.026639 (3.43%) (init= 0.3)
[[Correlations]] (unreported correlations are < 0.100)
C(k0, k1) = 0.809
The code that reproduces the plot looks like this (some explanation can be found in the inline comments):
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from lmfit import minimize, Parameters, Parameter, report_fit
from scipy.integrate import odeint
def f(y, t, paras):
"""
Your system of differential equations
"""
x1 = y[0]
x2 = y[1]
x3 = y[2]
try:
k0 = paras['k0'].value
k1 = paras['k1'].value
except KeyError:
k0, k1 = paras
# the model equations
f0 = -k0 * x1
f1 = k0 * x1 - k1 * x2
f2 = k1 * x2
return [f0, f1, f2]
def g(t, x0, paras):
"""
Solution to the ODE x'(t) = f(t,x,k) with initial condition x(0) = x0
"""
x = odeint(f, x0, t, args=(paras,))
return x
def residual(paras, t, data):
"""
compute the residual between actual data and fitted data
"""
x0 = paras['x10'].value, paras['x20'].value, paras['x30'].value
model = g(t, x0, paras)
# you only have data for one of your variables
x2_model = model[:, 1]
return (x2_model - data).ravel()
# initial conditions
x10 = 5.
x20 = 0
x30 = 0
y0 = [x10, x20, x30]
# measured data
t_measured = np.linspace(0, 9, 10)
x2_measured = np.array([0.000, 0.416, 0.489, 0.595, 0.506, 0.493, 0.458, 0.394, 0.335, 0.309])
plt.figure()
plt.scatter(t_measured, x2_measured, marker='o', color='b', label='measured data', s=75)
# set parameters including bounds; you can also fix parameters (use vary=False)
params = Parameters()
params.add('x10', value=x10, vary=False)
params.add('x20', value=x20, vary=False)
params.add('x30', value=x30, vary=False)
params.add('k0', value=0.2, min=0.0001, max=2.)
params.add('k1', value=0.3, min=0.0001, max=2.)
# fit model
result = minimize(residual, params, args=(t_measured, x2_measured), method='leastsq') # leastsq nelder
# check results of the fit
data_fitted = g(np.linspace(0., 9., 100), y0, result.params)
# plot fitted data
plt.plot(np.linspace(0., 9., 100), data_fitted[:, 1], '-', linewidth=2, color='red', label='fitted data')
plt.legend()
plt.xlim([0, max(t_measured)])
plt.ylim([0, 1.1 * max(data_fitted[:, 1])])
# display fitted statistics
report_fit(result)
plt.show()
If you have data for additional variables, you can simply update the function residual.
The following worked for me:
import pylab as pp
import numpy as np
from scipy import integrate, interpolate
from scipy import optimize
##initialize the data
x_data = np.linspace(0,9,10)
y_data = np.array([0.000,0.416,0.489,0.595,0.506,0.493,0.458,0.394,0.335,0.309])
def f(y, t, k):
"""define the ODE system in terms of
dependent variable y,
independent variable t, and
optinal parmaeters, in this case a single variable k """
return (-k[0]*y[0],
k[0]*y[0]-k[1]*y[1],
k[1]*y[1])
def my_ls_func(x,teta):
"""definition of function for LS fit
x gives evaluation points,
teta is an array of parameters to be varied for fit"""
# create an alias to f which passes the optional params
f2 = lambda y,t: f(y, t, teta)
# calculate ode solution, retuen values for each entry of "x"
r = integrate.odeint(f2,y0,x)
#in this case, we only need one of the dependent variable values
return r[:,1]
def f_resid(p):
""" function to pass to optimize.leastsq
The routine will square and sum the values returned by
this function"""
return y_data-my_ls_func(x_data,p)
#solve the system - the solution is in variable c
guess = [0.2,0.3] #initial guess for params
y0 = [1,0,0] #inital conditions for ODEs
(c,kvg) = optimize.leastsq(f_resid, guess) #get params
print "parameter values are ",c
# fit ODE results to interpolating spline just for fun
xeval=np.linspace(min(x_data), max(x_data),30)
gls = interpolate.UnivariateSpline(xeval, my_ls_func(xeval,c), k=3, s=0)
#pick a few more points for a very smooth curve, then plot
# data and curve fit
xeval=np.linspace(min(x_data), max(x_data),200)
#Plot of the data as red dots and fit as blue line
pp.plot(x_data, y_data,'.r',xeval,gls(xeval),'-b')
pp.xlabel('xlabel',{"fontsize":16})
pp.ylabel("ylabel",{"fontsize":16})
pp.legend(('data','fit'),loc=0)
pp.show()
Look at the scipy.optimize module. The minimize function looks fairly similar to fminsearch, and I believe that both basically use a simplex algorithm for optimization.
# cleaned up a bit to get my head around it - thanks for sharing
import pylab as pp
import numpy as np
from scipy import integrate, optimize
class Parameterize_ODE():
def __init__(self):
self.X = np.linspace(0,9,10)
self.y = np.array([0.000,0.416,0.489,0.595,0.506,0.493,0.458,0.394,0.335,0.309])
self.y0 = [1,0,0] # inital conditions ODEs
def ode(self, y, X, p):
return (-p[0]*y[0],
p[0]*y[0]-p[1]*y[1],
p[1]*y[1])
def model(self, X, p):
return integrate.odeint(self.ode, self.y0, X, args=(p,))
def f_resid(self, p):
return self.y - self.model(self.X, p)[:,1]
def optim(self, p_quess):
return optimize.leastsq(self.f_resid, p_guess) # fit params
po = Parameterize_ODE(); p_guess = [0.2, 0.3]
c, kvg = po.optim(p_guess)
# --- show ---
print "parameter values are ", c, kvg
x = np.linspace(min(po.X), max(po.X), 2000)
pp.plot(po.X, po.y,'.r',x, po.model(x, c)[:,1],'-b')
pp.xlabel('X',{"fontsize":16}); pp.ylabel("y",{"fontsize":16}); pp.legend(('data','fit'),loc=0); pp.show()