Python - curve fitting of a more complex function

I wish to find the equation of the curve of best fit of the following graph:
The curve has an equation of the form y(t) = A * exp(-b*t) * cos(w*t + p), i.e. an exponentially decaying sinusoid.
I've looked at examples of curve fitting with numpy here and here, but they only show how to fit a purely exponential or a purely sinusoidal function, whereas I'd like to fit a curve that combines the two.
How would I do this?

Here's one approach you might find useful. This uses lmfit (http://lmfit.github.io/lmfit-py/), which provides a high-level interface to curve fitting:
import numpy as np
import matplotlib.pyplot as plt
from lmfit import Model
def decay_cosine(t, amp, beta, omega, phi):
    """model data as decaying cosine wave"""
    return amp * np.exp(-beta*t) * np.cos(omega*t + phi)
# create fake data to be fitted
t = np.linspace(0, 5, 101)
y = decay_cosine(t, 1.4, 0.9, 7.2, 0.23) + np.random.normal(size=len(t), scale=0.05)
# build model from decay_cosine
mod = Model(decay_cosine)
# create parameters, giving initial values
params = mod.make_params(amp=2.0, beta=0.5, omega=5, phi=0)
# you can place bounds on parameters:
params['phi'].max = np.pi/2
params['phi'].min = -np.pi/2
params['amp'].min = 0
# fit data to model
result = mod.fit(y, params, t=t)
# print out fit results
print(result.fit_report())
# plot data with best fit
plt.plot(t, y, 'bo', label='data')
plt.plot(t, result.best_fit, 'r')
plt.show()
This will print out a report like this:
[[Model]]
    Model(decay_cosine)
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 46
    # data points      = 101
    # variables        = 4
    chi-square         = 0.25540159
    reduced chi-square = 0.00263301
    Akaike info crit   = -595.983903
    Bayesian info crit = -585.523421
[[Variables]]
    amp:   1.38812335 +/- 0.03034640 (2.19%) (init = 2)
    beta:  0.90760648 +/- 0.02820705 (3.11%) (init = 0.5)
    omega: 7.16579292 +/- 0.02891827 (0.40%) (init = 5)
    phi:   0.26249321 +/- 0.02225816 (8.48%) (init = 0)
[[Correlations]] (unreported correlations are < 0.100)
    C(omega, phi)  = -0.713
    C(amp, beta)   =  0.695
    C(amp, phi)    =  0.253
    C(amp, omega)  = -0.183
    C(beta, phi)   =  0.178
    C(beta, omega) = -0.128
and produce a plot like this:

Here is a fairly simple example using curve_fit and leastsq from scipy.optimize.
1. Setting parameter values, model and experimental data.
import numpy as np
import scipy.optimize
import matplotlib.pyplot as plt
np.random.seed(0) # choosing seed for reproducibility
# ==== theoretical parameter values ====
x0 = 1
beta = .5
omega = 2*np.pi
phi = 0
params = x0, beta, omega, phi
# ==== model ====
def decay_cosine(t, x0, beta, omega, phi):
    # use the argument t here (not the global t_data, which would only work by accident)
    x = x0 * np.exp(-beta*t) * np.cos(omega*t + phi)
    return x
# ==== generating experimental data ====
t_data = np.linspace(0, 5, num=80)
noise = .05 * np.random.randn(t_data.size)
x_data = decay_cosine(t_data, *params) + noise
2. Fitting.
# ==== fitting using curve_fit ====
params_cf, _ = scipy.optimize.curve_fit(decay_cosine, t_data, x_data)
# ==== fitting using leastsq ====
def residuals(args, t, x):
    return x - decay_cosine(t, *args)

p_init = np.ones(len(params))  # initial guess: start every parameter at one
params_lsq, _ = scipy.optimize.leastsq(residuals, p_init, args=(t_data, x_data))
print(params_cf)
print(params_lsq)
array([ 1.04938794, 0.53877389, 6.30375113, -0.01850761])
array([ 1.04938796, 0.53877389, 6.30375103, -0.01850744])
3. Plotting.
plt.plot(t_data, x_data, '.', label='exp data')
plt.plot(t_data, decay_cosine(t_data, *params_cf), label='curve_fit')
plt.plot(t_data, decay_cosine(t_data, *params_lsq), '--', label='leastsq')
plt.legend()
plt.grid(True)
plt.show()
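Note that curve_fit is called above without an explicit starting point, so every parameter defaults to an initial value of 1, which happens to be good enough for this data. For noisier data or faster oscillations it is usually safer to pass a rough initial guess via p0 (a small sketch reusing decay_cosine and the data above; the guess values are only illustrative):
# rough initial guesses for x0, beta, omega, phi (illustrative values only)
p0 = [1.0, 0.5, 5.0, 0.0]
params_cf, _ = scipy.optimize.curve_fit(decay_cosine, t_data, x_data, p0=p0)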

Related

lmfit not exploring parameter space

I'm trying to use lmfit to find the best fit parameters of a function for some random data using the Model and Parameters classes. However, it doesn't seem to be exploring the parameter space very much. It does ~10 function evaluations and then returns a terrible fit.
Here is the code:
import numpy as np
from lmfit.model import Model
from lmfit.parameter import Parameters
import matplotlib.pyplot as plt
def dip(x, loc, wid, dep):
    """Make a line with a dip in it"""
    # Array of ones
    y = np.ones_like(x)
    # Define start and end points of dip
    start = np.abs(x - (loc - (wid/2.))).argmin()
    end = np.abs(x - (loc + (wid/2.))).argmin()
    # Set depth of the dip
    y[start:end] *= dep
    return y
def fitter(x, loc, wid, dep, scatter=0.001, sigma=3):
    """Find the parameters of the dip function in random data"""
    # Make the lmfit model
    model = Model(dip)
    # Make random data and print input values
    rand_loc = abs(np.random.normal(loc, scale=0.02))
    rand_wid = abs(np.random.normal(wid, scale=0.03))
    rand_dep = abs(np.random.normal(dep, scale=0.005))
    print('rand_loc: {}\nrand_wid: {}\nrand_dep: {}\n'.format(rand_loc, rand_wid, rand_dep))
    data = dip(x, rand_loc, rand_wid, rand_dep) + np.random.normal(0, scatter, x.size)
    # Make parameter ranges
    params = Parameters()
    params.add('loc', value=loc, min=x.min(), max=x.max())
    params.add('wid', value=wid, min=0, max=x.max()-x.min())
    params.add('dep', value=dep, min=scatter*10, max=0.8)
    # Fit the data (parameters must come before the independent-variable keyword)
    result = model.fit(data, params, x=x)
    print(result.fit_report())
    # Plot it
    plt.plot(x, data, 'bo')
    plt.plot(x, result.init_fit, 'k--', label='initial fit')
    plt.plot(x, result.best_fit, 'r-', label='best fit')
    plt.legend(loc='best')
    plt.show()
And then I run:
fitter(np.linspace(55707.97, 55708.1, 100), loc=55708.02, wid=0.04, dep=0.98)
Which returns (for example, since it's randomized data):
rand_loc: 55707.99659784677
rand_wid: 0.02015076619874132
rand_dep: 0.9849809461153651
[[Model]]
    Model(dip)
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 9
    # data points      = 100
    # variables        = 3
    chi-square         = 0.00336780
    reduced chi-square = 3.4720e-05
    Akaike info crit   = -1023.86668
    Bayesian info crit = -1016.05117
##  Warning: uncertainties could not be estimated:
    loc: at initial value
    wid: at initial value
[[Variables]]
    loc: 55708.0200 (init = 55708.02)
    wid: 0.04000000 (init = 0.04)
    dep: 0.99754082 (init = 0.98)
Any idea why it performs so few function evaluations and returns such a bad fit? Any assistance with this would be greatly appreciated!
This is a similar question to "fitting step function with variation in the step location with scipy optimize curve_fit"; see https://stackoverflow.com/a/59504874/5179748.
Basically, the solvers in scipy.optimize/lmfit assume that parameters are continuous -- not discrete -- variables. They make small changes to the parameters to see what change that makes in the result. A small change in your loc and wid parameters will have no effect on the result, as argmin() will always return an integer value.
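To see this concretely, here is a small check (a sketch using the same dip model as in the question): nudging loc by a step of the size a least-squares solver would use for its finite differences leaves the model output bit-for-bit unchanged, so the numerical derivative is exactly zero.
import numpy as np

def dip(x, loc, wid, dep):
    # same model as in the question: a flat line with a dip placed via argmin() indices
    y = np.ones_like(x)
    start = np.abs(x - (loc - wid/2.)).argmin()
    end = np.abs(x - (loc + wid/2.)).argmin()
    y[start:end] *= dep
    return y

x = np.linspace(55707.97, 55708.1, 100)
y1 = dip(x, 55708.02, 0.04, 0.98)
y2 = dip(x, 55708.02 + 1e-7, 0.04, 0.98)   # a tiny, solver-sized step in loc
print(np.array_equal(y1, y2))              # True: same argmin() indices, so zero numerical gradient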
You might find that using a Rectangle Model with a finite width (see https://lmfit.github.io/lmfit-py/builtin_models.html#rectanglemodel) will be helpful. I changed your example a bit, but it should be enough to get you started:
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import RectangleModel, ConstantModel
def dip(x, loc, wid, dep):
    """Make a line with a dip in it"""
    # Array of ones
    y = np.ones_like(x)
    # Define start and end points of dip
    start = np.abs(x - (loc - (wid/2.))).argmin()
    end = np.abs(x - (loc + (wid/2.))).argmin()
    # Set depth of the dip
    y[start:end] *= dep
    return y
x = np.linspace(0, 1, 201)
data = dip(x, 0.3, 0.09, 0.98) + np.random.normal(0, 0.001, x.size)
model = RectangleModel() + ConstantModel()
params = model.make_params(c=1.0, amplitude=-0.01, center1=.100, center2=0.7, sigma1=0.15)
params['sigma2'].expr = 'sigma1' # force left and right widths to be the same size
params['c'].vary = False # force offset = 1.0 : value away from "dip"
result = model.fit(data, params, x=x)
print(result.fit_report())
plt.plot(x, data, 'bo')
plt.plot(x, result.init_fit, 'k--', label='initial fit')
plt.plot(x, result.best_fit, 'r-', label='best fit')
plt.legend(loc='best')
plt.show()

Why doesn't minimizing squared errors with scipy's minimize give the same result as using curve_fit?

As an example, here's my code for fitting a multiexponential decay with a monoexponential decay (this doesn't produce a good fit, but it still works as an example):
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize, curve_fit
lifetimes = [1e-9, 2e-9, 4e-9, 8e-9]
amplitudes = [1000, 1000, 1000, 1000]
background = 10
t = np.arange(1024)*1e-10
y = np.zeros(len(t))
for i in range(len(lifetimes)):
    y += amplitudes[i] * np.exp(-t/lifetimes[i])
y += np.random.poisson(background, len(y))
def fit_fun(t, amplitude, lifetime, background):
    return amplitude * np.exp(-t/lifetime) + background

def loss_fun(params, x, y, fit_fun, c=5):
    fit_y = fit_fun(x, *params)
    residuals = y - fit_y
    loss = np.sum(residuals**2)
    return loss
p0 = [1000, 6e-9, 10]
result = minimize(loss_fun, p0, args=(t, y, fit_fun))
params_minimize = result.x
minimize_y = fit_fun(t, *params_minimize)
params_fit, _ = curve_fit(fit_fun, t, y, p0)
fit_y = fit_fun(t, *params_fit)
plt.semilogy(t, y)
plt.semilogy(t, minimize_y)
plt.semilogy(t, fit_y)
plt.ylim([1, plt.ylim()[1]])
plt.show()
Here are the resulting fits (green was fitted with curve_fit and orange with minimize).
So, why doesn't using minimize work properly?
Also, the reason I'm doing this is that I want to implement a loss function other than least squares. If it isn't possible this way, how could I do that?
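As a side note on the custom-loss part (a sketch, not taken from an answer here): scipy.optimize.least_squares works on a residual vector rather than a scalar cost and accepts robust loss functions through its loss argument, which may be simpler than writing your own loss_fun for minimize. The sketch below assumes the t and y arrays and the monoexponential model from the script above; the x_scale values are only a guess at sensible parameter scales.
import numpy as np
from scipy.optimize import least_squares

def residuals(params, t, y):
    # residual vector for the monoexponential model used above
    amplitude, lifetime, background = params
    return y - (amplitude * np.exp(-t / lifetime) + background)

p0 = [1000, 6e-9, 10]
# loss can be 'linear' (plain least squares), 'soft_l1', 'huber', 'cauchy' or 'arctan'
res = least_squares(residuals, p0, args=(t, y),
                    loss='soft_l1', f_scale=10.0,
                    x_scale=[1e3, 1e-9, 1e1])   # parameters span many orders of magnitude
print(res.x)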

scipy curve_fit raises "OptimizeWarning: Covariance of the parameters could not be estimated"

I am trying to fit this function to some data:
But when I use my code
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
def f(x, start, end):
    res = np.empty_like(x)
    res[x < start] = -1
    res[x > end] = 1
    linear = np.all([[start <= x], [x <= end]], axis=0)[0]
    res[linear] = np.linspace(-1., 1., num=np.sum(linear))
    return res
if __name__ == '__main__':
    xdata = np.linspace(0., 1000., 1000)
    ydata = -np.ones(1000)
    ydata[500:1000] = 1.
    ydata = ydata + np.random.normal(0., 0.25, len(ydata))
    popt, pcov = curve_fit(f, xdata, ydata, p0=[495., 505.])
    print(popt, pcov)
    plt.figure()
    plt.plot(xdata, f(xdata, *popt), 'r-', label='fit')
    plt.plot(xdata, ydata, 'b-', label='data')
    plt.show()
I get the error
OptimizeWarning: Covariance of the parameters could not be estimated
Output:
In this example start and end should be closer to 500, but they don't change at all from my initial guess.
The warning (not error) of
OptimizeWarning: Covariance of the parameters could not be estimated
means that the fit could not determine the uncertainties (variance) of the fitting parameters.
The main problem is that your model function f treats the parameters start and end as discrete values -- they are used as integer locations for the change in functional form. scipy's curve_fit (and all other optimization routines in scipy.optimize) assumes that parameters are continuous variables, not discrete.
The fitting procedure will try to take small steps (typically around machine precision) in the parameters to get a numerical derivative of the residual with respect to the variables (the Jacobian). With values used as discrete variables, these derivatives will be zero and the fitting procedure will not know how to change the values to improve the fit.
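A quick numerical check of that statement (a sketch reusing f and xdata from the script above): shift start by a finite-difference-sized amount and compare the outputs.
eps = 1e-8 * 495.0   # roughly the relative step a least-squares fitter would probe with
same = np.array_equal(f(xdata, 495.0, 505.0), f(xdata, 495.0 + eps, 505.0))
print(same)          # True -> the numerical derivative with respect to start is zero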
It looks like you're trying to fit a step function to some data. Allow me to recommend trying lmfit (https://lmfit.github.io/lmfit-py) which provides a higher-level interface to curve fitting, and has many built-in models. For example, it includes a StepModel that should be able to model your data.
For a slight modification of your data (so that it has a finite step), the following script with lmfit can fit such data:
#!/usr/bin/python
import numpy as np
from lmfit.models import StepModel, LinearModel
import matplotlib.pyplot as plt
np.random.seed(0)
xdata = np.linspace(0., 1000., 1000)
ydata = -np.ones(1000)
ydata[500:1000] = 1.
# note that a linear step is added here:
ydata[490:510] = -1 + np.arange(20)/10.0
ydata = ydata + np.random.normal(size=len(xdata), scale=0.1)
# model data as Step + Line
step_mod = StepModel(form='linear', prefix='step_')
line_mod = LinearModel(prefix='line_')
model = step_mod + line_mod
# make named parameters, giving initial values:
pars = model.make_params(line_intercept=ydata.min(),
                         line_slope=0,
                         step_center=xdata.mean(),
                         step_amplitude=ydata.std(),
                         step_sigma=2.0)
# fit data to this model with these parameters
out = model.fit(ydata, pars, x=xdata)
# print results
print(out.fit_report())
# plot data and best-fit
plt.plot(xdata, ydata, 'b')
plt.plot(xdata, out.best_fit, 'r-')
plt.show()
which prints out a report of
[[Model]]
    (Model(step, prefix='step_', form='linear') + Model(linear, prefix='line_'))
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 49
    # data points      = 1000
    # variables        = 5
    chi-square         = 9.72660131
    reduced chi-square = 0.00977548
    Akaike info crit   = -4622.89074
    Bayesian info crit = -4598.35197
[[Variables]]
    step_sigma:     20.6227793 +/- 0.77214167 (3.74%) (init = 2)
    step_center:    490.167878 +/- 0.44804412 (0.09%) (init = 500)
    step_amplitude: 1.98946656 +/- 0.01304854 (0.66%) (init = 0.996283)
    line_intercept: -1.00628058 +/- 0.00706005 (0.70%) (init = -1.277259)
    line_slope:     1.3947e-05 +/- 2.2340e-05 (160.18%) (init = 0)
[[Correlations]] (unreported correlations are < 0.100)
    C(step_amplitude, line_slope)     = -0.875
    C(step_sigma, step_center)        = -0.863
    C(line_intercept, line_slope)     = -0.774
    C(step_amplitude, line_intercept) =  0.461
    C(step_sigma, step_amplitude)     =  0.170
    C(step_sigma, line_slope)         = -0.147
    C(step_center, step_amplitude)    = -0.146
    C(step_center, line_slope)        =  0.127
and produces a plot of
Lmfit has lots of extra features. For example, if you want to set bounds on some of the parameter values or keep some from varying, you can do the following:
# make named parameters, giving initial values:
pars = model.make_params(line_intercept=ydata.min(),
                         line_slope=0,
                         step_center=xdata.mean(),
                         step_amplitude=ydata.std(),
                         step_sigma=2.0)
# now set max and min values for the step amplitude
pars['step_amplitude'].min = 0
pars['step_amplitude'].max = 100
# fix the intercept of the line at -1.0
pars['line_intercept'].value = -1.0
pars['line_intercept'].vary = False
# then run the fit with these parameters
out = model.fit(ydata, pars, x=xdata)
If you know the model should be Step+Constant and that the constant should be fixed, you could also modify the model to be
from lmfit.models import ConstantModel

# model data as Step + Constant
step_mod = StepModel(form='linear', prefix='step_')
const_mod = ConstantModel(prefix='const_')
model = step_mod + const_mod

pars = model.make_params(const_c=-1,
                         step_center=xdata.mean(),
                         step_amplitude=ydata.std(),
                         step_sigma=2.0)
pars['const_c'].vary = False

Integration of a Gaussian function to count the number of particles under the curve

I need to count the number of particles under the fitted Gaussian curve. The area of the fitted curve can be found by integrating the function between the limits (mean - 3*sigma) and (mean + 3*sigma). Would you please help me solve this? Thanks for your kind consideration.
import pylab as py
import numpy as np
from scipy import optimize
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd

BackPFT = 'T067.csv'
df_180 = pd.read_csv(BackPFT, error_bad_lines=False, header=1)
x_180 = df_180.iloc[:, 3]
y_180 = df_180.iloc[:, 4]
# I want to plot the distribution of s calculated by the following equation
s = np.sqrt((((16*x_180**2*38.22**2)/((4*38.22**2-y_180**2)**2))+1))-1
# the shape of this distribution is Gaussian
# I need to fit this distribution with the following parameters
mean = 0.433
sigma = 0.014
draw = s
# bin edges for the histogram
bi = np.linspace(0.01, 8, 1000)
data = py.hist(draw.dropna(), bins=bi)
# definition of the Gaussian function
# (only the amplitude a is actually fitted; mean and sigma come from the globals above)
def f(x, a, b, c):
    return a * py.exp(-(x - mean)**2.0 / (2*sigma**2))
# bin centres
x = [0.5 * (data[1][i] + data[1][i+1]) for i in range(len(data[1])-1)]
y = data[0]
# fitting the peak of the distribution
popt, pcov = optimize.curve_fit(f, x, y)
chi2, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
plt.plot(x_fit, y_fit, lw=3, color="r", ls="--")
plt.xlim(0, 2)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.show()
The problem is how to integrate the defined function (f) and count the number of particles under that area. Here I attach the file T067.csv. Thanks in advance for your kind consideration.
# (imports repeated here so this follow-up script runs on its own)
import numpy as np
import pandas as pd
import pylab as py
import matplotlib.pyplot as plt
from scipy import optimize, stats
from scipy.integrate import quad

BackPFT = 'T061.csv'
df_180 = pd.read_csv(BackPFT, skip_blank_lines=True, skiprows=1, header=None, skipfooter=0, engine='python')
x_180 = df_180.iloc[:, 3]
y_180 = df_180.iloc[:, 4]
b = 42.4
E = 109.8
LET = 24.19
REL = 127.32
mean = 0.339; m1 = 0.259
sigma = 0.012; s1 = 0.015
s = np.sqrt((((16*x_180**2*b**2)/((4*b**2-y_180**2)**2))+1))-1
draw = s
bi = np.linspace(0, 8, 2000)
binwidth = 0.004
# I want to plot the distribution of s; this distribution has three Gaussian peaks
data = py.hist(draw.dropna(), bins=bi, color='gray')
# Gaussian function for the first peak (peaks counted from the right);
# only the amplitude a is fitted, mean and sigma come from the globals above
def f(x, a, b, c):
    return a * py.exp(-(x - mean)**2.0 / (2*sigma**2))
# fitting the function (Gaussian); x holds the bin centres
x = [0.5 * (data[1][i] + data[1][i+1]) for i in range(len(data[1])-1)]
y = data[0]
popt, pcov = optimize.curve_fit(f, x, y)
chi, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
plt.plot(x_fit, y_fit, lw=5, color="r", ls="--")
# integration of the first function f
gaussF = lambda x, a: f(x, a, sigma, mean)   # (not used below; f1 is used instead)
bins = (6*sigma) / binwidth
delta = ((mean+3*sigma) - (mean-3*sigma)) / bins   # equals binwidth
f1 = lambda x: f(x, popt[0], sigma, mean)
result = quad(f1, mean-3*sigma, mean+3*sigma)
area = result[0]          # area under the fitted Gaussian between mean-3*sigma and mean+3*sigma
numPar = area / delta     # number of particles: area divided by the histogram bin width
print("\n\tArea under curve = ", area, "\n\tNumber of particles = ", numPar)
The file T061.csv is attached here. Thanks to Dr. I Putu Susila for his kind co-operation and interest.
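As a cross-check on the numerical integration (a sketch; the amplitude value below is hypothetical, while mean, sigma and binwidth follow the question): the integral of a Gaussian a*exp(-(x-mean)**2/(2*sigma**2)) over mean±3*sigma has the closed form a*sigma*sqrt(2*pi)*erf(3/sqrt(2)), about 99.7% of the full area, and dividing by the histogram bin width converts that area back into a count of entries.
from math import erf, sqrt, pi

a, mean, sigma = 1250.0, 0.339, 0.012   # a is a hypothetical fitted amplitude
binwidth = 0.004

area_3sigma = a * sigma * sqrt(2*pi) * erf(3/sqrt(2))   # area of the Gaussian over mean +/- 3*sigma
num_particles = area_3sigma / binwidth                  # histogram counts = area / bin width
print(area_3sigma, num_particles)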

Curve fitting with conditional equation

Problem
I have created a curve fitting exercise (see functional code below), but I would like to add to the functionality.
I need to be able to define the following condition: slope at min(xdata) = 0.
(in words: I want the fitted curve to start out with horizontal gradient)
What I have tried
I have spent quite a bit of time researching scipy.optimize.curve_fit and evaluating other options (the lmfit package, and the scipy functions scipy.optimize.fmin_slsqp, scipy.optimize.minimize, etc.). lmfit only allows me to set a static condition on the parameters, such as p1 = 2 * p2 + 3, but it does not let me address min(xdata) dynamically, and I cannot make use of the derivative in the constraint.
Scipy only allows me to minimize a function (find an optimal x when the parameters p are already known), or to define a specific range for the parameters. I was not able to define a second function that constrains the parameters during the curve fitting.
I need to be able to pass the condition directly to the curve fitting algorithm (rather than addressing the problem by bringing the condition into the cubic_fit() equation - it seems possible to eliminate e.g. p3 and define it as a combination of the other parameters and min(xdata)). My actual fitting function is much more complex and I need to run this script iteratively on a batch of data (varying min(xdata)). I cannot manually alter the fitting function each time...
I am grateful for any suggestions, maybe there are other packages out there that allow for a more complex definition of the curve fitting problem?
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import scipy.optimize
# generate dummy data - on which I will run a curve fit below
def cubic_fit_with_noise(x, p1, p2, p3, p4):
    return p1 + p2*x + p3*x**2 + p4*x**3 + np.random.rand()
xdata = [x * 0.1 for x in range(0, 100)]
ydata = np.array( [cubic_fit_with_noise (x, 2, 0.4, -.2,0.02) for x in xdata] )
# now, run the curve-fit
# set up the fitting function:
def cubic_fit(x, p1, p2, p3, p4):
    return p1 + p2*x + p3*x**2 + p4*x**3
# define starting point:
s1 = 2.5
s2 = 0.2
s3 = -.2
s4 = 0.02
# scipy curve fitting:
popt, pcov = scipy.optimize.curve_fit(cubic_fit, xdata, ydata, p0=(s1,s2,s3,s4))
y_modelled = np.array([cubic_fit(x, popt[0], popt[1], popt[2], popt[3]) for x in xdata])
print(popt) # prints out the 4 parameters p1,p2,p3,p4 defined in curve-fitting
plt.plot(xdata, ydata, 'bo')
plt.plot(xdata, y_modelled, 'r-')
plt.show()
The above code runs with Python3 (fix the print statement if you have Python2).
As an addition, I want to bring in the derivative:
def cubic_fit_derivative(x, p1, p2, p3, p4):
    return p2 + 2.0 * p3 * x + 3 * p4 * x**2
and the constraint that cubic_fit_derivative(min(xdata), p1,p2,p3,p4) = 0.
Your condition that the derivative of your polynomial is 0 at xmin can be expressed as a simple constraint, and it means that the variables p2, p3, and p4 are not actually independent. The derivative condition is
p2 + 2*p3*xmin + 3*p4*xmin**2 = 0
where xmin is the minimum value of xdata. Since xmin will be known prior to the fit (if not necessarily when your script is written), you can use this to constrain one of the three parameters. Because xmin may be zero (in fact, it is in your case), the constraint should be solved for p2:
p2 = -2*p3*xmin - 3*p4*xmin**2
Using lmfit, the original, unconstrained fit would look like this (I cleaned it up a bit):
import numpy as np
from lmfit import Model
import matplotlib.pylab as plt
# the model function:
def cubic_poly(x, p1, p2, p3, p4):
    return p1 + p2*x + p3*x**2 + p4*x**3
xdata = np.arange(100) * 0.1
ydata = cubic_poly(xdata, 2, 0.4, -.2, 0.02)
ydata = ydata + np.random.normal(size=len(xdata), scale=0.05)
# make Model, create parameters, run fit, print results
model = Model(cubic_poly)
params = model.make_params(p1=2.5, p2=0.2, p3=-0.0, p4=0.0)
result = model.fit(ydata, params, x=xdata)
print(result.fit_report())
plt.plot(xdata, ydata, 'bo')
plt.plot(xdata, result.best_fit, 'r-')
plt.show()
which prints:
[[Model]]
    Model(cubic_poly)
[[Fit Statistics]]
    # function evals   = 13
    # data points      = 100
    # variables        = 4
    chi-square         = 0.218
    reduced chi-square = 0.002
    Akaike info crit   = -604.767
    Bayesian info crit = -594.347
[[Variables]]
    p1:  2.00924432 +/- 0.018375 (0.91%) (init = 2.5)
    p2:  0.39427207 +/- 0.016155 (4.10%) (init = 0.2)
    p3: -0.19902928 +/- 0.003802 (1.91%) (init = -0)
    p4:  0.01993319 +/- 0.000252 (1.27%) (init = 0)
[[Correlations]] (unreported correlations are < 0.100)
    C(p3, p4) = -0.986
    C(p2, p3) = -0.967
    C(p2, p4) =  0.914
    C(p1, p2) = -0.857
    C(p1, p3) =  0.732
    C(p1, p4) = -0.646
and produces a plot of
Now, to add your constraint condition, we add xmin as a fixed parameter and constrain p2 as above; replace the parameter setup above with:
params = model.make_params(p1=2.5, p2=0.2, p3=-0.0, p4=0.0)
# add an extra parameter for `xmin`
params.add('xmin', min(xdata), vary=False)
# constrain p2 so that the derivative is 0 at xmin
params['p2'].expr = '-2*p3*xmin - 3*p4*xmin**2'
result = model.fit(ydata, params, x=xdata)
print(result.fit_report())
plt.plot(xdata, ydata, 'bo')
plt.plot(xdata, result.best_fit, 'r-')
plt.show()
which now prints
[[Model]]
    Model(cubic_poly)
[[Fit Statistics]]
    # function evals   = 10
    # data points      = 100
    # variables        = 3
    chi-square         = 1.329
    reduced chi-square = 0.014
    Akaike info crit   = -426.056
    Bayesian info crit = -418.241
[[Variables]]
    p1:   2.39001759 +/- 0.023239 (0.97%) (init = 2.5)
    p2:   0 +/- 0 (nan%) == '-2*p3*xmin - 3*p4*xmin**2'
    p3:  -0.10858258 +/- 0.002372 (2.19%) (init = -0)
    p4:   0.01424411 +/- 0.000251 (1.76%) (init = 0)
    xmin: 0 (fixed)
[[Correlations]] (unreported correlations are < 0.100)
    C(p3, p4) = -0.986
    C(p1, p3) = -0.742
    C(p1, p4) =  0.658
and a plot like
If xmin had not been zero (say, xdata = np.linspace(-10, 10, 101)), the value and uncertainty of p2 would not be zero.
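For instance, that variant could be run like this (a sketch reusing cubic_poly, model, and the constraint expression from above):
xdata = np.linspace(-10, 10, 101)
ydata = cubic_poly(xdata, 2, 0.4, -.2, 0.02) + np.random.normal(size=len(xdata), scale=0.05)

params = model.make_params(p1=2.5, p2=0.2, p3=-0.0, p4=0.0)
params.add('xmin', value=min(xdata), vary=False)     # xmin is now -10, not 0
params['p2'].expr = '-2*p3*xmin - 3*p4*xmin**2'

result = model.fit(ydata, params, x=xdata)
print(result.fit_report())   # p2 now comes out non-zero, with a propagated uncertainty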
As mentioned in my comment, you just have to fit the right function. I forgot the constant, though, so the function would be a*(x-xmin)**2*(x-xn)+c.
As curve_fit does not take additional fixed arguments the way e.g. leastsq does, the only trick is to pass xmin. I do that via a global variable (maybe not the nicest way, but it works; comments on how to do it better are welcome).
Eventually, you just need to add the following lines to your code:
def cubic_zero(x, a, xn, const):
    global xmin
    return a*(x - xmin)**2*(x - xn) + const
and
xmin=xdata[0]
popt2, pcov2 = scipy.optimize.curve_fit(cubic_zero, xdata, ydata)
y_modelled2 = np.array([cubic_zero(x, *popt2) for x in xdata])
print(popt2)
plt.plot(xdata, y_modelled2, color='#ee9900',linestyle="--")
providing
>>>[ 0.01429367 7.63190327 2.92604132]
and a plot of the fit.
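As an aside on the global variable (a sketch, not part of the original answer): a closure over xmin avoids the global entirely, since curve_fit only needs a callable of the form f(x, *params). This assumes the xdata, ydata, and scipy.optimize import from the question's script:
def make_cubic_zero(xmin):
    # return a model function with xmin baked in, suitable for curve_fit
    def cubic_zero(x, a, xn, const):
        return a*(x - xmin)**2*(x - xn) + const
    return cubic_zero

fit_fun = make_cubic_zero(xdata[0])
popt2, pcov2 = scipy.optimize.curve_fit(fit_fun, xdata, ydata)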
This solution uses scipy.optimize.leastsq. With a self-made residuals function there is no need to pass xmin as an additional parameter to the fit; the fit function is the same as in the other post and therefore needs no constraints. It looks like this:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq

def cubic_fit_with_noise(x, p1, p2, p3, p4):
    return p1 + p2*x + p3*x**2 + p4*x**3 + .2*(1 - 2*np.random.rand())

def cubic_zero(x, a, xn, const, xmin):
    return a*(x - xmin)**2*(x - xn) + const

def residuals(params, dataX, dataY):
    a, xn, const = params
    xmin = dataX[0]
    dist = np.fromiter((y - cubic_zero(x, a, xn, const, xmin) for x, y in zip(dataX, dataY)), float)
    return dist

xdata = np.linspace(.5, 10.5, 100)
ydata = np.fromiter((cubic_fit_with_noise(x, 2, 0.4, -.2, 0.02) for x in xdata), float)

# scipy curve fitting with leastsq:
initialGuess = [.3, .3, .3]
popt2, pcov2, info2, msg2, ier2 = leastsq(residuals, initialGuess, args=(xdata, ydata), full_output=True)
fullparams = np.append(popt2, xdata[0])
y_modelled2 = np.array([cubic_zero(x, *fullparams) for x in xdata])
print(popt2)
print(pcov2)
# convert the fitted (a, xn, const) back to the cubic coefficients p1, p2, p3, p4
print(np.array([-popt2[0]*xdata[0]**2*popt2[1] + popt2[2],
                popt2[0]*(xdata[0]**2 + 2*xdata[0]*popt2[1]),
                -popt2[0]*(2*xdata[0] + popt2[1]),
                popt2[0]]))
plt.plot(xdata, ydata, 'bo')
plt.plot(xdata, y_modelled2, 'r-')
plt.show()
and provides:
>>>[ 0.01710749 7.69369653 2.38986378]
>>>[[ 4.33308441e-06 5.61402017e-04 2.71819763e-04]
[ 5.61402017e-04 1.10367937e-01 5.67852980e-02]
[ 2.71819763e-04 5.67852980e-02 3.94127702e-02]]
>>>[ 2.35695882 0.13589672 -0.14872733 0.01710749]
Image upload does not work at the moment ... for whatever reason, but the result is the same as in the other post.
