Gaussian fit with consideration of uncertainties - python

I'm having trouble understanding what is wrong with the following piece of code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *
def gauss(p,x):
    """Gaussian peak on a constant background, in scipy.odr's (beta, x) order.

    p: sequence of five parameters --
        p[0] amplitude, p[1] centre, p[2] width (sigma),
        p[3] constant offset, p[4] constant added inside the exponent.
    x: scalar or NumPy array of abscissa values.

    NOTE: p[4] only rescales the amplitude (the peak height is
    p[0]*exp(p[4])), so a fit cannot determine p[0] and p[4] independently.
    """
    return p[0]*np.exp(-(x-p[1])**2/(2*p[2]**2)+p[4]) + p[3]
# Create a model for fitting.
gg = Model(gauss)
x = np.arange(0, 350)
# Create a RealData object using our initiated data from above.
data = RealData(x, y_data, sx=0, sy=y_data_err)
# Set up ODR with the model and data.
odr = ODR(data, gg, beta0=[0.1, 1., 1.0, 1.0, 1.0])
# Run the regression.
out = odr.run()
# Use the in-built pprint method to give us results.
out.pprint()
# evaluate the fitted model on a dense grid spanning the data range
x_fit = np.linspace(x[0], x[-1], 1000)
y_fit = gauss(out.beta, x_fit)
plt.figure()
# measured points with their y uncertainties (no x uncertainty);
# the original line was missing the comma after the y values
plt.errorbar(x, y_data, xerr=0, yerr=y_data_err, linestyle='None', marker='x')
plt.plot(x_fit, y_fit)
plt.show()
This was straight up copied from here with only changing the model. The error that I get is
scipy.odr.odrpack.odr_error: number of observations do not match
But as far as I can tell beta0 has five parameters, which is exactly as many as gauss needs to work. Would be great if someone could point to the error-source or my misconception.

Here is a graphing fitter with your equation, comparing both ODR and curve_fit on one graph. The example uses scipy's differential_evolution genetic algorithm module to determine initial parameter estimates for the solvers, and that module implements the Latin Hypercube algorithm to ensure a thorough search of parameter space which requires bounds within which to search. In this example, those bounds are taken from the data maximum and minimum values. As your post did not include data, I have used my own test data in the example. In this example the two fitted curves look very similar, diverging slightly at the plotted extremes.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.odr
from scipy.optimize import differential_evolution
import warnings
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
    """Gaussian peak plus constant offset, in curve_fit's (x, *params) order.

    x: scalar or NumPy array of abscissa values.
    a: amplitude, b: centre, c: width (sigma),
    d: constant added inside the exponent, offset: constant background.
    """
    return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset


def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
    """Adapt ``func`` to the (parameters, x) call order that scipy.odr uses."""
    return func(x, *parameters)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of ``func`` against the data.

    parameterTuple: candidate parameter values, unpacked positionally into
    ``func`` after ``xData``.
    """
    # Suppress warnings only while this candidate is evaluated, instead of
    # installing a permanent process-wide "ignore" filter on every call.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    """Search for starting parameter estimates with differential_evolution.

    The search bounds are derived from the minimum and maximum of the data.
    Returns ``result.x``, the best parameter vector found.
    """
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = [
        [minY, maxY], # search bounds for a
        [minX, maxX], # search bounds for b
        [minX, maxX], # search bounds for c
        [minY, maxY], # search bounds for d
        [0.0, maxY], # search bounds for Offset
    ]

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
geneticParameters = generate_Initial_Parameters()
##########################
# curve_fit section
##########################
fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters curve_fit:', fittedParameters_curvefit)
print()
modelPredictions_curvefit = func(xData, *fittedParameters_curvefit)
absError_curvefit = modelPredictions_curvefit - yData
SE_curvefit = numpy.square(absError_curvefit) # squared errors
MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))
print()
print('RMSE curve_fit:', RMSE_curvefit)
print('R-squared curve_fit:', Rsquared_curvefit)
print()
##########################
# ODR section
##########################
# Use the public scipy.odr namespace; scipy.odr.odrpack is a private module
# path that SciPy has deprecated.
data = scipy.odr.Data(xData,yData)
model = scipy.odr.Model(func_wrapper_for_ODR)
odr = scipy.odr.ODR(data, model, beta0=geneticParameters)
# Run the regression.
odr_out = odr.run()
print('Fitted parameters ODR:', odr_out.beta)
print()
modelPredictions_odr = func(xData, *odr_out.beta)
absError_odr = modelPredictions_odr - yData
SE_odr = numpy.square(absError_odr) # squared errors
MSE_odr = numpy.mean(SE_odr) # mean squared errors
RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))
print()
print('RMSE ODR:', RMSE_odr)
print('R-squared ODR:', Rsquared_odr)
print()
##########################################################
# graphics output section
def ModelsAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with both fitted models drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plots
    xModel = numpy.linspace(min(xData), max(xData))
    yModel_curvefit = func(xModel, *fittedParameters_curvefit)
    yModel_odr = func(xModel, *odr_out.beta)

    # now the models as line plots
    axes.plot(xModel, yModel_curvefit)
    axes.plot(xModel, yModel_odr)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelsAndScatterPlot(graphWidth, graphHeight)

Related

Results From Curve_Fit Are Off

I am trying to reproduce some results from a paper using non-linear parameter estimation, the problem, however, is that when I use curve_fit, all I get back is an array of 1's as opposed to anything close to the results I should be getting.
I have included a minimum working example to illustrate what I am getting and then also the actual results:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
xdata = np.array([0.00, 27.01,84.15,134.66,178.74,217.00,250.20,279.06,304.24,
326.29,346.71,362.87,378.13,391.75,403.96,414.96])
ydata = np.array([0.00,440.00,933.00,1154.00,1226.00,1222.00,1185.00,
1134.00,1081.00,1031.00,984.00,942.00,904.00,870.00,840.00,814.00])
# Non-Linear Estimation Function
def func(V,A,d):
    """Model y = A * V * exp(-d * V).

    V: scalar or NumPy array of the independent variable.
    A: linear scale factor, d: exponential decay rate.
    """
    # np.exp, not bare exp: only numpy (as np) is imported in this snippet
    return A*V*np.exp(-1*d*V)
popt, pcov = curve_fit(func,xdata,ydata)
popt
array([1., 1.])
The actual results I should be getting are the following:
param = estimate (standard err)
A = 17.6 (0.132)
d = 5.27 x 10^-3 (2.61 x 10^-5)
SciPy's curve_fit() routine uses all 1.0 values for initial parameter estimates if none are supplied. If curve_fit() cannot make any improvement on the initial parameter estimates, it will simply return them - which is why you get "fitted" parameter values of all 1.0. Here is a graphical Python fitter with your data and equation, using scipy's differential_evolution genetic algorithm module to supply initial parameter estimates for the non-linear fitter. That scipy module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, which requires bounds within which to search. In this example, those bounds are derived from the data max and min values. Note that it is much easier to supply ranges for the parameters than to give specific values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
x = [0.00, 27.01,84.15,134.66,178.74,217.00,250.20,279.06,304.24,
326.29,346.71,362.87,378.13,391.75,403.96,414.96]
y = [0.00,440.00,933.00,1154.00,1226.00,1222.00,1185.00,
1134.00,1081.00,1031.00,984.00,942.00,904.00,870.00,840.00,814.00]
xData = numpy.array(x, dtype=float)
yData = numpy.array(y, dtype=float)
# Non-Linear Estimation Function
def func(V,A,d):
    """Model y = A * V * exp(-d * V).

    V: scalar or NumPy array of the independent variable.
    A: linear scale factor, d: exponential decay rate.
    """
    return A*V*numpy.exp(-1.0*d*V)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of ``func`` against the data.

    parameterTuple: candidate parameter values, unpacked positionally into
    ``func`` after ``xData``.
    """
    # Suppress warnings only while this candidate is evaluated, instead of
    # installing a permanent process-wide "ignore" filter on every call.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    """Search for starting parameter estimates with differential_evolution.

    Both search ranges run from the smallest x value to one tenth of the
    largest x value. Returns ``result.x``, the best parameter vector found.
    """
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    #maxY = max(yData)
    #minY = min(yData)

    parameterBounds = [
        [minX, maxX/10.0], # search bounds for A
        [minX, maxX/10.0], # search bounds for d
    ]

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# NOTE: per the SciPy documentation, differential_evolution by default
# (polish=True) finishes by refining its best candidate with a local
# minimizer that respects the search bounds; it does not call curve_fit()
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Python scipy.optimise.curve_fit gives linear fit

I have come across a problem when playing with the parameters of the curve_fit from scipy. I have initially copied the code suggested by the docs. I then changed the equation slightly and it was fine, but having increased the np.linspace, the whole prediction ended up being a straight line. Any ideas?
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
def f(x, a, b, c):
    """Exponential decay from a (at x = 0) towards the asymptote c.

    x: scalar or NumPy array of the independent variable.
    b: decay constant, in the same units as x.
    """
    # This works fine on smaller numbers
    return (a - c) * np.exp(-x / b) + c
xdata = np.linspace(60, 3060, 200)
ydata = f(xdata, 100, 400, 20)
# noise
np.random.seed(1729)
ydata = ydata + np.random.normal(size=xdata.size) * 0.2
# graph
fig, ax = plt.subplots()
plt.plot(xdata, ydata, marker="o")
pred, covar = curve_fit(f, xdata, ydata)
plt.plot(xdata, f(xdata, *pred), label="prediciton")
plt.show()
You may need to start with a better guess. The default initial guess (1.0, 1.0, 1.0) seems to be in the divergent region.
I use the initial guess p0 = (50,200,100) and it works
fig, ax = plt.subplots()
plt.plot(xdata, ydata, marker="o")
pred, covar = curve_fit(f, xdata, ydata, p0 = (50,200,100))
plt.plot(xdata, f(xdata, *pred), label="prediciton")
plt.show()
Here is example code using your data and equation, with the initial parameter estimates given by scipy's differential_evolution genetic algorithm module. That module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, which requires bounds within which to search. In this example those bounds are taken from the data maximum and minimum values. It is much easier to supply ranges for the initial parameter estimates rather than specific values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
def func(x, a, b, c):
    """Exponential decay from a (at x = 0) towards the asymptote c.

    x: scalar or NumPy array of the independent variable.
    b: decay constant, in the same units as x.
    """
    return (a - c) * numpy.exp(-x / b) + c
xData = numpy.linspace(60, 3060, 200)
yData = func(xData, 100, 400, 20)
# noise
numpy.random.seed(1729)
yData = yData + numpy.random.normal(size=xData.size) * 0.2
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of ``func`` against the data.

    parameterTuple: candidate parameter values, unpacked positionally into
    ``func`` after ``xData``.
    """
    # Suppress warnings only while this candidate is evaluated, instead of
    # installing a permanent process-wide "ignore" filter on every call.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    """Search for starting parameter estimates with differential_evolution.

    The search bounds are derived from the minimum and maximum of the data.
    Returns ``result.x``, the best parameter vector found.
    """
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = [
        [minY, maxY], # search bounds for a
        [minX, maxX], # search bounds for b
        [minY, maxY], # search bounds for c
    ]

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# NOTE: per the SciPy documentation, differential_evolution by default
# (polish=True) finishes by refining its best candidate with a local
# minimizer that respects the search bounds; it does not call curve_fit()
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
This is due to a limitation of the Levenberg–Marquardt algorithm, which curve_fit uses by default. The way to use it well is to provide a decent initial guess for the parameters before optimizing. In my experience this is particularly important when fitting exponential functions like the one in your example. With iterative algorithms such as LM, the quality of your starting point determines where the result will converge. The more parameters you have, the more likely it is that your final result will converge to a completely unwanted curve. Overall, the solution is to find a good initial guess somehow, as the other answers did.

Fit a Gaussian which must use the provided mean in python

Is there any way to fit a Gaussian where I'm not just providing a suggestion or best guess for the mean, but it MUST take it and adjust the other parameters for this to work? I know this won't give me the best fit for the data, but that's not essential.
@BenedictWilkinsAI's suggestion is the simplest way: write the equation with the fixed value replacing the mean. If, however, you would like to use a programmatic solution, here is a graphical Python fitter which allows both normal (pun intended) and fixed-mean Gaussian peak equation fitting.
When a fixed mean parameter value of 9.0 is used, the fit is visibly worse - as expected. Also, curve_fit() gives a warning that it cannot calculate the covariance matrix, since the mean parameter cannot vary.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([5.357, 5.797, 5.936, 6.161, 6.697, 6.731, 6.775, 8.442, 9.861])
yData = numpy.array([0.376, 0.874, 1.049, 1.327, 2.054, 2.077, 2.138, 4.744, 7.104])
# normally fitted mean is 10.67571675
# set this value to None to fit normally, else
# set to the value of the fixed mean
fixedMean = 9.0


def func(x, a, b, c): # Gaussian peak equation
    """Gaussian peak a * exp(-0.5 * ((x - b) / c)**2).

    x: scalar or NumPy array of abscissa values.
    a: amplitude, b: mean, c: width (sigma).

    When the module-level ``fixedMean`` is not None it overrides ``b``, so
    the fitter cannot move the mean.
    """
    # compare against None explicitly: a fixed mean of 0.0 is valid but falsy
    if fixedMean is not None:
        b = fixedMean
    return a * numpy.exp(-0.5 * numpy.power((x-b) / c, 2.0))


# these are the same as the scipy defaults except for the fixed mean
if fixedMean is not None:
    initialParameters = numpy.array([1.0, fixedMean, 1.0])
else:
    initialParameters = numpy.array([1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, p0=initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
You might consider using lmfit for this (disclosure: I am lead author), as it supports fixing or adding bounds to any parameter. Depending on your data, doing such a fit might be as simple as:
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import GaussianModel
# get x, y data from some source
data = np.loadtxt('somedatafile.dat')
xdata = data[:, 0]
ydata = data[:, 1]
# create Gaussian, set initial parameter values
model = GaussianModel()
parameters = model.make_params(amplitude=10, center=12, sigma=3)
# tell the center parameter to not vary in the fit
parameters['center'].vary = False
# run fit
result = model.fit(ydata, parameters, x=xdata)
# print fit statistics, parameter values and uncertainties
print(result.fit_report())
# make a simple plot of data + fit, with residual
result.plot()
plt.show()
There are options for other peak shapes and controlling parameter values and ranges.

TypeError when fitting curve

I'm trying to fit a curve to some data that I have but for some reason I just get the error "'numpy.float64' object cannot be interpreted as an integer" and I don't understand why or how to fix it. Would be grateful for some help, the code is below:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
mud=[0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998]
F=[0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314]
fitfunc = lambda p, x: p[0]+p[1]*x # Target function
errfunc = lambda p, x, y: fitfunc(p, x) - y # Distance to the target function
p0 = [0.46,80,1] # Initial guess for the parameters
p1, success = optimize.leastsq(errfunc, p0[:], args=(mud, F))
m = np.linspace(max(mud),min(mud), 9)
# plot via the imported pyplot module; a bare plot() is not defined here
ax = plt.plot(mud,F,"b^")
# p1 holds the parameters returned by optimize.leastsq above
ax3 = plt.plot(m,fitfunc(p1,m),"g-")
Your problem is that your arguments, mud and F are lists, not arrays, which means that you cannot just multiply them with a number. Hence the error. If you define those parameters as np.ndarrays, it will work:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
mud=np.array([0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998])
F=np.array([0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314])
fitfunc = lambda p, x: p[0]+p[1]*x # Target function
errfunc = lambda p, x, y: fitfunc(p, x) - y # Distance to the target function
p0 = [0.46,80,1] # Initial guess for the parameters
p1, success = optimize.leastsq(errfunc, p0[:], args=(mud, F))
print(p1, success)
gives
[ 0.46006301 76.7920086 1. ] 2
Here is a graphical fitter using the Van Deemter Chromatography equation, it gives a good fit to your data.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# mud
xData=numpy.array([0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998])
# F
yData=numpy.array([0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314])
def func(x, a, b, c): # Van Deemter chromatography equation
    """Return a + b/x + c*x.

    x: scalar or NumPy array of the independent variable; must be non-zero
    because of the b/x term.
    """
    return a + b/x + c*x
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data (mud)') # X axis data label
    axes.set_ylabel('Y Data (F)') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

how to do exponential nonlinear regression in python

I am trying to do non-linear regression using the equation
y=ae^(-bT)
where T is temp with the data:
([26.67, 93.33, 148.89, 222.01, 315.56])
and y is the viscosity with the data:
([1.35, .085, .012, .0049, .00075])
The goal is to determine the values of a and b WITHOUT linearizing the equation, and also to plot the graph. So far, one method I've tried is:
import matplotlib
matplotlib.use('Qt4Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
def func(x, a, b):
    """Exponential model y = a * exp(-b * x).

    x: scalar or NumPy array of the independent variable.
    a: value at x = 0, b: decay rate.
    """
    return a*(np.exp(-b * x))
#data
temp = np.array([26.67, 93.33, 148.89, 222.01, 315.56])
Viscosity = np.array([1.35, .085, .012, .0049, .00075])
initialGuess=[200,1]
guessedFactors=[func(x,*initialGuess ) for x in temp]
#curve fit
popt,pcov = curve_fit(func, temp, Viscosity,initialGuess)
print (popt)
print (pcov)
tempCont=np.linspace(min(temp),max(temp),50)
fittedData=[func(x, *popt) for x in tempCont]
fig1 = plt.figure(1)
ax=fig1.add_subplot(1,1,1)
###the three sets of data to plot
ax.plot(temp,Viscosity,linestyle='',marker='o', color='r',label="data")
ax.plot(temp,guessedFactors,linestyle='',marker='^', color='b',label="initial guess")
###beautification
ax.legend(loc=0, title="graphs", fontsize=12)
ax.set_ylabel("Viscosity")
ax.set_xlabel("temp")
ax.grid()
# raw string: "\m" is not a valid escape sequence in an ordinary string
# literal; the title text itself is unchanged
ax.set_title(r"$\mathrm{curve}_\mathrm{fit}$")
###putting the covariance matrix nicely
tab= [['{:.2g}'.format(j) for j in i] for i in pcov]
the_table = plt.table(cellText=tab,
colWidths = [0.2]*3,
loc='upper right', bbox=[0.483, 0.35, 0.5, 0.25] )
plt.text(250,65,'covariance:',size=12)
###putting the plot
plt.show()
I'm pretty sure I've made it overly complicated and messed up.
Here is example code using your data and equation, with scipy's differential_evolution genetic algorithm used to determine initial parameter estimates for the non-linear fitter. The scipy implementation of Differential Evolution uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, here I have given what I thought were ranges within which the fitted parameters should exist.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
xData = numpy.array([26.67, 93.33, 148.89, 222.01, 315.56])
yData = numpy.array([1.35, .085, .012, .0049, .00075])
def func(T, a, b):
    """Exponential model y = a * exp(-b * T).

    T: scalar or NumPy array of the independent variable.
    a: value at T = 0, b: decay rate.
    """
    return a * numpy.exp(-b*T)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of ``func`` against the data.

    parameterTuple: candidate parameter values, unpacked positionally into
    ``func`` after ``xData``.
    """
    # Suppress warnings only while this candidate is evaluated, instead of
    # installing a permanent process-wide "ignore" filter on every call.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    """Search for starting parameter estimates with differential_evolution.

    The search ranges are fixed, hand-chosen intervals.
    Returns ``result.x``, the best parameter vector found.
    """
    parameterBounds = [
        [0.0, 10.0], # search bounds for a
        [-1.0, 1.0], # search bounds for b
    ]

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# NOTE: per the SciPy documentation, differential_evolution by default
# (polish=True) finishes by refining its best candidate with a local
# minimizer that respects the search bounds; it does not call curve_fit()
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    graphWidth, graphHeight: figure size in pixels; the figure is created
    at 100 dpi, so each value is divided by 100.0 to get the size in inches.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('temp') # X axis data label
    axes.set_ylabel('viscosity') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)

Categories