I'm having trouble fitting a curve with only one parameter using scipy.opt.curve_fit:
import scipy.optimize as opt
import numpy as np
def func(T):
    return 76.881324*np.exp((-L)/(8.314*T))
best_params, cov_matrix = opt.curve_fit(func, xdata = x, ydata = y, p0=[])
I have arrays of values, x (T in the equation) and y (P), that I'm trying to fit to the equation P = 76.881324 * exp(-L / (8.314 * T)), but curve_fit seems to want func() to have more than one argument. How do I fix this?
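The immediate fix is to make L a second argument of the model function, so curve_fit has a parameter to optimise, and to give it a starting value. Here is a minimal sketch; the data below is synthetic, standing in for your x and y arrays:
import numpy as np
import scipy.optimize as opt

def func(T, L):
    # the parameter to be fitted must come after the independent variable
    return 76.881324 * np.exp(-L / (8.314 * T))

# synthetic stand-ins for the asker's x (T) and y (P) arrays
T_example = np.array([250.0, 300.0, 350.0, 400.0])
P_example = func(T_example, 2000.0)  # generated from a known L

best_params, cov_matrix = opt.curve_fit(func, T_example, P_example, p0=[1.0])
print(best_params)  # should recover an L close to 2000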
Here is a graphical Python fitter using your equation with some test data. Replace the example data with your own and you should be done.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])
def func(T, L):
    return 76.881324*numpy.exp((-L)/(8.314*T))
# all "1.0" is the same as the scipy defaults
initialParameters = numpy.array([1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have a data set which has PGR (pasture growth rate) and Foo (amount of pasture) readings for 4 sites for about 6 weeks of the year. The relationship between PGR and Foo is inverse exponential.
What I would like to do is group the weeks into 3 lots. Weeks with a similar relationship between PGR and Foo would be together.
Group size doesn't have to be the same.
But the weeks must be consecutive, for example:
Group one - week 1, week 2, week 3.
Group two - week 4.
Group three - week 5, week 6.
What I would like to do is create 3 regressions that optimise to reduce the sum of squares and at the same time optimise the week selection.
The example above would suggest that weeks 1 - 3 are similar, 4 is different to 3, and 5 & 6 are similar to each other but different to 4. (I want this grouping to happen automatically based on the regressions)
The code below is my attempt; however, it doesn't work (I have included it to help better explain what I am trying to do).
import numpy as np
import pandas as pd
from scipy.optimize import minimize

data = {'Week':[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6],
'PGR':[10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68],
'Foo': [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]}
df = pd.DataFrame(data)
def group(x):
    a, b, c, e, f, g, h, i, j, w, z, y = x
    # below defines the groups; I want z, y & w to be optimised when this function is solved
    # This determines which weeks are in which groups
    group1 = df.loc[(df['Week'] == range(1,z))]
    group2 = df.loc[(df['Week'] == range(z,y))]
    group3 = df.loc[(df['Week'] == range(y,w))]
    # Once the groups are defined this will extract Foo and PGR values for the regressions
    xm1 = group1['Foo'].to_numpy()
    ym1 = group1['PGR'].to_numpy()
    xm2 = group2['Foo'].to_numpy()
    ym2 = group2['PGR'].to_numpy()
    xm3 = group3['Foo'].to_numpy()
    ym3 = group3['PGR'].to_numpy()
    # These are the 3 regressions
    y1 = a + b / xm1 + c * np.log(xm1)
    SSE1 = (y1 - ym1)**2
    y2 = e + f / xm2 + g * np.log(xm2)
    SSE2 = (y2 - ym2) ** 2
    y3 = h + i / xm3 + j * np.log(xm3)
    SSE3 = (y3 - ym3) ** 2
    return SSE1, SSE2, SSE3
#I now have the sum of squares for all the regressions, which I want to minimise
#Minimising can happen by selecting groups that are more similar or by changing the regression coefficients
def objective(x):
    return np.sum(group(x))
x0 = np.zeros(12)
# bounds for a, b, c, e, f, g, h, i, j, w, z, y
bndspositive = (0,52)
bnds100 = (-100.0, 100.0)
no_bnds = (-1.0e10, 1.0e10)
bnds = (no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, bndspositive, bndspositive, bndspositive)
# optimise groups and regressions for best fit
solution = minimize(objective, x0, method=None, bounds=bnds)
# solution
x = solution.x
Hopefully this makes sense, thanks
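One way to get the week selection to work (a sketch of one possible approach, not a drop-in fix; the names model and total_sse are mine): treat the two breakpoints as integers, enumerate every consecutive split of the six weeks into three non-empty groups, fit each group separately with curve_fit, and keep the split with the smallest total sum of squared errors.
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

def model(x, a, b, c):
    # the same functional form the question uses for each group
    return a + b / x + c * np.log(x)

def total_sse(df, z, y):
    # groups are the consecutive week ranges [1, z-1], [z, y-1], [y, 6]
    sse = 0.0
    for lo, hi in [(1, z - 1), (z, y - 1), (y, 6)]:
        grp = df[df['Week'].between(lo, hi)]
        params, _ = curve_fit(model, grp['Foo'].to_numpy(), grp['PGR'].to_numpy())
        resid = grp['PGR'].to_numpy() - model(grp['Foo'].to_numpy(), *params)
        sse += np.sum(resid ** 2)
    return sse

data = {'Week': [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6],
        'PGR': [10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68],
        'Foo': [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]}
df = pd.DataFrame(data)

# enumerate every way to split weeks 1-6 into three consecutive, non-empty groups
candidates = [(z, y) for z in range(2, 6) for y in range(z + 1, 7)]
best_z, best_y = min(candidates, key=lambda zy: total_sse(df, *zy))
print('group 1 = weeks 1..%d, group 2 = weeks %d..%d, group 3 = weeks %d..6'
      % (best_z - 1, best_z, best_y - 1, best_y))
With only six weeks there are just ten candidate splits, so brute force is cheap and avoids asking a continuous optimiser to choose integer group boundaries.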
Another approach is to fit the data as a 3D surface. My equation search turned up "Foo = a * PGR + b * week^2 + Offset" as a likely candidate equation, with parameters a = 2.90940013, b = -2.33138779, and Offset = -10.04234205 yielding RMSE = 22.02 and R-squared = 0.6338. Here is a graphical Python fitter with your data and this equation.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)
    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('Week') # X axis data label
    axes.set_ylabel('PGR') # Y axis data label
    axes.set_zlabel('Foo') # Z axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)
    axes.plot(x_data, y_data, 'o')
    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('Week') # X axis data label
    axes.set_ylabel('PGR') # Y axis data label
    CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
    matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    axes.scatter(x_data, y_data, z_data)
    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('Week')
    axes.set_ylabel('PGR')
    axes.set_zlabel('Foo')
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, Offset):
    x = data[0]
    y = data[1]
    return a*y + b*numpy.square(x) + Offset
if __name__ == "__main__":
    week = [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6]
    PGR = [10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68]
    Foo = [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]
    xData = numpy.array(week, dtype=numpy.float32)
    yData = numpy.array(PGR, dtype=numpy.float32)
    zData = numpy.array(Foo, dtype=numpy.float32)
    data = [xData, yData, zData]
    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)
    print('fitted parameters', fittedParameters)
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)
I am studying nonlinear curve fitting with Python.
I made an example like the one below, but the optimized plot is not drawn well by
plt.plot(basketCont, fittedData)
I guess the optimized parameters are not good either. Could you give some recommendations? Thank you.
import matplotlib
matplotlib.use('Qt4Agg')
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import numpy as np
from scipy.optimize import curve_fit
def func(x, a, b, c):
    return a - b * np.exp(c * x)
baskets = np.array([475, 108, 2, 38, 320])
scaling_factor = np.array([95.5, 57.7, 1.4, 21.9, 88.8])
popt,pcov = curve_fit(func, baskets, scaling_factor)
print (popt)
print (pcov)
basketCont=np.linspace(min(baskets),max(baskets),50)
fittedData=[func(x, *popt) for x in basketCont]
fig1 = plt.figure(1)
plt.scatter(baskets, scaling_factor, s=5)
plt.plot(basketCont, fittedData)
plt.grid()
plt.show()
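One likely reason the example behaves badly (my own observation): with no p0, curve_fit starts from a = b = c = 1.0, and with c = 1.0 the term np.exp(c * x) overflows for basket values as large as 475, so the optimizer starts from useless residuals. A quick check:
import numpy as np

print(np.exp(1.0 * 475))    # overflows to inf (with a RuntimeWarning) at the default starting value c = 1.0
print(np.exp(-0.01 * 475))  # a small negative starting c keeps the exponential finite
Passing a p0 with a small negative c may help, but as the answer below notes, a different functional form can simply fit this data better.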
I personally could not get a good fit to your data using the equation you posted, however the Hill sigmoidal equation gave a good fit. Here is the Python code for the graphical fitter I used.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings
baskets = numpy.array([475.0, 108.0, 2.0, 38.0, 320.0])
scaling_factor = numpy.array([95.5, 57.7, 1.4, 21.9, 88.8])
# rename data for simpler code re-use later
xData = baskets
yData = scaling_factor
def func(x, a, b, c): # Hill sigmoidal equation from zunzun.com
    return a * numpy.power(x, b) / (numpy.power(c, b) + numpy.power(x, b))
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0])
# do not print unnecessary warnings during curve_fit()
warnings.filterwarnings("ignore")
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I'm trying to fit a curve to some data that I have, but for some reason I just get the error "'numpy.float64' object cannot be interpreted as an integer", and I don't understand why or how to fix it. I would be grateful for some help; the code is below:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
mud=[0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998]
F=[0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314]
fitfunc = lambda p, x: p[0]+p[1]*x # Target function
errfunc = lambda p, x, y: fitfunc(p, x) - y # Distance to the target function
p0 = [0.46,80,1] # Initial guess for the parameters
p1, success = optimize.leastsq(errfunc, p0[:], args=(mud, F))
m = np.linspace(max(mud),min(mud), 9)
ax = plot(mud,F,"b^")
ax3 = plot(m,fitfunc(p2,m),"g-")
Your problem is that your arguments, mud and F, are lists, not arrays, which means you cannot simply multiply them by a number: a Python list can only be multiplied by an integer (which repeats it), so p[1]*x with a numpy.float64 coefficient raises exactly that error. If you define those parameters as numpy arrays, it will work:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
mud=np.array([0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998])
F=np.array([0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314])
fitfunc = lambda p, x: p[0]+p[1]*x # Target function
errfunc = lambda p, x, y: fitfunc(p, x) - y # Distance to the target function
p0 = [0.46,80,1] # Initial guess for the parameters
p1, success = optimize.leastsq(errfunc, p0[:], args=(mud, F))
print(p1, success)
gives
[ 0.46006301 76.7920086 1. ] 2
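To also draw the fitted line, the question's last two plotting lines (which refer to an undefined p2 and a bare plot call) can be adjusted roughly as follows, continuing from the array-based code above:
m = np.linspace(max(mud), min(mud), 9)           # evaluation grid over the data range
plt.plot(mud, F, "b^", label="data")             # raw data
plt.plot(m, fitfunc(p1, m), "g-", label="fit")   # fitted line using the solved parameters p1
plt.legend()
plt.show()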
Here is a graphical fitter using the Van Deemter chromatography equation; it gives a good fit to your data.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# mud
xData=numpy.array([0.0014700734999999996,
0.0011840320799999997,
0.0014232304799999995,
0.0008501509799999997,
0.0007235751599999999,
0.0005770661399999999,
0.0005581295999999999,
0.00028703807999999994,
0.00014850233999999998])
# F
yData=numpy.array([0.5750972123893806,
0.5512177433628319,
0.5638906194690266,
0.5240915044247788,
0.5217873451327435,
0.5066008407079646,
0.5027256637168142,
0.4847113274336283,
0.46502123893805314])
def func(x, a, b, c): # Van Deemter chromatography equation
    return a + b/x + c*x
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data (mud)') # X axis data label
    axes.set_ylabel('Y Data (F)') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have a function polyfit which is supposed to take the data (x and y here) and return a 2D line fitted to that data using linear regression. I get a good result, but it seems too good, and I don't know if I am doing it correctly all the way to the end.
import math
import numpy as np
import matplotlib.pyplot as plt

# creating the data and plotting them
np.random.seed(0)
N = 10 # number of data points
x = np.linspace(0,2*np.pi,N)
y = np.sin(x) + np.random.normal(0,.3,x.shape)
plt.figure()
plt.plot(x,y,'o')
plt.xlabel('x')
plt.ylabel('y')
plt.title('2D data (#data = %d)' % N)
plt.show()
def polyfit(x,y,degree,delta):
    #x,y
    X = np.vstack([np.ones(x.shape), x, y]).T
    Y = np.vstack([y]).T
    XtX = np.dot(X.T, X)
    XtY = np.dot(X.T, Y)
    theta = np.dot(np.linalg.inv(XtX), XtY)
    degree = theta.shape[0]
    delta = theta.T * theta
    x_theta = X.T * theta
    pred = np.sum([theta* x])
    loss = np.dot((Y.T - x_theta).T, (Y.T - x_theta))
    c = theta[0] + theta[1] * x[1] + theta[2] * math.pow(x[2],2)
    return pred
result = polyfit(x,y,2,2)
fin = y - result
plt.plot(x, fin, 'go--')
Data image:
Result of the fitted line:
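A note on why the fit looks too good (my reading of the posted polyfit, separate from the answer below): the design matrix X is built as [1, x, y], so y itself is a regressor and the normal equations can reproduce y almost exactly. A quadratic fit in x alone would build the design matrix like this sketch (polyfit_quadratic is a name I made up):
import numpy as np

def polyfit_quadratic(x, y):
    # design matrix with columns [1, x, x^2]; y must not appear as a column
    X = np.vstack([np.ones_like(x), x, x**2]).T
    # solve the normal equations (X^T X) theta = X^T y
    theta = np.linalg.solve(X.T @ X, X.T @ y)
    return X @ theta, theta

np.random.seed(0)
x = np.linspace(0, 2*np.pi, 10)
y = np.sin(x) + np.random.normal(0, .3, x.shape)
pred, theta = polyfit_quadratic(x, y)
print('coefficients:', theta)  # an ordinary quadratic fit, no longer a near-perfect one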
Here is an example graphical polynomial fitter using numpy's polyfit() for the fit and numpy's polyval() to calculate model predictions, along with RMSE and R-squared values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
polynomialOrder = 2 # example quadratic
# curve fit the test data
fittedParameters = numpy.polyfit(xData, yData, polynomialOrder)
print('Fitted Parameters:', fittedParameters)
modelPredictions = numpy.polyval(fittedParameters, xData)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = numpy.polyval(fittedParameters, xModel)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)