How to get regression to optimise coefficients and data array? Python

How to get regression to optimise coefficients and data array? Python - python

Edited: 31/7/19
I have a data set which has PGR (pasture growth rate) and Foo (amount of pasture) readings for 4 sites for about 6 weeks of the year. The relationship between PGR and Foo is inverse exponential.
What I would like to do is group the weeks into 3 lots. Weeks with a similar relationship between PGR and Foo would be together.
Group size doesn't have to be the same.
But weeks must be consecutive, ie
Group one - week 1 , week 2, week 3.
Group two - week 4.
Group three - week 5, week 6.
What I would like to do is create 3 regressions that optimise to reduce the sum of squares and at the same time optimise the week selection.
The example above would suggest that weeks 1 - 3 are similar, 4 is different to 3, and 5 & 6 are similar to each other but different to 4. (I want this grouping to happen automatically based on the regressions)
The code below is my attempt; however, it doesn't work (I have included it to better explain what I am trying to do).
# Sample readings: four rows for each of weeks 1-6, with the PGR and Foo value of every row.
data = {'Week':[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6],
'PGR':[10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68],
'Foo': [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]}
# Tabular form of the readings; group() filters this frame by its 'Week' column.
df = pd.DataFrame(data)
def group(x):
    """Return the squared errors of the three per-group regressions.

    x holds, in order, the coefficients a, b, c (group 1), e, f, g
    (group 2), h, i, j (group 3) and the week break points w, z, y.
    Group 1 covers weeks [1, z), group 2 weeks [z, y) and group 3 weeks
    [y, w).  Every group is modelled as PGR = p + q / Foo + r * log(Foo).

    Reads the module-level DataFrame df (columns 'Week', 'Foo', 'PGR').
    Returns a tuple of three arrays, one squared error per row of each
    group; the arrays may have different lengths.
    """
    a, b, c, e, f, g, h, i, j, w, z, y = x
    week = df['Week']

    def _squared_errors(lo, hi, p, q, r):
        # Half-open interval [lo, hi), mirroring range(lo, hi).  Comparing
        # the column with a range object does not select an interval, and
        # a boolean mask also accepts the float break points an optimiser
        # supplies.
        rows = df.loc[(week >= lo) & (week < hi)]
        foo = rows['Foo'].to_numpy()
        pgr = rows['PGR'].to_numpy()
        fitted = p + q / foo + r * np.log(foo)
        return (fitted - pgr) ** 2

    SSE1 = _squared_errors(1, z, a, b, c)
    SSE2 = _squared_errors(z, y, e, f, g)
    SSE3 = _squared_errors(y, w, h, i, j)
    return SSE1, SSE2, SSE3
#I now have the sum of squares for all the regressions, which I want to minimise
#Minimising can happen by selecting groups that are more similar or by changing the regression coefficients
def objective(x):
    """Return the total squared error over all three regressions for x."""
    # Sum group by group: the groups can contain different numbers of
    # rows, so their error arrays cannot be stacked into a single array.
    return sum(np.sum(part) for part in group(x))
# Initial guess: all twelve parameters start at zero.
# NOTE(review): this also starts the week break points w, z, y at 0 - confirm that is intended.
x0 = np.zeros(12)
# bounds for a, b, c, e, f, g, h, i, j, w, z, y
bndspositive = (0,52) # applied to the three week break points (last three entries of bnds)
bnds100 = (-100.0, 100.0) # applied to the log coefficients c, g and j
no_bnds = (-1.0e10, 1.0e10) # very wide range used for the remaining coefficients
bnds = (no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, no_bnds, no_bnds, bnds100, bndspositive, bndspositive, bndspositive)
# optimise groups and regressions for best fit
solution = minimize(objective, x0, method=None, bounds=bnds)
# optimised parameter vector, in the same order as the bounds above
x = solution.x
Hopefully this makes sense, thanks

Another approach is to fit the data as a 3D surface. My equation search turned up "Foo = a * PGR + b * week^2 + Offset" as a likely candidate equation, with parameters a = 2.90940013, b = -2.33138779, and Offset = -10.04234205 yielding RMSE = 22.02 and R-squared = 0.6338. Here is a graphical Python fitter with your data and this equation.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    """Draw the fitted surface and the raw data points on one 3D axes.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  data is a sequence
    whose first three entries are the x, y and z samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('Week') # X axis data label
    axes.set_ylabel('PGR') # Y axis data label
    axes.set_zlabel('Foo') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Draw labelled contour lines of the fitted surface over the x/y data.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  Only data[0] and
    data[1] are used.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data, y_data = data[0], data[1]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('Week') # X axis data label
    axes.set_ylabel('PGR') # Y axis data label

    # Draw on the axes created above instead of relying on pyplot's
    # notion of the "current" axes.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw data as a 3D scatter plot.

    data is a sequence whose first three entries are the x, y and z
    samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]
    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('Week')
    axes.set_ylabel('PGR')
    axes.set_zlabel('Z Foo')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, Offset):
    """Model surface a * y + b * x**2 + Offset.

    data[0] supplies x (the week axis in this example) and data[1]
    supplies y (the PGR axis); further entries of data are ignored.
    """
    x, y = data[0], data[1]
    return a*y + b*numpy.square(x) + Offset
if __name__ == "__main__":
    week = [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6]
    PGR = [10,29,34.93,32,10,29,34.93,35,31,36,34.93,37,40,46,50,52,40,60,65,68,42,62,65,68]
    Foo = [20,45,102.28,66.79,25,50,90,75,50,75,90,130,50,75,90,130,30,60,105,150,35,60,110,140]

    # x = week, y = PGR, z = Foo (the value being fitted)
    xData = numpy.array(week, dtype=numpy.float32)
    yData = numpy.array(PGR, dtype=numpy.float32)
    zData = numpy.array(Foo, dtype=numpy.float32)
    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # goodness-of-fit statistics from the residuals along the z axis
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # computed as 1 - var(residuals) / var(observed z)
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

Related

Curve fitting and smoothing using python for 3D data

I am currently using polynomial defined function to create a 3d curve fitting but to no avail.
image 1 scatter, image 2 curve fitting
code is given below:
#import excel data
"""
how can I improve this polynomial function,
is there any better methods instead of polynomial?
"""
def func(data, a, b, c, d):
    """Polynomial surface a + b*x + c*y + d*x**2.

    data[0] supplies x and data[1] supplies y.  Any further entries of
    data (the caller in this snippet also passes z) are not used by the
    model.
    """
    x, y = data[0], data[1]
    return a + b * x + c * y + d * x**2
# using curve fitting to pass the function
fittedParameters, pcov = scipy.optimize.curve_fit(
func, [xData, yData, zData],
zData, p0 = None, method= 'lm', maxfev=5000000
) #, p0 = None, maxfev=5000
# making mesh grid
# making meshgrid
xModel = numpy.linspace( min(x_data), max(x_data), 80) #min(x_data)
yModel = numpy.linspace( min(y_data), max(y_data), 80)
X, Y = numpy.meshgrid( xModel, yModel )
#popt = fittedparameters
a = fittedParameters[0]
b = fittedParameters[1]
c = fittedParameters[2]
d = fittedParameters[3]
x = X
y = Y
Z = a + b * x + c * y + d * x**2
axes.plot_surface(
X, Y, Z,
rstride=1, cstride=1, cmap=cm.coolwarm,
linewidth=1, antialiased=True
)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
# add a title for surface plot
axes.set_title('Surface plot of LN(PoF) and length & depth')
axes.set_xlabel('Depth (mm)')
axes.set_ylabel('Length (mm)')
axes.set_zlabel('LN(PoF)') # Z axis data label
plt.show()
enter image description here

inbuild module
#%% splprep and splev for the 2D smoothing of x and y value
def splprep_2d(x, y, s=2, k=5):
    """Smooth a 2D curve with a parametric B-spline and return its points.

    x, y -- coordinates of the curve samples, in order along the curve.
    s    -- smoothing factor handed to splprep (default 2, as before).
    k    -- spline degree handed to splprep (default 5, as before).

    Returns (xnew, ynew): the smoothed coordinates evaluated at the same
    parameter values as the input samples.
    """
    tck, u = interpolate.splprep([x, y], s=s,
                                 task=0, full_output=0, quiet=0,
                                 k=k, t=None)
    # evaluate the fitted spline at the parameter value of every sample
    fittedParameters = interpolate.splev(u, tck)
    xnew = np.array(fittedParameters[0])
    ynew = np.array(fittedParameters[1])
    return xnew, ynew
xnew, ynew = splprep_2d(x,y)
splprep_2d(x,y)
s = 2 is the smoothing factor: a lower value follows the data more closely, while a higher value produces a smoother curve.
k is the degree of the spline; degrees up to 5 can be used.
These are your smoothed parameter:
xnew = np.array(fittedParameters[0])
ynew = np.array(fittedParameters[1])

Calculate root mean square of 3D deviation after surface fitting in python

My goal is to determine the 3D deviation (and its RMS) between a set of 3D data points and a fitted paraboloid in Python.
Starting from this: Paraboloid (3D parabola) surface fitting python, I can compute the RMS. If I understand correctly, the error and the RMS are computed along the Z-axis. Is it right?
I tried (without success) to determine the 3D deviation and RMS between the fitted surface and the data points, but I cannot get there.
Does anyone have any advice on how to solve this, please?
import numpy as np
from scipy.optimize import curve_fit
# Initial guess parameters
p0 = [1.5,0.4,1.5,0.4,1]
# INPUT DATA
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = np.array([1, .99, .98,.97,.96])
# FIT
def paraBolEqn(data,a,b,c,d,e):
    """Downward-opening paraboloid evaluated at the points in data.

    data unpacks into the x and y coordinates.  (b, d) is the position
    of the apex, a and c scale the x and y terms, and e is the value at
    the apex.
    """
    x, y = data
    x_term = ((x - b) / a) ** 2
    y_term = ((y - d) / c) ** 2
    return -(x_term + y_term) + e
data = np.vstack((x,y))
popt, _ = curve_fit(paraBolEqn,data,z,p0)
# Deviation and RMS along Z axis
modelPredictions = paraBolEqn(data, *popt)
absError = modelPredictions - z
RMSE = np.sqrt(np.mean(np.square(absError))) # Root Mean Squared Error along Z axis
print('RMSE (along Z axis):', RMSE)
# Deviation and RMS in 3D
# ??

Here is a graphical Python surface fitter using your data and equation that draws a 3D scatter plot, a 3D surface plot, and a contour plot. You should be able to click-drag with the mouse and rotate the 3D plots in 3-space for visual inspection. Note that you have 5 data points and 5 equation parameters, so you get what is in effect a perfect fit - the RMSE is effectively zero, the R-squared is 1.0, and the scipy code gives a warning when calculating the covariance matrix.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = [1, .99, .98,.97,.96]
# alias data to match previous example
xData = numpy.array(x, dtype=float)
yData = numpy.array(y, dtype=float)
zData = numpy.array(z, dtype=float)
# place the data in a single list
data = [xData, yData, zData]
def func(data,a,b,c,d,e):
    """Downward-opening paraboloid evaluated at the points in data.

    data[0] and data[1] hold the x and y coordinates; further entries
    are ignored.  (b, d) is the position of the apex, a and c scale the
    x and y terms, and e is the value at the apex.
    """
    # extract data from the single list
    x, y = data[0], data[1]
    x_term = ((x - b) / a) ** 2
    y_term = ((y - d) / c) ** 2
    return -(x_term + y_term) + e
def SurfacePlot(func, data, fittedParameters):
    """Draw the fitted surface and the raw data points on one 3D axes.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  data is a sequence
    whose first three entries are the x, y and z samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    # extract data from the single list
    x_data, y_data, z_data = data[0], data[1], data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Draw labelled contour lines of the fitted surface over the x/y data.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  Only data[0] and
    data[1] are used.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # extract data from the single list
    x_data, y_data = data[0], data[1]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above instead of relying on pyplot's
    # notion of the "current" axes.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw data as a 3D scatter plot.

    data is a sequence whose first three entries are the x, y and z
    samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    # extract data from the single list
    x_data, y_data, z_data = data[0], data[1], data[2]
    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
if __name__ == "__main__":
    initialParameters = [1.5,0.4,1.5,0.4,1] # from the posting

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted parameters', fittedParameters)

    # goodness-of-fit statistics from the residuals along the z axis
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # computed as 1 - var(residuals) / var(observed z)
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

Multiple Linear Regression using ScikitLearn, different approaches give different answers

This is probably equally valid on the stats exchange as here (it could be the stats or the Python that I'm not sure about).
Suppose I have two independent variables X,Y that explain some of the variance of Z.
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy.stats import pearsonr,linregress
Z = np.array([1,3,5,6,7,8,9,7,10,9])
X = np.array([2,5,3,1,6,4,7,8,6,7])
Y = np.array([3,2,6,4,6,1,2,5,6,10])
I want to regress out the variability in X and Y from Z. There's two approaches that I know of:
Regress out X from Z first (form a linear regression of X,Z, find the residual, then repeat for Y). Such that:
# Regress X out of Z first ...
regr = linregress(X,Z)
# regr[0] is the slope and regr[1] the intercept, so the fitted line is
# X*regr[0] + regr[1]; the residual subtracts the whole fitted value.
resi_1 = Z - (X*regr[0] + regr[1]) #residual = z-(mx+c)
# ... then regress Y out of what is left.
regr = linregress(Y,resi_1)
resi_2 = resi_1 - (Y*regr[0] + regr[1]) #residual = resi_1-(my+c)
Where resi_2 is the remainder of Z after X and Y have been sequentially regressed out.
The alternative is to create a multiple linear regression model for X and Y predicting Z:
regr = LinearRegression()
Model = regr.fit(np.array((X,Y)).swapaxes(0,1),Z)
pred = Model.predict(np.array((X,Y)).swapaxes(0,1))
resi_3 = Z - pred
The residual from the first sequential approach resi_2 and the multiple linear regression resi_3 are very similar (correlation=0.97) but not equivalent. The two residuals are plotted below:
Any thoughts great (not a statistician so could be my understanding vs a python problem!). Note if for the first part I regress out Y first, then X, I get different residuals.

Here is an example 3D graphical surface fitter using your data and scipy's curve_fit() routine with scatter, surface, and contour plots. You should be able to click-drag the 3D plots to rotate them in 3-space and see that the data does not appear to lie on any sort of smooth surface, so the flat plane model used here "z = (a *x) + (b * y) + c" is pretty much no better or worse than any other model for this data.
fitted prameters [ 0.65963199 0.18537117 2.43363301]
RMSE: 2.11487214206
R-squared: 0.383078044516
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    """Draw the fitted surface and the raw data points on one 3D axes.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  data is a sequence
    whose first three entries are the x, y and z samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Draw labelled contour lines of the fitted surface over the x/y data.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  Only data[0] and
    data[1] are used.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data, y_data = data[0], data[1]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above instead of relying on pyplot's
    # notion of the "current" axes.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw data as a 3D scatter plot.

    data is a sequence whose first three entries are the x, y and z
    samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]
    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, c): # example flat surface
    """Flat plane z = a*x + b*y + c.

    data[0] supplies x and data[1] supplies y; further entries of data
    are ignored.
    """
    x, y = data[0], data[1]
    return (a * x) + (b * y) + c
if __name__ == "__main__":
    xData = numpy.array([2.0, 5.0, 3.0, 1.0, 6.0, 4.0, 7.0, 8.0, 6.0, 7.0])
    yData = numpy.array([3.0, 2.0, 6.0, 4.0, 6.0, 1.0, 2.0, 5.0, 6.0, 10.0])
    zData = numpy.array([1.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 7.0, 10.0, 9.0])
    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # goodness-of-fit statistics from the residuals along the z axis
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # computed as 1 - var(residuals) / var(observed z)
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

In a regression surface Z, how to find maximum X value where Z is less than a predefined value?

I have created a meshgrid of X, Y with dimensions I x I and corresponding surface Z from a regression model. How can I pull out the coordinates of maximum of X, where Z is less than a given threshold value?
Input data
x = np.array( )
y = np.array( )
z = np.array( )
data = np.column_stack((x, y, z))
# regular grid covering the domain of the data
X, Y = np.meshgrid(x, y)
XX = X.flatten()
YY = Y.flatten()
# best-fit quadratic curve
A = np.c_[np.ones(data.shape[0]), data[:, :2], np.prod(data[:, :2], axis=1), data[:, :2]**2]
C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])
# evaluate it on a grid
Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX * YY, XX**2, YY**2],C).reshape(X.shape)
# Desired output
max(X, where Z < a) = [x1, y1, z1]
List with coordinates of maximized X value where Z is less than a.

Here is an example 3D surface fitter that extracts a data subset based on whether a data point's predicted Z value from the fit is below a specified threshold. This example also has a 3D scatterplot, a 3D surface plot, and a contour plot. Here, curve_fit is used for the fitting rather than linalg.lstsq().
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
# this is threshhold value for Z subset
zThreshhold = 5.0
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    """Draw the fitted surface and the raw data points on one 3D axes.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  data is a sequence
    whose first three entries are the x, y and z samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Draw labelled contour lines of the fitted surface over the x/y data.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  Only data[0] and
    data[1] are used.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data, y_data = data[0], data[1]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above instead of relying on pyplot's
    # notion of the "current" axes.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw data as a 3D scatter plot.

    data is a sequence whose first three entries are the x, y and z
    samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]
    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
# full 3D quadratic
def func(data, a, b, c, d, e, f):
    """Full quadratic surface in X and Y.

    Evaluates a + b*X + c*Y + d*X*Y + e*X**2 + f*Y**2, with X taken from
    data[0] and Y from data[1]; further entries of data are ignored.
    """
    X, Y = data[0], data[1]
    return a + b*X + c*Y + d*X*Y + e*X*X + f*Y*Y
if __name__ == "__main__":
    xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
    yData = numpy.array([1.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
    zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
    data = [xData, yData, zData]

    # these are the same as scipy default values in this example
    initialParameters = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # goodness-of-fit statistics from the residuals along the z axis
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # computed as 1 - var(residuals) / var(observed z)
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

    # keep every data point whose predicted Z lies below the threshold
    zSubset = [
        [x, y, z, predicted]
        for x, y, z, predicted in zip(xData, yData, zData, modelPredictions)
        if predicted < zThreshhold
    ]
    print("X, Y, Z, and Predicted values below Z threshold of", zThreshhold)
    for row in zSubset:
        print(row)

Scipy curve_fit for Two Dimensions Not Working - Object Too Deep?

I have a 2400 by 2400 array of data which looks something like this:
data = [[-2.302670298082603040e-01 -2.304885241061924717e-01 -2.305029774024092148e-01 -2.304807100897505734e-01 -2.303702531336284665e-01 -2.307144352067780346e-01...
[-2.302670298082603040e-01 -2.304885241061924717e-01 -2.305029774024092148e-01 -2.304807100897505734e-01 -2.303702531336284665e-01 -2.307144352067780346e-01...
...
and I am trying to fit the following 2D Gaussian function:
def Gauss2D(x, mux, muy, sigmax, sigmay, amplitude, offset, rotation):
    """Rotated 2D Gaussian evaluated on the coordinates in x.

    x        -- pair (X, Y) of coordinate arrays.
    mux, muy -- centre of the Gaussian.
    sigmax, sigmay -- widths along the two principal axes.
    amplitude, offset -- peak height above, and level of, the background.
    rotation -- rotation angle in radians.

    Returns an array with the same shape as X and Y.
    """
    assert len(x) == 2
    X, Y = x[0], x[1]

    # trigonometric terms shared by the quadratic-form coefficients
    cos_sq = np.cos(rotation)**2
    sin_sq = np.sin(rotation)**2
    sin_2r = np.sin(2*rotation)

    A = cos_sq/(2*sigmax**2) + sin_sq/(2*sigmay**2)
    B = sin_2r/(4*sigmay**2) - sin_2r/(4*sigmax**2)
    C = sin_sq/(2*sigmax**2) + cos_sq/(2*sigmay**2)

    dX = X - mux
    dY = Y - muy
    G = amplitude*np.exp(-((A * dX ** 2) + (2 * B * dX * dY) + (C * dY ** 2))) + offset
    return G
to this data, using scipy curve_fit. I have therefore defined the domain of the independent variables (coordinates) as follows:
vert = np.arange(2400, dtype=float)
horiz = np.arange(2400, dtype=float)
HORIZ, VERT = np.meshgrid(horiz, vert)
and as an initial estimate of the parameters:
po = np.asarray([1200., 1200., 300., 300., 0.14, 0.22, 0.], dtype=float)
so that I can perform the following fit:
popt, pcov = curve_fit(Gauss2D, (HORIZ, VERT), data, p0=po)
This is returning the following error message, and I haven't the faintest clue why:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
ValueError: object too deep for desired array
---------------------------------------------------------------------------
error Traceback (most recent call last)
<ipython-input-11-ebba75332bfa> in <module>()
----> 1 curve_fit(Gauss2D, (HORIZ, VERT), data, p0=po)
/home/harrythegenius/anaconda3/lib/python3.6/site-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
734 # Remove full_output from kwargs, otherwise we're passing it in twice.
735 return_full = kwargs.pop('full_output', False)
--> 736 res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
737 popt, pcov, infodict, errmsg, ier = res
738 cost = np.sum(infodict['fvec'] ** 2)
/home/harrythegenius/anaconda3/lib/python3.6/site-packages/scipy/optimize/minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
385 maxfev = 200*(n + 1)
386 retval = _minpack._lmdif(func, x0, args, full_output, ftol, xtol,
--> 387 gtol, maxfev, epsfcn, factor, diag)
388 else:
389 if col_deriv:
error: Result from function call is not a proper array of floats.
I don't understand the message "object too deep for desired array". I have also seen multiple online solutions to this error message, in which one would fix it by ensuring that all data types which were passed to curve_fit were floats, or by checking that the dimensions of the arrays were correct. I have tried both of these approaches, time and time again, but it makes no difference. So what's wrong with this one?

OK guys, I've fixed the problem myself. As I suspected, it's a dimensionality issue.
The appropriate dimensions for curve_fit applied to a 2D array are as follows:
Function - One Dimension, which in this case carries the same dimensions as the data set unless enforced
x data - (2, n*m), where n and m are the dimensions of the data array
y data - (n*m)
List of Initial Parameters - A 1D array simply containing all the parameters in the same order as stated in the function
I therefore left my parameter array unchanged, but made the following change to the function:
def Gauss2D(x, mux, muy, sigmax, sigmay, amplitude, offset, rotation):
    """Rotated 2D Gaussian evaluated on the coordinates in x, flattened.

    x        -- pair (X, Y) of coordinate arrays.
    mux, muy -- centre of the Gaussian.
    sigmax, sigmay -- widths along the two principal axes.
    amplitude, offset -- peak height above, and level of, the background.
    rotation -- rotation angle in radians.

    Returns a 1D array, which is the form curve_fit expects from the
    model function.
    """
    assert len(x) == 2
    X, Y = x[0], x[1]

    # trigonometric terms shared by the quadratic-form coefficients
    cos_sq = np.cos(rotation)**2
    sin_sq = np.sin(rotation)**2
    sin_2r = np.sin(2*rotation)

    A = cos_sq/(2*sigmax**2) + sin_sq/(2*sigmay**2)
    B = sin_2r/(4*sigmay**2) - sin_2r/(4*sigmax**2)
    C = sin_sq/(2*sigmax**2) + cos_sq/(2*sigmay**2)

    dX = X - mux
    dY = Y - muy
    G = amplitude*np.exp(-((A * dX ** 2) + (2 * B * dX * dY) + (C * dY ** 2))) + offset
    return G.ravel()
and I passed the following to the x data argument:
x = np.vstack((HORIZ.ravel(), VERT.ravel()))
and this to the y data argument:
y = data.ravel()
Thus, I optimised it using:
curve_fit(Gauss2D, x, y, po)
which works just fine.

Per the comments, here is a 3D surface fitter using curve_fit() that has 3D scatterplot, 3D surface plot, and contour plot.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    """Draw the fitted surface and the raw data points on one 3D axes.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  data is a sequence
    whose first three entries are the x, y and z samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    """Draw labelled contour lines of the fitted surface over the x/y data.

    func is evaluated as func([X, Y], *fittedParameters) on a 20 x 20
    grid spanning the x and y ranges of the data.  Only data[0] and
    data[1] are used.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_data, y_data = data[0], data[1]

    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)

    axes.plot(x_data, y_data, 'o')

    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    # Draw on the axes created above instead of relying on pyplot's
    # notion of the "current" axes.
    CS = axes.contour(X, Y, Z, numberOfContourLines, colors='k')
    axes.clabel(CS, inline=1, fontsize=10) # labels for contours

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    """Show the raw data as a 3D scatter plot.

    data is a sequence whose first three entries are the x, y and z
    samples.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    # Ask the figure for a 3D axes: a bare Axes3D(f) is not attached to
    # the figure by recent matplotlib releases, which leaves the plot empty.
    axes = f.add_subplot(111, projection='3d')
    axes.grid(True)

    x_data, y_data, z_data = data[0], data[1], data[2]
    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, alpha, beta):
    """Power-law surface a * t**alpha * p_p**beta.

    t is taken from data[0] and p_p from data[1]; further entries of
    data are ignored.
    """
    t, p_p = data[0], data[1]
    return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
    xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
    yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
    zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
    data = [xData, yData, zData]

    initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example

    # here a non-linear surface fit is made with scipy's curve_fit()
    fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)

    ScatterPlot(data)
    SurfacePlot(func, data, fittedParameters)
    ContourPlot(func, data, fittedParameters)

    print('fitted prameters', fittedParameters)

    # goodness-of-fit statistics from the residuals along the z axis
    modelPredictions = func(data, *fittedParameters)
    absError = modelPredictions - zData
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # computed as 1 - var(residuals) / var(observed z)
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to get regression to optimise coefficients and data array? Python - python

Related

Curve fitting and smoothing using python for 3D data

Calculate root mean square of 3D deviation after surface fitting in python

Multiple Linear Regression using ScikitLearn, different approaches give different answers

In a regression surface Z, how to find maximum X value where Z is less than a predefined value?

Scipy curve_fit for Two Dimensions Not Working - Object Too Deep?

Categories

Resources