Multiple Linear Regression using ScikitLearn, different approaches give different answers - python

This is probably as equally valid on stats exchange as here (could be the stats or python that i'm not sure about.
Suppose I have two independent variables X,Y that explain some of the variance of Z.
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy.stats import pearsonr,linregress
Z = np.array([1,3,5,6,7,8,9,7,10,9])
X = np.array([2,5,3,1,6,4,7,8,6,7])
Y = np.array([3,2,6,4,6,1,2,5,6,10])
I want to regress out the variability in X and Y from Z. There's two approaches that I know of:
Regress out X from Z first (form a linear regression of X,Z, find the residual, then repeat for Y). Such that:
regr = linregress(X,Z)
resi_1 = NAO - (X*regr[0])+regr[1] #residual = y-mx+c
regr = linregress(Y,resi_1)
resi_2 = resi_1 - (Y*regr[0])+regr[1] #residual = y-mx+c
Where regr_2 is the remainder of Z where X and Y have been sequentially regressed out.
The alternative is to create a multiple linear regression model for X and Y predicting Z:
regr = LinearRegression()
Model = regr.fit(np.array((X,Y)).swapaxes(0,1),Z)
pred = Model.predict(np.array((X,Y)).swapaxes(0,1))
resi_3 = Z - pred
The residual from the first sequential approach resi_2 and the multiple linear regression resi_3 are very similar (correlation=0.97) but not equivalent. The two residuals are plotted below:
Any thoughts great (not a statistician so could be my understanding vs a python problem!). Note if for the first part I regress out Y first, then X, I get different residuals.

Here is an example 3D graphical surface fitter using your data and scipy's curve_fit() routine with scatter, surface, and contour plots. You should be able to click-drag the 3D plots to rotate them in 3-space and see that the data does not appear to lie on any sort of smooth surface, so the flat plane model used here "z = (a *x) + (b * y) + c" is pretty much no better or worse than any other model for this data.
fitted prameters [ 0.65963199 0.18537117 2.43363301]
RMSE: 2.11487214206
R-squared: 0.383078044516
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, c): # example flat surface
x = data[0]
y = data[1]
return (a * x) + (b * y) + c
if __name__ == "__main__":
xData = numpy.array([2.0, 5.0, 3.0, 1.0, 6.0, 4.0, 7.0, 8.0, 6.0, 7.0])
yData = numpy.array([3.0, 2.0, 6.0, 4.0, 6.0, 1.0, 2.0, 5.0, 6.0, 10.0])
zData = numpy.array([1.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 7.0, 10.0, 9.0])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

Related

Calculate root mean square of 3D deviation after surface fitting in python

My goal is to determine the 3D deviation (and its RMS) between a set of 3D data points and a fitted paraboloid in Python.
Starting from this: Paraboloid (3D parabola) surface fitting python, I can compute the RMS. If I understand correctly, the error and the RMS are computed along the Z-axis. Is it right?
I tried (without success) to determine the 3D deviation and RMS between the fitted surface and the data points, but I cannot get their.
Does anyone have some advices to solve this, please?
import numpy as np
from scipy.optimize import curve_fit
# Initial guess parameters
p0 = [1.5,0.4,1.5,0.4,1]
# INPUT DATA
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = np.array([1, .99, .98,.97,.96])
# FIT
def paraBolEqn(data,a,b,c,d,e):
x,y = data
return -(((x-b)/a)**2+((y-d)/c)**2)+e
data = np.vstack((x,y))
popt, _ = curve_fit(paraBolEqn,data,z,p0)
# Deviation and RMS along Z axis
modelPredictions = paraBolEqn(data, *popt)
absError = modelPredictions - z
RMSE = np.sqrt(np.mean(np.square(absError))) # Root Mean Squared Error along Z axis
print('RMSE (along Z axis):', RMSE)
# Deviation and RMS in 3D
# ??
Here is a graphical Python surface fitter using your data and equation that draws a 3D scatter plot, a 3D surface plot, and a contour plot. You should be able to click-drag with the mouse and rotate the 3D plots in 3-space for visual inspection. Note that you have 5 data points and 5 equation parameters, so you get what is in effect a perfect fit - the RMSE is effectively zero, the R-squared is 1.0, and the scipy code gives a warning when calculating the covariance matrix.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = [1, .99, .98,.97,.96]
# alias data to match previous example
xData = numpy.array(x, dtype=float)
yData = numpy.array(y, dtype=float)
zData = numpy.array(z, dtype=float)
# place the data in a single list
data = [xData, yData, zData]
def func(data,a,b,c,d,e):
# extract data from the single list
x = data[0]
y = data[1]
return -(((x-b)/a)**2+((y-d)/c)**2)+e
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
if __name__ == "__main__":
initialParameters = [1.5,0.4,1.5,0.4,1] # from the posting
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted parameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

In a regression surface Z, how to find maximum X value where Z is less than a predefined value?

I have created a meshgrid of X, Y with dimensions I x I and corresponding surface Z from a regression model. How can I pull out the coordinates of maximum of X, where Z is less than a given threshold value?
Input data
x = np.array( )
y = np.array( )
z = np.array( )
data = np.column_stack((x, y, z))
# regular grid covering the domain of the data
X, Y = np.meshgrid(x, y)
XX = X.flatten()
YY = Y.flatten()
# best-fit quadratic curve
A = np.c_[np.ones(data.shape[0]), data[:, :2], np.prod(data[:, :2], axis=1), data[:, :2]**2]
C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])
# evaluate it on a grid
Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX * YY, XX**2, YY**2],C).reshape(X.shape)
# Desired output
max(X, where Z < a) = [x1, y1, z1]
List with coordinates of maximized X value where Z is less than a.
Here is an example 3D surface fitter that extracts a data subset based on whether a data point's predicted Z value from the fit was below a specified threshhold. This example also has a 3D scatterplot, 3D surface plot, and a contour plot. Here, curve_fit is used for the fitting rather than linalg.lstsq().
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
# this is threshhold value for Z subset
zThreshhold = 5.0
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
# fulle 3D quadratic
def func(data, a, b, c, d, e, f):
X = data[0]
Y = data[1]
return a + b*X + c*Y + d*X*Y + e*X*X + f*Y*Y
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([1.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
# these are the same as scipy default values in this example
initialParameters = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
zSubset = []
for i in range(len(modelPredictions)):
if modelPredictions[i] < zThreshhold:
zSubset.append([xData[i], yData[i], zData[i], modelPredictions[i]])
print("X, Y, Z, and Predicted values below Z threshold of", zThreshhold)
for i in range(len(zSubset)):
print(zSubset[i])

How can I plot best fit line in multiple Linear Regression if I have two independent variables and one dependent variable

This code is for 1-Dimentional. What I have to edit in this code for multiple Dimentional?
y_pred= regressor.predict(X_test)
Plotting the Training set result:
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train) , color= 'blue')
plt.title('Salary vs Experience (training set) ')
plt.xlabel('year of experiance')
plt.ylabel('salary')
plt.show()
Plotting the test set result:
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color= 'blue')
plt.title('Salary vs Experience (training set) ')
plt.xlabel('year of experiance')
plt.ylabel('salary')
plt.show()
Here is an example surface fit that makes a 3D scatterplot, 3D surface plot, and a contour plot.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

Linear regression (best fit line) [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 3 years ago.
Improve this question
I'm using matplotlib in python to draw (best fit-line) in linear regression... the line looks so messy unless I sort the X array(input) and the Yhat array(prediction), but is it right?
If I sort the X array & the "Yhat" array separately, the data is changing... because the Yhat opposed to X before sorting will not remain the same after sorting.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
#load the data
X=[]
Y=[]
for line in open('data_2d2.csv'):
x1,x2,y=line.split(',')
X.append([float(x1),float(x2),1])
Y.append(float(y))
#trun X and Y into numpy array
X=np.array(X)
Y=np.array(Y)
#fig=plt.figure();
#ax=fig.add_subplot(111,projection='3d')
#ax.scatter(X[:,0],X[:,1],Y)
#plt.show()
#caculate weights
w=np.linalg.solve(np.dot(X.T,X),np.dot(X.T,Y))
print(w)
Yhat=np.dot(X,w)
fig=plt.figure();
ax.scatter(X[:,0],X[:,1],Y)
print(Yhat)
ax.plot((sorted(X[:,0])),sorted((X[:,1])),sorted(Yhat)))
plt.show()
Here is an example using scipy's curve_fit() to make a non-linear fit to 3D data, make a 3D scatterplot, make a 3D surface plot, make a contour plot, and yield RMSE and R-squared fit statistics. You could change the non-linear fit to a linear fit if you change the equation used here, this should help get you started with the 3D plotting for sure.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

implicit curve fitting using scattered data

I have two arrays - x and y - which corresponds to coordinates (x,y) in a cartesian plane. For example, with the scatter function (plt.scatter(x,y)) from matplotlib (so far, I'm trying to solve my problem using Python), I get the following result:
What I really need is to get an implicit function f(x,y) from this data, or at least coefficients from an approximate function f(x,y) . So far, I tried to use the curve_fitfunction from scipy.optimize as suggested here, but I've got the following error message:
OptimizeWarning: Covariance of the parameters could not be estimated category=OptimizeWarning)
That is my code so far:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy as sy
import pylab as plb
def func(x, a, b, c):
return a*x**b + c
def main():
file = open('firstcurve.out')
lines = file.read().split('\n')
file.close()
x = []
y = []
for item in lines:
if len(item) > 0:
numbers = item.split(",")
x = x + [float(numbers[0])]
y = y + [float(numbers[1])]
p0 = sy.array([1,1,1])
coeffs, matcov = curve_fit(func, x, y, p0)
yaj = func(x, coeffs[0], coeffs[1], coeffs[2])
plt.plot(x,yaj,'r-')
plt.show()
main()
Any help or suggestions are really appreciated!
PS: I'm trying to do it in Python, but MatLab is also an option in case there is any tool that does what I need to. I tried to use the SLM ToolKit but it didn't work as well.
This is more of a math problem than it is a coding question. You can't use the curve fit function in python because it's looking it's looking for a function i.e. you cannot have two separate Y's for the same X.
One thing that you can try if it's possible is to define a parametric function
x = f(t)
y = g(t)
And use the curve fit function to fit x and y vs. t. If you represent it that way, you can use smoothing splines to do the fit.
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.UnivariateSpline.html
Here is code to fit a surface equation "z = f(x,y)", plot the raw data 3D scatterplot, plot the 3D fitted surface, and graph a contour plot. This should at least give you the graphics you need.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
# this example uses curve_fit()'s default initial paramter values
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)

Categories