implicit curve fitting using scattered data - python

I have two arrays - x and y - which corresponds to coordinates (x,y) in a cartesian plane. For example, with the scatter function (plt.scatter(x,y)) from matplotlib (so far, I'm trying to solve my problem using Python), I get the following result:
What I really need is to get an implicit function f(x,y) from this data, or at least coefficients from an approximate function f(x,y) . So far, I tried to use the curve_fitfunction from scipy.optimize as suggested here, but I've got the following error message:
OptimizeWarning: Covariance of the parameters could not be estimated category=OptimizeWarning)
That is my code so far:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy as sy
import pylab as plb
def func(x, a, b, c):
return a*x**b + c
def main():
file = open('firstcurve.out')
lines = file.read().split('\n')
file.close()
x = []
y = []
for item in lines:
if len(item) > 0:
numbers = item.split(",")
x = x + [float(numbers[0])]
y = y + [float(numbers[1])]
p0 = sy.array([1,1,1])
coeffs, matcov = curve_fit(func, x, y, p0)
yaj = func(x, coeffs[0], coeffs[1], coeffs[2])
plt.plot(x,yaj,'r-')
plt.show()
main()
Any help or suggestions are really appreciated!
PS: I'm trying to do it in Python, but MatLab is also an option in case there is any tool that does what I need to. I tried to use the SLM ToolKit but it didn't work as well.

This is more of a math problem than it is a coding question. You can't use the curve fit function in python because it's looking it's looking for a function i.e. you cannot have two separate Y's for the same X.
One thing that you can try if it's possible is to define a parametric function
x = f(t)
y = g(t)
And use the curve fit function to fit x and y vs. t. If you represent it that way, you can use smoothing splines to do the fit.
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.UnivariateSpline.html

Here is code to fit a surface equation "z = f(x,y)", plot the raw data 3D scatterplot, plot the 3D fitted surface, and graph a contour plot. This should at least give you the graphics you need.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
# this example uses curve_fit()'s default initial paramter values
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)

Related

Calculate root mean square of 3D deviation after surface fitting in python

My goal is to determine the 3D deviation (and its RMS) between a set of 3D data points and a fitted paraboloid in Python.
Starting from this: Paraboloid (3D parabola) surface fitting python, I can compute the RMS. If I understand correctly, the error and the RMS are computed along the Z-axis. Is it right?
I tried (without success) to determine the 3D deviation and RMS between the fitted surface and the data points, but I cannot get their.
Does anyone have some advices to solve this, please?
import numpy as np
from scipy.optimize import curve_fit
# Initial guess parameters
p0 = [1.5,0.4,1.5,0.4,1]
# INPUT DATA
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = np.array([1, .99, .98,.97,.96])
# FIT
def paraBolEqn(data,a,b,c,d,e):
x,y = data
return -(((x-b)/a)**2+((y-d)/c)**2)+e
data = np.vstack((x,y))
popt, _ = curve_fit(paraBolEqn,data,z,p0)
# Deviation and RMS along Z axis
modelPredictions = paraBolEqn(data, *popt)
absError = modelPredictions - z
RMSE = np.sqrt(np.mean(np.square(absError))) # Root Mean Squared Error along Z axis
print('RMSE (along Z axis):', RMSE)
# Deviation and RMS in 3D
# ??
Here is a graphical Python surface fitter using your data and equation that draws a 3D scatter plot, a 3D surface plot, and a contour plot. You should be able to click-drag with the mouse and rotate the 3D plots in 3-space for visual inspection. Note that you have 5 data points and 5 equation parameters, so you get what is in effect a perfect fit - the RMSE is effectively zero, the R-squared is 1.0, and the scipy code gives a warning when calculating the covariance matrix.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
x = [0.4,0.165,0.165,0.585,0.585]
y = [.45, .22, .63, .22, .63]
z = [1, .99, .98,.97,.96]
# alias data to match previous example
xData = numpy.array(x, dtype=float)
yData = numpy.array(y, dtype=float)
zData = numpy.array(z, dtype=float)
# place the data in a single list
data = [xData, yData, zData]
def func(data,a,b,c,d,e):
# extract data from the single list
x = data[0]
y = data[1]
return -(((x-b)/a)**2+((y-d)/c)**2)+e
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
# extract data from the single list
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
if __name__ == "__main__":
initialParameters = [1.5,0.4,1.5,0.4,1] # from the posting
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted parameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

Multiple Linear Regression using ScikitLearn, different approaches give different answers

This is probably as equally valid on stats exchange as here (could be the stats or python that i'm not sure about.
Suppose I have two independent variables X,Y that explain some of the variance of Z.
from sklearn.linear_model import LinearRegression
import numpy as np
from scipy.stats import pearsonr,linregress
Z = np.array([1,3,5,6,7,8,9,7,10,9])
X = np.array([2,5,3,1,6,4,7,8,6,7])
Y = np.array([3,2,6,4,6,1,2,5,6,10])
I want to regress out the variability in X and Y from Z. There's two approaches that I know of:
Regress out X from Z first (form a linear regression of X,Z, find the residual, then repeat for Y). Such that:
regr = linregress(X,Z)
resi_1 = NAO - (X*regr[0])+regr[1] #residual = y-mx+c
regr = linregress(Y,resi_1)
resi_2 = resi_1 - (Y*regr[0])+regr[1] #residual = y-mx+c
Where regr_2 is the remainder of Z where X and Y have been sequentially regressed out.
The alternative is to create a multiple linear regression model for X and Y predicting Z:
regr = LinearRegression()
Model = regr.fit(np.array((X,Y)).swapaxes(0,1),Z)
pred = Model.predict(np.array((X,Y)).swapaxes(0,1))
resi_3 = Z - pred
The residual from the first sequential approach resi_2 and the multiple linear regression resi_3 are very similar (correlation=0.97) but not equivalent. The two residuals are plotted below:
Any thoughts great (not a statistician so could be my understanding vs a python problem!). Note if for the first part I regress out Y first, then X, I get different residuals.
Here is an example 3D graphical surface fitter using your data and scipy's curve_fit() routine with scatter, surface, and contour plots. You should be able to click-drag the 3D plots to rotate them in 3-space and see that the data does not appear to lie on any sort of smooth surface, so the flat plane model used here "z = (a *x) + (b * y) + c" is pretty much no better or worse than any other model for this data.
fitted prameters [ 0.65963199 0.18537117 2.43363301]
RMSE: 2.11487214206
R-squared: 0.383078044516
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, c): # example flat surface
x = data[0]
y = data[1]
return (a * x) + (b * y) + c
if __name__ == "__main__":
xData = numpy.array([2.0, 5.0, 3.0, 1.0, 6.0, 4.0, 7.0, 8.0, 6.0, 7.0])
yData = numpy.array([3.0, 2.0, 6.0, 4.0, 6.0, 1.0, 2.0, 5.0, 6.0, 10.0])
zData = numpy.array([1.0, 3.0, 5.0, 6.0, 7.0, 8.0, 9.0, 7.0, 10.0, 9.0])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

In a regression surface Z, how to find maximum X value where Z is less than a predefined value?

I have created a meshgrid of X, Y with dimensions I x I and corresponding surface Z from a regression model. How can I pull out the coordinates of maximum of X, where Z is less than a given threshold value?
Input data
x = np.array( )
y = np.array( )
z = np.array( )
data = np.column_stack((x, y, z))
# regular grid covering the domain of the data
X, Y = np.meshgrid(x, y)
XX = X.flatten()
YY = Y.flatten()
# best-fit quadratic curve
A = np.c_[np.ones(data.shape[0]), data[:, :2], np.prod(data[:, :2], axis=1), data[:, :2]**2]
C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])
# evaluate it on a grid
Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX * YY, XX**2, YY**2],C).reshape(X.shape)
# Desired output
max(X, where Z < a) = [x1, y1, z1]
List with coordinates of maximized X value where Z is less than a.
Here is an example 3D surface fitter that extracts a data subset based on whether a data point's predicted Z value from the fit was below a specified threshhold. This example also has a 3D scatterplot, 3D surface plot, and a contour plot. Here, curve_fit is used for the fitting rather than linalg.lstsq().
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
# this is threshhold value for Z subset
zThreshhold = 5.0
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
# fulle 3D quadratic
def func(data, a, b, c, d, e, f):
X = data[0]
Y = data[1]
return a + b*X + c*Y + d*X*Y + e*X*X + f*Y*Y
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([1.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
# these are the same as scipy default values in this example
initialParameters = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
zSubset = []
for i in range(len(modelPredictions)):
if modelPredictions[i] < zThreshhold:
zSubset.append([xData[i], yData[i], zData[i], modelPredictions[i]])
print("X, Y, Z, and Predicted values below Z threshold of", zThreshhold)
for i in range(len(zSubset)):
print(zSubset[i])

Linear regression (best fit line) [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 3 years ago.
Improve this question
I'm using matplotlib in python to draw (best fit-line) in linear regression... the line looks so messy unless I sort the X array(input) and the Yhat array(prediction), but is it right?
If I sort the X array & the "Yhat" array separately, the data is changing... because the Yhat opposed to X before sorting will not remain the same after sorting.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
#load the data
X=[]
Y=[]
for line in open('data_2d2.csv'):
x1,x2,y=line.split(',')
X.append([float(x1),float(x2),1])
Y.append(float(y))
#trun X and Y into numpy array
X=np.array(X)
Y=np.array(Y)
#fig=plt.figure();
#ax=fig.add_subplot(111,projection='3d')
#ax.scatter(X[:,0],X[:,1],Y)
#plt.show()
#caculate weights
w=np.linalg.solve(np.dot(X.T,X),np.dot(X.T,Y))
print(w)
Yhat=np.dot(X,w)
fig=plt.figure();
ax.scatter(X[:,0],X[:,1],Y)
print(Yhat)
ax.plot((sorted(X[:,0])),sorted((X[:,1])),sorted(Yhat)))
plt.show()
Here is an example using scipy's curve_fit() to make a non-linear fit to 3D data, make a 3D scatterplot, make a 3D surface plot, make a contour plot, and yield RMSE and R-squared fit statistics. You could change the non-linear fit to a linear fit if you change the equation used here, this should help get you started with the 3D plotting for sure.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

matplotlib plot_surface for 2-dimensional multiple linear regression

I have many points of data with three dimensions: x1, x2, and y. I'm able to calculate the multiple linear regression of these points, and I'm able to display the points on a 3D scatter plot, but I don't know how to plot the multiple linear regression I calculated for those points: the same way you can plot a line of best fit in a 1D linear regression, I'm interested in plotting a "plane of best fit" for a 2D linear regression.
My code follows:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# collect data into numpy arrays
X = []
Y = []
for line in open('data_2d.csv'):
x1, x2, y = line.split(',')
X.append([1, float(x1), float(x2)]) # here X[i][0] represents x0 = 1
Y.append(float(y))
X = np.array(X)
Y = np.array(Y)
# calculate weights
w = np.linalg.solve(np.dot(X.T,X), np.dot(X.T, Y))
Yhat = np.dot(X, w) # results of linear regression for data points
# plot results
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,1], X[:,2], Y)
ax.plot_surface(X[:,1], X[:,2], Yhat) # doesn't seem to work
plt.show()
Turns out plot_surface requires each of its inputs to be coordinate matrices rather than a list of values, like I was using earlier. My solution is as follows:
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# collect data into numpy arrays
X = []
Y = []
for line in open('data_2d.csv'): # contains 3 columns: x1, x2, and y
x1, x2, y = line.split(',')
X.append([1, float(x1), float(x2)]) # here X[i][0] represents x0 = 1
Y.append(float(y))
X = np.array(X)
Y = np.array(Y)
# calculate weights for computing solutions
w = np.linalg.solve(X.T.dot(X), X.T.dot(Y))
# calculate r-squared error given weights
Yhat = X.dot(w)
d1 = Y - Yhat
d2 = Y - Y.mean()
r2 = 1 - d1.dot(d1) / d2.dot(d2)
print("r-squared value of", r2)
# calculate plane of best fit
divs = 2 # number of divisions in surface: generates divs^2 points.
# The surface is a plane, so just 2^2 = 4 points can define it.
# plane spans all values of x1 and x2 from data
x1_range = np.linspace(min(X[:,1]),max(X[:,1]),divs)
x2_range = np.linspace(min(X[:,2]),max(X[:,2]),divs)
X_plane = []
for i in range(divs):
for j in range(divs):
X_plane.append([1, x1_range[i], x2_range[j]])
X_plane = np.array(X_plane)
# values of y are equal to the linear regression of points on the plane
Yhat2 = X_plane.dot(w)
# rearrange Yhat2 into a coordinate matrix for display as a surface
Yhat2_surface = []
for i in range(divs):
Yhat2_surface.append(Yhat2[ divs*i : divs*i+divs ])
Yhat2_surface = np.array(Yhat2_surface)
Yhat2 = Yhat2_surface
# generate coordinate matrices for x1 and x2 values
X2, X1 = np.meshgrid(x1_range, x2_range) # intentional ordering: X2, *then* X1
# plot results
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,1], X[:,2], Y) # supplied data
ax.plot_surface(X1, X2, Yhat2, color='y', alpha=0.1) # plane of best fit
plt.show()
The output is shown here. The dots represent the input data, and the yellow rectangle represents its plane of best fit, drawn with plot_surface.
Here is a quick example I threw together demonstrating 3D scatter plot, 3D surface plot and contour plot.
import numpy, scipy
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(equationFunc, data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = equationFunc(numpy.array([X, Y]))
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(equationFunc, data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = equationFunc(numpy.array([X, Y]))
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def EquationFunc(data):
return 5.0 + numpy.sqrt(data[0]) + numpy.cos(data[1] / 5.0)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
ScatterPlot(data)
SurfacePlot(EquationFunc, data)
ContourPlot(EquationFunc, data)

Categories