Finding a slope of 3D linear regression line

Finding a slope of 3D linear regression line - python

I am trying to find a slope of a line in 3D space. The solution for plotting such line is given in this post
Here's the given code from the link above:
import numpy as np
pts = np.add.accumulate(np.random.random((10,3)))
x,y,z = pts.T
# this will find the slope and x-intercept of a plane
# parallel to the y-axis that best fits the data
A_xz = np.vstack((x, np.ones(len(x)))).T
m_xz, c_xz = np.linalg.lstsq(A_xz, z)[0]
# again for a plane parallel to the x-axis
A_yz = np.vstack((y, np.ones(len(y)))).T
m_yz, c_yz = np.linalg.lstsq(A_yz, z)[0]
# the intersection of those two planes and
# the function for the line would be:
# z = m_yz * y + c_yz
# z = m_xz * x + c_xz
# or:
def lin(z):
x = (z - c_xz)/m_xz
y = (z - c_yz)/m_yz
return x,y
#verifying:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
fig = plt.figure()
ax = Axes3D(fig)
zz = np.linspace(0,5)
xx,yy = lin(zz)
ax.scatter(x, y, z)
ax.plot(xx,yy,zz)
plt.savefig('test.png')
plt.show()
Math-wise I know how to find intersections of two planes and the slope of a given line but I am having trouble putting it in code.
How can I find the slope of resulting regression line using this solution?

The "slope" of a 3D line is generally taken to be slopes of the line "projected" onto the x, y and z planes. See the second answer to this question
If this is what you intended then it's easy enough to calculate these; this modified version of your code below does this into the sx, sy and sz variables:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from math import pow, sqrt
pts = np.add.accumulate(np.random.random((10,3)))
x, y, z = pts.T
# plane parallel to the y-axis
A_xz = np.vstack((x, np.ones(len(x)))).T
m_xz, c_xz = np.linalg.lstsq(A_xz, z, rcond=None)[0]
# plane parallel to the x-axis
A_yz = np.vstack((y, np.ones(len(y)))).T
m_yz, c_yz = np.linalg.lstsq(A_yz, z, rcond=None)[0]
# the intersection of those two planes and
# the function for the line would be:
# z = m_yz * y + c_yz
# z = m_xz * x + c_xz
# or:
def lin(z):
x = (z - c_xz)/m_xz
y = (z - c_yz)/m_yz
return x,y
# get 2 points on the intersection line
za = z[0]
zb = z[len(z) - 1]
xa, ya = lin(za)
xb, yb = lin(zb)
# get distance between points
len = sqrt(pow(xb - xa, 2) + pow(yb - ya, 2) + pow(zb - za, 2))
# get slopes (projections onto x, y and z planes)
sx = (xb - xa) / len # x slope
sy = (yb - ya) / len # y slope
sz = (zb - za) / len # z slope
# integrity check - the sum of squares of slopes should equal 1.0
# print (pow(sx, 2) + pow(sy, 2) + pow(sz, 2))
fig = plt.figure()
ax = Axes3D(fig)
ax.set_xlabel("x, slope: %.4f" %sx, color='blue')
ax.set_ylabel("y, slope: %.4f" %sy, color='blue')
ax.set_zlabel("z, slope: %.4f" %sz, color='blue')
ax.scatter(x, y, z)
ax.plot([xa], [ya], [za], markerfacecolor='k', markeredgecolor='k', marker = 'o')
ax.plot([xb], [yb], [zb], markerfacecolor='k', markeredgecolor='k', marker = 'o')
ax.plot([xa, xb], [ya, yb], [za, zb], color = 'r')
plt.show()
The output graph below shows the line in question, which is just drawn between the 2 extreme xyz points.
I hope this may help

Related

Scipy RegularGridInterpolator turns interpolated vector field

The task:
I am trying to interpolate a vector field on a regular grid, i.e.:
The issue:
I am using the RegularGridInterpolator from scipy to do this. However, it seems that the resulting vector field is turned with respect to the original:
Anyone knows why?
Python code to reproduce example:
from scipy.interpolate import RegularGridInterpolator
import matplotlib.pyplot as plt
import numpy as np
# ORIGINAL
# Number of points (NxN)
N = 50
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Original")
plt.show()
# REDUCED
# Number of points (NxN)
N = 10
# Boundaries
ymin = -2.; ymax = 2.
xmin = -2.; xmax = 2.
# Create Meshgrid
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
xx, yy = np.meshgrid(x, y)
# Vector Field
Fx = np.cos(xx + 2*yy)
Fy = np.sin(xx - 2*yy)
# Plot vector field
fig, ax = plt.subplots()
ax.quiver(x, y, Fx, Fy)
plt.title("Reduced")
plt.show()
# INTERPOLATED VERSION BASED ON REDUCED
# Iterpolate
my_interpolating_function_x = RegularGridInterpolator((x, y), Fx)
my_interpolating_function_y = RegularGridInterpolator((x, y), Fy)
# Create Meshgrid
N = 50
x = np.linspace(xmin,xmax, N)
y = np.linspace(ymin,ymax, N)
grid = np.meshgrid(x, y)
new_points = np.vstack(list(map(np.ravel, grid))).T
# Interpolate
F_x_inter = my_interpolating_function_x(new_points)
F_y_inter = my_interpolating_function_y(new_points)
# reshape
F_x_inter = np.reshape(F_x_inter,(50,50))
F_y_inter = np.reshape(F_y_inter,(50,50))
#plot
fig, ax = plt.subplots()
ax.quiver(x, y, F_x_inter, F_y_inter)
plt.title("Interpolated")
plt.show()

How to create a numpy array filled with average value of a vector

I am not sure how to phrase my question in any way better. Basically, I have three lists of the same length x, y and z and I want to fill a 2D numpy array in the z/y plane with the average of the associated z values.
Here is how I can achieve what I wan to do:
import numpy as np
import matplotlib.pyplot as plt
x = [37.59390426045407, 38.00530354847739, 38.28412244348653, 38.74871247986305, 38.73175910429809, 38.869008864244016, 39.188234404976555, 39.92835838352555, 40.881394113153334, 41.686136269465884]
y = [0.1305391767832006, 0.13764519613447768, 0.14573326951792354, 0.15090729309032114, 0.16355823707239897, 0.17327106424274763, 0.17749746339532224, 0.17310384614773594, 0.16545780437882962, 0.1604752704890856]
z = [0.05738534353865021, 0.012572155256903583, -0.021709582561809437, -0.11191337750722108, -0.07931921785775153, -0.06241610118871843, 0.014216349927058225, 0.042002641153291886, -0.029354425271534645, 0.061894011359833856]
n = 5
image = np.zeros(shape=(n,n))
# Fill the 2D array
x0 = min(x)
y0 = min(y)
dx = (max(x) - min(x))/n
dy = (max(y) - min(y))/n
# Loop over each 2D cell
for index_x in range(n):
for index_y in range(n):
# find the limits of the cell
x1 = x0 + index_x * dx
x2 = x0 + (index_x+1) * dx
y1 = y0 + index_y * dy
y2 = y0 + (index_y+1) * dy
# find the points of z that lie within the range of the cell
vec_z = [z[idx] for idx in range(len(z)) if x[idx]>=x1 and x[idx]<x2 and y[idx]>=y1 and y[idx]<y2]
if vec_z:
image[index_x, index_y] = np.mean(vec_z)
# In the end, used to create a surface plot
fig, ax = plt.subplots()
ax.imshow(image, cmap=plt.cm.gray, interpolation='nearest')
plt.show()
Is there a more easy way to achieve this? I can imagine there is a numpy method for that.

If I understand correctly what you want to do, maybe a 2D interpolation from scipy.interpolate.interp2d is what you are looking for.
You define the interpolation function of your points:
f = interp2d(x = x, y = y, z = z)
Then you define the X and Y meshgrid:
N = 50
x_axis = np.linspace(np.min(x), np.max(x), N)
y_axis = np.linspace(np.min(y), np.max(y), N)
X, Y = np.meshgrid(x_axis, y_axis)
Finally you can compute Z interpolated values on the meshgrid:
Z = np.zeros((N, N))
for i in range(N):
for j in range(N):
Z[i, j] = f(X[i, j], Y[i, j])
If you plot in 3D the interpolated surface, you get:
fig = plt.figure()
ax = fig.add_subplot(projection = '3d')
ax.plot_surface(X, Y, Z, cmap = 'jet', shade = False)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
Interpolated surface compared to interpolation data points:
ax.scatter(x, y, z, color = 'black', s = 100, alpha = 1)

Python - find closest point to 3D point on 3D spline

I have 2 arrays with 3D points (name, X, Y, Z). First array contains reference points, through which I'm drawing spline. Second array contains measured points, from which I need to calculate normals to spline and get the coordinates of the normal on spline (I need to calculate the XY and height standard deviations of the measured points). This is the test data (in fact, I have several thousand points):
1st array - reference points/ generate spline:
r1,1.5602,6.0310,4.8289
r2,1.6453,5.8504,4.8428
r3,1.7172,5.6732,4.8428
r4,1.8018,5.5296,4.8474
r5,1.8700,5.3597,4.8414
2nd array - measured points:
m1, 1.8592, 5.4707, 4.8212
m2, 1.7642, 5.6362, 4.8441
m3, 1.6842, 5.7920, 4.8424
m4, 1.6048, 5.9707, 4.8465
The code I wrote, to read the data, calculate spline (using scipy) and display it via matplotlib:
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
# import measured points
filename = "measpts.csv"
meas_pts = np.genfromtxt(filename, delimiter=',')
# import reference points
filename = "refpts.csv"
ref = np.genfromtxt(filename, delimiter=',')
# divide data to X, Y, Z
x = ref[:, 2]
y = ref[:, 1]
z = ref[:, 3]
# spline interpolation
tck, u = interpolate.splprep([x, y, z], s=0)
u_new = np.linspace(u.min(), u.max(), 1000000)
x_new, y_new, z_new = interpolate.splev(u_new, tck, der=0)
xs = tck[1][0]
ys = tck[1][1]
zs = tck[1][2]
# PLOT 3D
fig = plt.figure()
ax3d = fig.add_subplot(111, projection='3d', proj_type='ortho')
ax3d.plot(x, y, z, 'ro') # ref points
ax3d.plot(xs, ys, zs, 'yo') # spline knots
ax3d.plot(x_new, y_new, z_new, 'b--') # spline
ax3d.plot(meas_pts[:, 2], meas_pts[:, 1], meas_pts[:, 3], 'g*') # measured points
# ax3d.view_init(90, -90) # 2D TOP view
# ax3d.view_init(0, -90) # 2D from SOUTH to NORTH view
# ax3d.view_init(0, 0) # 2D from EAST to WEST view
plt.show()
To sum up: I need array contains pairs: [[measured point X, Y, Z], [closest (normal) point on the spline X,Y,Z]]

Given a point P and a line in a 3d space, the distance from the point P and the points of the line is the diagonal of the box, so you wish to minimize this diagonal, the minimum distance will be normal to the line
You can use this property. So, for example
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# generate sample line
x = np.linspace(-2, 2, 100)
y = np.cbrt( np.exp(2*x) -1 )
z = (y + 1) * (y - 2)
# a point
P = (-1, 3, 2)
# 3d plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d', proj_type='ortho')
ax.plot(x, y, z)
ax.plot(P[0], P[1], P[2], 'or')
plt.show()
def distance_3d(x, y, z, x0, y0, z0):
"""
3d distance from a point and a line
"""
dx = x - x0
dy = y - y0
dz = z - z0
d = np.sqrt(dx**2 + dy**2 + dz**2)
return d
def min_distance(x, y, z, P, precision=5):
"""
Compute minimum/a distance/s between
a point P[x0,y0,z0] and a curve (x,y,z)
rounded at `precision`.
ARGS:
x, y, z (array)
P (3dtuple)
precision (integer)
Returns min indexes and distances array.
"""
# compute distance
d = distance_3d(x, y, z, P[0], P[1], P[2])
d = np.round(d, precision)
# find the minima
glob_min_idxs = np.argwhere(d==np.min(d)).ravel()
return glob_min_idxs, d
that gives
min_idx, d = min_distance(x, y, z, P)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d', proj_type='ortho')
ax.plot(x, y, z)
ax.plot(P[0], P[1], P[2], 'or')
ax.plot(x[min_idx], y[min_idx], z[min_idx], 'ok')
for idx in min_idx:
ax.plot(
[P[0], x[idx]],
[P[1], y[idx]],
[P[2], z[idx]],
'k--'
)
plt.show()
print("distance:", d[min_idx])
distance: [2.4721]
You can implement a similar function for your needs.

Revolution solid with MatplotLib Python

I'm trying to create a bottle as a revolution solid using MatplotLib. I've got this points:
Image of the coordinates
Which in terms of coordinates are:
coords = [(0.00823433249299356, 0.06230346394288128),
(0.04086905251958573, 0.0648935210878489),
(0.08386400112604843, 0.0648935210878489),
(0.11753474401062763, 0.06541153251684242),
(0.14239929260231693, 0.05712334965294601),
(0.19109236692770842, 0.05401528107898486),
(0.2278711783862488, 0.05142522393401722),
(0.24133947554008045, 0.04158300678314021)]
The polynomial (more or less accurate) is:
Lambda(x, -19493.7965633925*x**6 + 13024.3747084876*x**5 - 3228.16456296349*x**4 + 368.816080918066*x**3 - 20.500262217588*x**2 + 0.545840273670868*x + 0.0590464366057008)
Which I get by:
# Getting the polynomial:
z = np.polyfit(xdata, ydata, 6)
# Being xdata and ydata the 2 vector from the coordinates
x = sp.symbols('x', real=True)
P = sp.Lambda(x,sum((a*x**i for i,a in enumerate(z[::-1]))))
print(P)
The point describe the outline of the bottle (cast your imagination) being the bottle in the plane XY.
How can I get, from that curve, a solid of revolution that recreates a bottle?
My objective is to be able to rotate the generator curve and create a solid of revolution, what I've tried is:
# Create the polynomial
pol = sp.lambdify(x,P(x),"numpy")
# Create the matrix of points
X = np.linspace(xdata[0], xdata[-1], 50)
Y = pol(X)
X, Y = np.meshgrid(X, Y)
# As long as a bottle is no more than a big amount of small cylinders, my
# equation should be more or less like:
# Z = x**2 + y** -R**2
# So we create here the equation
Z = X**2 + Y**2 - (Y - 0.0115)**2
# We create the #D figure
fig = plt.figure()
ax = plt.axes(projection="3d")
# And we representate it
surf = ax.plot_surface(X, Y, Z)
# We change the labels
ax.set_xlabel('$x$')
ax.set_ylabel('$y$')
ax.set_zlabel('$z$')
# And show the figure
plt.show()
The problem is that what I get is no longer a bottle (and I think is because how I'm using the plot_surface (I don't get very well how to use it by reading the documentation).
What I got is:
Image of the plotting. First I thought that was a problem related to the zoom, but I changed it and the figure is the same

I'll reference unutbu's answer to a similar question.
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as axes3d
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
# grab more points between your coordinates, say 100 points
u = np.linspace(0.00823433249299356, 0.24133947554008045, 100)
def polynomial(x):
return -19493.7965633925*x**6 + 13024.3747084876*x**5 - 3228.16456296349*x**4 + 368.816080918066*x**3 - 20.500262217588*x**2 + 0.545840273670868*x + 0.0590464366057008
v = np.linspace(0, 2*np.pi, 60)
U, V = np.meshgrid(u, v)
X = U
Y1 = polynomial(X)*np.cos(V)
Z1 = polynomial(X)*np.sin(V)
# Revolving around the axis
Y2 = 0*np.cos(V)
Z2 = 0*np.sin(V)
ax.plot_surface(X, Y1, Z1, alpha=0.3, color='red', rstride=6, cstride=12)
ax.plot_surface(X, Y2, Z2, alpha=0.3, color='blue', rstride=6, cstride=12)
# set the limits of the axes
ax.set_xlim3d(-0.3, 0.3)
ax.set_ylim3d(-0.3, 0.3)
ax.set_zlim3d(-0.3, 0.3)
plt.show()

Code for best fit straight line of a scatter plot in python

Below is my code for scatter plotting the data in my text file. The file I am opening contains two columns. The left column is x coordinates and the right column is y coordinates. the code creates a scatter plot of x vs. y. I need a code to overplot a line of best fit to the data in the scatter plot, and none of the built in pylab function have worked for me.
from matplotlib import *
from pylab import *
with open('file.txt') as f:
data = [line.split() for line in f.readlines()]
out = [(float(x), float(y)) for x, y in data]
for i in out:
scatter(i[0],i[1])
xlabel('X')
ylabel('Y')
title('My Title')
show()

A one-line version of this excellent answer to plot the line of best fit is:
plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)))
Using np.unique(x) instead of x handles the case where x isn't sorted or has duplicate values.

Assuming line of best fit for a set of points is:
y = a + b * x
where:
b = ( sum(xi * yi) - n * xbar * ybar ) / sum((xi - xbar)^2)
a = ybar - b * xbar
Code and plot
# sample points
X = [0, 5, 10, 15, 20]
Y = [0, 7, 10, 13, 20]
# solve for a and b
def best_fit(X, Y):
xbar = sum(X)/len(X)
ybar = sum(Y)/len(Y)
n = len(X) # or len(Y)
numer = sum([xi*yi for xi,yi in zip(X, Y)]) - n * xbar * ybar
denum = sum([xi**2 for xi in X]) - n * xbar**2
b = numer / denum
a = ybar - b * xbar
print('best fit line:\ny = {:.2f} + {:.2f}x'.format(a, b))
return a, b
# solution
a, b = best_fit(X, Y)
#best fit line:
#y = 0.80 + 0.92x
# plot points and fit line
import matplotlib.pyplot as plt
plt.scatter(X, Y)
yfit = [a + b * xi for xi in X]
plt.plot(X, yfit)
UPDATE:
notebook version

You can use numpy's polyfit. I use the following (you can safely remove the bit about coefficient of determination and error bounds, I just think it looks nice):
#!/usr/bin/python3
import numpy as np
import matplotlib.pyplot as plt
import csv
with open("example.csv", "r") as f:
data = [row for row in csv.reader(f)]
xd = [float(row[0]) for row in data]
yd = [float(row[1]) for row in data]
# sort the data
reorder = sorted(range(len(xd)), key = lambda ii: xd[ii])
xd = [xd[ii] for ii in reorder]
yd = [yd[ii] for ii in reorder]
# make the scatter plot
plt.scatter(xd, yd, s=30, alpha=0.15, marker='o')
# determine best fit line
par = np.polyfit(xd, yd, 1, full=True)
slope=par[0][0]
intercept=par[0][1]
xl = [min(xd), max(xd)]
yl = [slope*xx + intercept for xx in xl]
# coefficient of determination, plot text
variance = np.var(yd)
residuals = np.var([(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)])
Rsqr = np.round(1-residuals/variance, decimals=2)
plt.text(.9*max(xd)+.1*min(xd),.9*max(yd)+.1*min(yd),'$R^2 = %0.2f$'% Rsqr, fontsize=30)
plt.xlabel("X Description")
plt.ylabel("Y Description")
# error bounds
yerr = [abs(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)]
par = np.polyfit(xd, yerr, 2, full=True)
yerrUpper = [(xx*slope+intercept)+(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)]
yerrLower = [(xx*slope+intercept)-(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)]
plt.plot(xl, yl, '-r')
plt.plot(xd, yerrLower, '--r')
plt.plot(xd, yerrUpper, '--r')
plt.show()

Have implemented #Micah 's solution to generate a trendline with a few changes and thought I'd share:
Coded as a function
Option for a polynomial trendline (input order=2)
Function can also just return the coefficient of determination (R^2, input Rval=True)
More Numpy array optimisations
Code:
def trendline(xd, yd, order=1, c='r', alpha=1, Rval=False):
"""Make a line of best fit"""
#Calculate trendline
coeffs = np.polyfit(xd, yd, order)
intercept = coeffs[-1]
slope = coeffs[-2]
power = coeffs[0] if order == 2 else 0
minxd = np.min(xd)
maxxd = np.max(xd)
xl = np.array([minxd, maxxd])
yl = power * xl ** 2 + slope * xl + intercept
#Plot trendline
plt.plot(xl, yl, c, alpha=alpha)
#Calculate R Squared
p = np.poly1d(coeffs)
ybar = np.sum(yd) / len(yd)
ssreg = np.sum((p(xd) - ybar) ** 2)
sstot = np.sum((yd - ybar) ** 2)
Rsqr = ssreg / sstot
if not Rval:
#Plot R^2 value
plt.text(0.8 * maxxd + 0.2 * minxd, 0.8 * np.max(yd) + 0.2 * np.min(yd),
'$R^2 = %0.2f$' % Rsqr)
else:
#Return the R^2 value:
return Rsqr

import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
X, Y = x.reshape(-1,1), y.reshape(-1,1)
plt.plot( X, LinearRegression().fit(X, Y).predict(X) )

Numpy 1.4 introduced new API. You can use this one-liner, where n determines how smooth you want the line to be and a is the degree of the polynomial.
plt.plot(*np.polynomial.Polynomial.fit(x, y, a).linspace(n), 'r-')

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Finding a slope of 3D linear regression line - python

Related

Scipy RegularGridInterpolator turns interpolated vector field

How to create a numpy array filled with average value of a vector

Python - find closest point to 3D point on 3D spline

Revolution solid with MatplotLib Python

Code for best fit straight line of a scatter plot in python

Categories

Resources