How can I optimize my sine fit when using bounds? - python

I'm trying to do a sine fit on a given part of a curve. There are two restrictions: first, the offset of my fitted sine curve should be 0; second, the amplitude of my fitted sine curve should be identical to the minimum value of my original data.
When I use my code below, the fit looks like the picture I added (1). In my opinion the period of the sine function should be longer; the fitted curve only matches my original data at the minimum, it isn't wide enough.
When I don't use the bounds for c and A, my fit looks good (2). What am I doing wrong? Is there a way to modify the fit so that the sine curve fits better when using the bounds for A and c?
Fitting without bounds
Fitting with bounds
Edit:
One thing I noticed is that the fit depends heavily on the start value of the amplitude (A_guess). When I manually change it to X (e.g. 10 or 30), the fitted sine curve always shows an amplitude near X (10.3 or 32.5). Is there any setting I haven't considered yet that prevents the optimizer from varying the amplitude value?
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from matplotlib.pyplot import figure, rcParams
import numpy as np
from scipy.optimize import curve_fit
#Time
t = [313.544, 313.545, 313.546, 313.547, 313.548, 313.549, 313.55, 313.551, 313.552, 313.553, 313.554, 313.555, 313.556, 313.557, 313.558, 313.559, 313.56, 313.561, 313.562, 313.563, 313.564, 313.565, 313.566, 313.567,]
#y-Values
s = [0.911188, -0.43135, -1.80997, -3.27816, -4.85784, -6.59428, -8.2214, -9.53617, -10.6892, -11.6003, -12.0844, -12.0524, -11.9749, -11.4891, -10.6131, -9.49873, -8.1154, -6.41442, -5.09357, -3.99165, -2.72991, -1.71446, -0.56306, 0.440741]
#fourier frequency
ff = np.fft.fftfreq(len(t), (t[1]-t[0]))
#fourier amplitude
fa = abs(np.fft.fft(s, len(t)))
#Position of maximum Amplitude
pos_amax = np.argmax(fa[1:])+1
#Frequency at maximum Amplitude (w/2pi)
ff_max = abs(ff[pos_amax])
ff_guess = ff_max
T_guess = 1000/ff_max
#A_guess = np.std(s) *2. **0.5
A_guess = min(s)
#c_guess = np.mean(s)
c_guess = 0
#First Guess for all paramters
f_guess = np.array([A_guess, 2*np.pi*ff_guess, 0., c_guess])
#Sinus_Curve
def sin_func(t, A, w, phi, c):
    return A * np.sin(w*t + phi) + c
#Defining Bounds for A and c
c_bound = 0.1
A_bound = min(s)
#Bounds Array for curve_fit (A_bound is negative, so 1.01*A_bound is the lower bound)
param_bounds=([1.01*A_bound, -np.inf, -np.inf, -1*c_bound],[0.99*A_bound, np.inf, np.inf, c_bound])
popt, pcov = curve_fit(sin_func, t, s, p0=f_guess, bounds=param_bounds, maxfev=10000000)
#popt, pcov = curve_fit(sin_func, t, s, p0=f_guess, maxfev=10000000)
#
A, w, phi, c = popt
f = w/(2.*np.pi)
T = 1000/f
t = np.array(t)
s = np.array(s)
plt.figure(1)
#Generate Sinus Function
s_fit = A * np.sin(w*t + phi) + c
#Plotting
rcParams['figure.figsize'] =10, 5
fig, ax = plt.subplots()
plt.plot(t, s, "b", label="Original")
plt.plot(t, s_fit, 'k--', label="Fitting")
ytitle='ytitle'
xtitle='xtitle'
ax.set(xlabel=xtitle, ylabel=ytitle)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
ax.grid()
#Sidetext
ausgabe = ("Sine-Fit \nAmplitude = {:.2f} m/s^2 \nPeriod = {:.2f} ms \nOffset = {:.2f} m/s^2".format(A, abs(T), c))
plt.text(0.795, 0.7, ausgabe, family="sans-serif", fontsize=10, ha='left', va='top', transform=fig.transFigure)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])
plt.show()

If the amplitude is fixed and the offset is supposed to be zero, why fit them in the first place? Moreover, there is no need for an FFT to estimate the parameters, as there is a simple linear method.
Looks like this:
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
from scipy.integrate import cumtrapz  # renamed cumulative_trapezoid in newer SciPy
#Time
t = np.array([
313.544, 313.545, 313.546, 313.547, 313.548, 313.549, 313.55,
313.551, 313.552, 313.553, 313.554, 313.555, 313.556, 313.557,
313.558, 313.559, 313.56, 313.561, 313.562, 313.563, 313.564,
313.565, 313.566, 313.567
])
# ~t -= 313.544
#y-Values
s = np.array([
0.911188, -0.43135, -1.80997, -3.27816, -4.85784, -6.59428,
-8.2214, -9.53617, -10.6892, -11.6003, -12.0844, -12.0524,
-11.9749, -11.4891, -10.6131, -9.49873, -8.1154, -6.41442,
-5.09357, -3.99165, -2.72991, -1.71446, -0.56306, 0.440741
])
### fixing the amplitude to the maximum absolute value of the data
amp = np.max( np.abs( s ) )
### sine function with the fixed amplitude from above and no offset
def sine(t, w, f ):
    return amp * np.sin( w * t + f )
### getting the nonlinear parameters by using the fact that
### int int y = -y/w**2 + const1 * t + const2
### so the integro-equation has w hidden in a linear factor
### and we can use linear optimization to get it
Sy = cumtrapz( s, x=t, initial = 0 ) ### single integration
SSy = cumtrapz( Sy, x=t, initial = 0 ) ### double integration
VMXT = np.array( [ s, t, np.ones( len( t ) ) ] ) ### matrix describing the linear relationship
VMX = np.transpose( VMXT )
A = np.dot( VMXT, VMX )
SV = np.dot( VMXT, SSy )
AI = np.linalg.inv( A )
alpha = np.dot( AI , SV ) ### solution
wstart = np.sqrt( -1 / alpha[0] ) ### as mentioned above, the linear factor is -1/w**2...so inverse function
### now getting phi by actually fitting A sin + B cos
### and using tan phi = B/A
VMXT = np.array( [ np.sin( wstart * t ), np.cos( wstart * t ) ] )
VMX = np.transpose( VMXT )
A = np.dot( VMXT, VMX )
SV = np.dot( VMXT, s )
AI = np.linalg.inv( A )
alpha = np.dot( AI , SV ) ### solution
phistart = np.arctan2( alpha[1], alpha[0] )
op, cv = curve_fit( sine, t, s, p0=( wstart, phistart ) )
print( op )
yth = sine( t, *op )
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( t, s )
ax.plot( t, yth )
plt.show()
Works just fine.
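As a quick sanity check of the double-integration trick above, here is a minimal sketch (not part of the original answer) on synthetic data with a known angular frequency; all numbers are made up for illustration:
import numpy as np
from scipy.integrate import cumulative_trapezoid  # named cumtrapz in older SciPy
t = np.linspace( 0, 4 * np.pi, 200 )
s = 3.0 * np.sin( 2.5 * t + 0.4 )  # known w = 2.5
Sy = cumulative_trapezoid( s, x=t, initial=0 )
SSy = cumulative_trapezoid( Sy, x=t, initial=0 )
### SSy = -s/w**2 + c1*t + c2, so a linear least-squares solve recovers -1/w**2
VMXT = np.array( [ s, t, np.ones( len( t ) ) ] )
alpha = np.linalg.lstsq( VMXT.T, SSy, rcond=None )[0]
print( np.sqrt( -1 / alpha[0] ) )  # prints approximately 2.5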

Related

Phase portrait of Verhulst equation

I was trying to do an example from the book "Dynamical Systems with Applications using Python" in which I was asked to plot the phase portrait of the Verhulst equation, and I came across this post: How to plot a phase portrait of Verhulst equation with SciPy (or SymPy) and Matplotlib?
I'm getting the same plot as the user in the previous post. Whenever I try to use the accepted solution I get a "division by zero" error. Why doesn't the accepted solution in How to plot a phase portrait of Verhulst equation with SciPy (or SymPy) and Matplotlib? work?
Thank you very much for your help!
Edit:
Using the code from the previous post and the correction given by @Lutz Lehmann:
beta, delta, gamma = 1, 2, 1
b,d,c = 1,2,1
C1 = gamma*c-delta*d
C2 = gamma*b-beta*d
C3 = beta*c-delta*b
def verhulst(X, t=0):
    return np.array([beta*X[0] - delta*X[0]**2 - gamma*X[0]*X[1],
                     b*X[1] - d*X[1]**2 - c*X[0]*X[1]])
X_O = np.array([0., 0.])
X_R = np.array([C2/C1, C3/C1])
X_P = np.array([0, b/d])
X_Q = np.array([beta/delta, 0])
def jacobian(X, t=0):
    return np.array([[beta - 2*delta*X[0] - gamma*X[1], -gamma*X[0]],
                     [-c*X[1], b - 2*d*X[1] - c*X[0]]])
values = np.linspace(0.3, 0.9, 5)
vcolors = plt.cm.autumn_r(np.linspace(0.3, 1., len(values)))
f2 = plt.figure(figsize=(4,4))
for v, col in zip(values, vcolors):
    X0 = v * X_R
    X = odeint(verhulst, X0, t)
    plt.plot(X[:,0], X[:,1], color=col, label='X0=(%.f, %.f)' % (X0[0], X0[1]))
ymax = plt.ylim(ymin=0)[1]
xmax = plt.xlim(xmin=0)[1]
nb_points = 20
x = np.linspace(0, xmax, nb_points)
y = np.linspace(0, ymax, nb_points)
X1, Y1 = np.meshgrid(x, y)
DX1, DY1 = verhulst([X1, Y1]) # compute growth rate on the grid
M = (np.hypot(DX1, DY1)) # norm of the growth rate
M[M == 0] = 1. # avoid zero-division errors
DX1 /= M # normalize each arrow
DY1 /= M
plt.quiver(X1, Y1, DX1, DY1, M, cmap=plt.cm.jet)
plt.xlabel('Number of Species 1')
plt.ylabel('Number of Species 2')
plt.legend()
plt.grid()
We have:
That is still different from:
What am I missing?
With the help of @Lutz Lehmann I could rewrite the code to get what I needed.
The solution is something like this:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4), dpi= 80, facecolor='whitesmoke', edgecolor='k')
beta, delta, gamma = 1, 2, 1
b,d,c = 1,2,1
t = np.linspace(0, 10, 100)
C1 = gamma*c-delta*d
C2 = gamma*b-beta*d
C3 = beta*c-delta*b
def verhulst(X, t=0):
    return np.array([beta*X[0] - delta*X[0]**2 - gamma*X[0]*X[1],
                     b*X[1] - d*X[1]**2 - c*X[0]*X[1]])
X_O = np.array([0., 0.])
X_R = np.array([C2/C1, C3/C1])
X_P = np.array([0, b/d])
X_Q = np.array([beta/delta, 0])
def jacobian(X, t=0):
    return np.array([[beta - 2*delta*X[0] - gamma*X[1], -gamma*X[0]],
                     [-c*X[1], b - 2*d*X[1] - c*X[0]]])
values = np.linspace(0.05, 0.15, 5)
vcolors = plt.cm.autumn_r(np.linspace(0.3, 1., len(values)))
for v, col in zip(values, vcolors):
    X0 = [v, 0.2-v]
    X = odeint(verhulst, X0, t)
    plt.plot(X[:,0], X[:,1], color=col, label='X0=(%.f, %.f)' % (X0[0], X0[1]))
for v, col in zip(values, vcolors):
    X0 = [6 * v, 6 * (0.2-v)]
    X = odeint(verhulst, X0, t)
    plt.plot(X[:,0], X[:,1], color=col, label='X0=(%.f, %.f)' % (X0[0], X0[1]))
ymax = plt.ylim(ymin=0)[1]
xmax = plt.xlim(xmin=0)[1]
nb_points = 20
x = np.linspace(0, xmax, nb_points)
y = np.linspace(0, ymax, nb_points)
X1, Y1 = np.meshgrid(x, y)
DX1, DY1 = verhulst([X1, Y1]) # compute growth rate on the grid
M = (np.hypot(DX1, DY1)) # norm of the growth rate
M[M == 0] = 1. # avoid zero-division errors
DX1 /= M # normalize each arrow
DY1 /= M
plt.quiver(X1, Y1, DX1, DY1, M, cmap=plt.cm.jet)
plt.xlabel('Number of Species 1')
plt.ylabel('Number of Species 2')
plt.grid()
We get what we were looking for:
Finally, I would like to thank again @Lutz Lehmann for the help. I wouldn't have solved it without him.
Edit 1:
I forgot t = np.linspace(0, 10, 100), and if you change figsize to (8, 8), you get a nicer shape in the plot. (Thank you @Trenton McKinney for the remarks.)
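As a side note (an editorial addition, not from the original post): the jacobian defined above is never actually used. Reusing the definitions from the code above, a minimal sketch of what it is good for, namely classifying the fixed points via the eigenvalues of the linearization:
for P in ( X_O, X_R, X_P, X_Q ):
    eigenvalues = np.linalg.eigvals( jacobian( P ) )
    print( P, eigenvalues )  # all real parts negative -> stable; mixed signs -> saddle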

How to fit a log-normal distribution with Scipy?

I want to fit the log-normal parameters mu and sigma to an existing (measured) log-normal distribution.
The measured log-normal distribution is defined by the following x and y arrays:
x:
4.870000000000000760e-09
5.620000000000000859e-09
6.490000000000000543e-09
7.500000000000000984e-09
8.660000000000001114e-09
1.000000000000000021e-08
1.155000000000000085e-08
1.334000000000000067e-08
1.540000000000000224e-08
1.778000000000000105e-08
2.054000000000000062e-08
2.371000000000000188e-08
2.738000000000000099e-08
3.162000000000000124e-08
3.652000000000000541e-08
4.217000000000000637e-08
4.870000000000000595e-08
5.623000000000000125e-08
6.493999999999999784e-08
7.498999999999999850e-08
8.659999999999999460e-08
1.000000000000000087e-07
1.154800000000000123e-07
1.333500000000000129e-07
1.539900000000000177e-07
1.778300000000000247e-07
2.053499999999999958e-07
2.371399999999999913e-07
2.738399999999999692e-07
3.162300000000000199e-07
3.651700000000000333e-07
4.217000000000000240e-07
4.869700000000000784e-07
8.659600000000001124e-07
1.000000000000000167e-06
y:
1.883186407957446899e+11
3.609524622222222290e+11
7.508596384507042236e+11
2.226776878843930664e+12
4.845941940346821289e+12
7.979258430057803711e+12
1.101088735028901758e+13
1.346205871213872852e+13
1.509035024739884375e+13
1.599175638381502930e+13
1.668097844161849805e+13
1.786208191445086719e+13
2.007139089017341016e+13
2.346096336416185156e+13
2.763042850867051953e+13
3.177726578034682031e+13
3.552045143352600781e+13
3.858765218497110156e+13
4.051697248554913281e+13
4.132681209248554688e+13
4.112713068208092188e+13
4.003871248554913281e+13
3.797625966473988281e+13
3.472541513294797656e+13
3.017757826589595312e+13
2.454670317919075000e+13
1.840085110982658984e+13
1.250047161156069336e+13
7.540309609248554688e+12
3.912091102658959473e+12
1.632974141040462402e+12
4.585002890867052002e+11
1.260128910303030243e+11
7.276263267445255280e+09
1.120399584203921509e+10
Plotted, this looks like this:
When I now use scipy.stats.lognorm.fit like this:
shape, loc, scale = stats.lognorm.fit(y, floc=0)
mu = np.log(scale)
sigma = shape
y_fit = 1 / x * 1 / (sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2/(2*sigma**2))
The resulting y_fit looks like this:
2.774453764650559735e-92
9.215468156399056736e-92
3.066511893903929907e-91
1.022335884325557513e-90
3.371353425505715432e-90
1.107869289600567113e-89
3.632923945686527959e-89
1.186352074527947499e-88
3.843439346384186221e-88
1.241282395050092616e-87
4.012158206798217088e-87
1.283531486148302474e-86
4.102813367932395623e-86
1.306865297124819703e-85
4.149188517768147925e-85
1.309743071360157226e-84
4.121819150664498056e-84
1.289935574540856462e-83
4.028475776631639341e-83
1.251854680594688466e-82
3.876254948575364474e-82
1.194751160823721531e-81
3.669411018320463915e-81
1.122061051084741563e-80
3.418224619543735425e-80
1.037398725542414359e-79
3.134554301786779178e-79
9.436770981828214504e-79
2.828745744939237710e-78
8.447588129217592353e-78
2.512030904806250195e-77
7.442222461482558402e-77
2.195666296758331429e-76
1.598228276801569301e-74
4.622033883255558750e-74
This is obviously very far away from the original y values. I do realize that I haven't used the initial x values at all, so I assume I need to shift (and maybe also scale) the resulting distribution somehow.
However, I can't wrap my head around how to do this. How do I correctly fit a log-normal distribution in Python?
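A side note before the answer (an editorial addition, not from the original posts): stats.lognorm.fit expects raw samples drawn from the distribution, whereas the (x, y) pairs above describe a density curve; feeding the huge y values in as samples is what produces the absurdly small y_fit values. Fitting a curve is a job for curve_fit, as the answer below shows. A minimal sketch of what lognorm.fit is actually for, with made-up parameters:
from scipy import stats
samples = stats.lognorm.rvs( s=0.6, scale=1e-7, size=1000 )  # raw draws, hypothetical parameters
shape, loc, scale = stats.lognorm.fit( samples, floc=0 )     # should recover s=0.6 and scale=1e-7
print( shape, scale )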
It works out of the box with curve_fit if you scale the data. I am not sure if scaling and re-scaling makes sense, though. (This seems to confirm the ansatz.)
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
def log_fit( x, a, mu, sigma ):
    return a / x * 1. / (sigma * np.sqrt( 2. * np.pi ) ) * np.exp( -( np.log( x ) - mu )**2 / ( 2. * sigma**2 ) )
pp = np.argmax( y )
yM = y[ pp ]
xM = x[ pp ]
xR = x/xM
yR = y/yM
print( xM, yM )
sol, err = curve_fit( log_fit, xR, yR )
print( sol )
scaledSol = [ yM * sol[0] * xM , sol[1] + np.log(xM), sol[2] ]
print( scaledSol )
yF = np.fromiter( ( log_fit( xx, *sol ) for xx in xR ), float )
yFIR = np.fromiter( ( log_fit( xx, *scaledSol ) for xx in x ), float )
fig = plt.figure()
ax = fig.add_subplot( 2,1, 1)
bx = fig.add_subplot( 2,1, 2)
ax.plot( x, y )
ax.plot( x, yFIR )
bx.plot( xR, yR )
bx.plot( xR, yF )
plt.show()
Providing
>> 7.499e-08 41326812092485.55
>> [2.93003525 0.68436895 0.87481153]
>> [9080465.32138486, -15.72154211628693, 0.8748115349982701]
and
Anyhow, it does not really look like that's the right fit function.
My equation search turned up a shifted log-normal type equation, y = a * exp(-0.5 * ((log(x-d)-b)/c)**2), giving a good fit with parameters
a = 4.2503194887395930E+13
b = -1.6090252935097830E+01
c = 6.0250205607650253E-01
d = -2.2907054835882373E-08
No scaling needed.
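For completeness, a minimal sketch (an editorial addition) of fitting that shifted log-normal form with curve_fit, seeded with the equation-search parameters; x and y are the arrays from the question:
import numpy as np
from scipy.optimize import curve_fit
def shifted_lognormal( x, a, b, c, d ):
    return a * np.exp( -0.5 * ( ( np.log( x - d ) - b ) / c )**2 )
p0 = [ 4.25e13, -16.09, 0.6025, -2.29e-08 ]  # start values from the equation search
popt, pcov = curve_fit( shifted_lognormal, x, y, p0=p0 )
print( popt )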

How can I fit a good Lorentzian on python using scipy.optimize.curve_fit?

I'm trying to fit a Lorentzian function with more than one absorption peak (Mössbauer spectra), but the curve_fit function is not working properly, fitting just a few peaks. How can I fit it?
Figure: trying to fit a multi-Lorentzian
Below I show my code. Please help me.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def mymodel_hema(x,a1,b1,c1,a2,b2,c2,a3,b3,c3,a4,b4,c4,a5,b5,c5,a6,b6,c6):
    f = 160000 - (c1*a1)/(c1+(x-b1)**2) - (c2*a2)/(c2+(x-b2)**2) - (c3*a3)/(c3+(x-b3)**2) - (c4*a4)/(c4+(x-b4)**2) - (c5*a5)/(c5+(x-b5)**2) - (c6*a6)/(c6+(x-b6)**2)
    return f
def main():
    abre = np.loadtxt('HEMAT_1.dat')
    x = np.zeros(len(abre))
    y = np.zeros(len(abre))
    for i in range(len(abre)):
        x[i] = abre[i,0]
        y[i] = abre[i,1]
    popt, pcov = curve_fit(mymodel_hema, x, y, maxfev=1000000000)
My data --> https://drive.google.com/file/d/1LvCKNdv0oBza_TDwuyNwd29PgQv22VPA/view?usp=sharing
This code uses leastsq instead of curve_fit, as the latter requires a fixed number of parameters. Here I do not want that, as I let the code "decide" how many peaks there are. Note that I scaled the data to simplify the fit. The true fitting parameters are easily calculated by scaling back (and standard error propagation).
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
def lorentzian( x, x0, a, gam ):
    return a * gam**2 / ( gam**2 + ( x - x0 )**2 )
def multi_lorentz( x, params ):
    off = params[0]
    paramsRest = params[1:]
    assert not ( len( paramsRest ) % 3 )
    return off + sum( [ lorentzian( x, *paramsRest[ i : i+3 ] ) for i in range( 0, len( paramsRest ), 3 ) ] )
def res_multi_lorentz( params, xData, yData ):
    diff = [ multi_lorentz( x, params ) - y for x, y in zip( xData, yData ) ]
    return diff
xData, yData = np.loadtxt('HEMAT_1.dat', unpack=True )
yData = yData / max(yData)
generalWidth = 1
yDataLoc = yData
startValues = [ max( yData ) ]
counter = 0
while max( yDataLoc ) - min( yDataLoc ) > .1:
    counter += 1
    if counter > 20: ### max 20 peaks...emergency break to avoid infinite loop
        break
    minP = np.argmin( yDataLoc )
    minY = yData[ minP ]
    x0 = xData[ minP ]
    startValues += [ x0, minY - max( yDataLoc ), generalWidth ]
    popt, ier = leastsq( res_multi_lorentz, startValues, args=( xData, yData ) )
    yDataLoc = [ y - multi_lorentz( x, popt ) for x, y in zip( xData, yData ) ]
print( popt )
testData = [ multi_lorentz(x, popt ) for x in xData ]
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xData, yData )
ax.plot( xData, testData )
plt.show()
Providing
[ 9.96855817e-01 4.94106598e+02 -2.82103813e-01 4.66272773e+00
2.80688160e+01 -2.72449246e-01 4.71728295e+00 1.31577189e+02
-2.29698620e-01 4.20685229e+00 4.01421993e+02 -1.85917255e-01
5.57859380e+00 2.29704607e+02 -1.47193792e-01 3.91112196e+00
3.03387957e+02 -1.37127711e-01 4.39571905e+00]
and
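Since yData was normalized by its maximum, the parameters on the original scale follow by scaling back, as mentioned above; a minimal sketch (an editorial addition, reusing popt and the raw data): the offset and every amplitude a scale with the maximum of the raw data, while the positions x0 and widths gam are unaffected.
yRaw = np.loadtxt( 'HEMAT_1.dat', unpack=True )[1]
poptRaw = np.copy( popt )
poptRaw[0] *= max( yRaw )     # offset
poptRaw[2::3] *= max( yRaw )  # amplitudes a (after the offset, params come in x0, a, gam triples)
print( poptRaw )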

How to do a polynomial fit with fixed points in 3D

I have sets of x, y, z points in 3D space and another variable called charge, which represents the amount of charge that was deposited at a specific x, y, z coordinate. I would like to do a weighted fit for this data (weighted by the amount of charge deposited in the detector, which just corresponds to a higher weight for more charge) such that the fit passes through a given point, the vertex.
Now, when I did this for 2D, I tried all sorts of methods (bringing the vertex to the origin and applying the same transformation to all the other points to force the fit through the origin, or giving the vertex a really high weight), but none of them were as good as the answer given here by Jaime: How to do a polynomial fit with fixed points
It uses the method of Lagrange multipliers, which I'm vaguely familiar with from an undergraduate Advanced Multivariable course, but not much else, and it doesn't seem like the transformation of that code will be as easy as just adding a z coordinate. (Note that even though the code doesn't take into consideration the amount of charge deposited, it still gave me the best results.) I was wondering if there was a version of the same algorithm out there, but in 3D. I also contacted the author of the answer via Gmail, but didn't hear back from him.
Here is some more information about my data and what I'm trying to do in 2D: How to weigh the points in a scatter plot for a fit?
Here is my code for doing this in a way where I force the vertex to be at the origin and then fit the data with fit_intercept=False. I'm currently pursuing this method for 2D data, since I'm not sure there's a 3D version of the Lagrange-multiplier approach out there, but there are linear regression ways of doing this in 3D, for instance here: Fitting a line in 3D:
import math
import numpy as np
import sklearn.linear_model
def plot_best_fit(image_array, vertexX, vertexY):
    weights = np.array(image_array)
    x = np.where(weights>0)[1]
    y = np.where(weights>0)[0]
    size = len(image_array) * len(image_array[0])
    y = np.zeros((len(image_array), len(image_array[0])))
    for i in range(len(np.where(weights>0)[0])):
        y[np.where(weights>0)[0][i]][np.where(weights>0)[1][i]] = np.where(weights>0)[0][i]
    y = y.reshape(size)
    x = np.array(list(range(len(image_array))) * len(image_array[0]))
    weights = weights.reshape((size))
    for i in range(len(x)):
        x[i] -= vertexX
        y[i] -= vertexY
    model = sklearn.linear_model.LinearRegression(fit_intercept=False)
    model.fit(x.reshape((-1, 1)), y, sample_weight=weights)
    line_x = np.linspace(0, 512, 100).reshape((-1, 1))
    pred = model.predict(line_x)
    m, b = np.polyfit(np.linspace(0, 512, 100), np.array(pred), 1)
    angle = math.atan(m) * 180 / math.pi
    return line_x, pred, angle, b, m
image_array is a numpy array and vertexX and vertexY are the x and y coordinates of the vertex, respectively. Here's my data: https://uploadfiles.io/bbhxo. I cannot create a toy data as there is not a simple way of replicating this data, it was produced by Geant4 simulation of a neutrino interacting with an argon nucleus. I don't want to get rid of the complexity of the data. And this specific event happens to be the one for which my code does not work, I'm not sure if I can generate a data specifically so my code doesn't work on it.
This is more of a hand-made solution using basic optimization, and it is straightforward: one just measures the distance of each point to the line to be fitted and minimizes the weighted distances using basic optimize.leastsq. Code is as follows:
# -*- coding: utf-8 -*
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
from scipy import optimize
import numpy as np
def rnd( a ):
    return a * ( 1 - 2 * np.random.random() )
def affine_line( s, theta, phi, x0, y0, z0 ):
    a = np.sin( theta ) * np.cos( phi )
    b = np.sin( theta ) * np.sin( phi )
    c = np.cos( theta )
    return np.array( [ s * a + x0, s * b + y0, s * c + z0 ] )
def point_to_line_distance( x, y, z, theta, phi, x0, y0, z0 ):
    xx = x - x0
    yy = y - y0
    zz = z - z0
    a = np.sin( theta ) * np.cos( phi )
    b = np.sin( theta ) * np.sin( phi )
    c = np.cos( theta )
    r = np.array( [ xx, yy, zz ] )
    t = np.array( [ a, b, c ] )
    return np.linalg.norm( r - np.dot( r, t ) * t )
def residuals( parameters, fixpoint, data, weights=None ):
    theta, phi = parameters
    x0, y0, z0 = fixpoint
    if weights is None:
        w = np.ones( len( data ) )
    else:
        w = np.array( weights )
    diff = np.array( [ point_to_line_distance( x, y, z, theta, phi, *fixpoint ) for x, y, z in data ] )
    diff = diff * w
    return diff
### some test data
fixpoint = [ 1, 2 , -.3 ]
trueline = np.array( [ affine_line( s, .7, 1.7, *fixpoint ) for s in np.linspace( -1, 2, 50 ) ] )
rndData = np.array( [ np.array( [ a + rnd( .6), b + rnd( .35 ), c + rnd( .45 ) ] ) for a, b, c in trueline ] )
zData = [ 20 * point_to_line_distance( x , y, z , .7, 1.7, *fixpoint ) for x, y, z in rndData ]
### unweighted
bestFitValuesUW, ier = optimize.leastsq( residuals, [ 0, 0 ], args=( fixpoint, rndData ) )
print( bestFitValuesUW )
uwLine = np.array( [ affine_line( s, bestFitValuesUW[0], bestFitValuesUW[1], *fixpoint ) for s in np.linspace( -2, 2, 50 ) ] )
### weighted ( chose inverse distance as weight....would be charge in OP's case )
bestFitValuesW, ier = optimize.leastsq( residuals, [ 0, 0 ], args=( fixpoint, rndData, [ 1. / s for s in zData ] ) )
print( bestFitValuesW )
wLine = np.array( [ affine_line( s, bestFitValuesW[0], bestFitValuesW[1], *fixpoint ) for s in np.linspace( -2, 2, 50 ) ] )
### plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1, projection='3d' )
ax.plot( *np.transpose(trueline ) )
ax.scatter( *fixpoint, color='k' )
ax.scatter( rndData[::,0], rndData[::,1], rndData[::,2] , c=zData, cmap=cm.jet )
ax.plot( *np.transpose( uwLine ) )
ax.plot( *np.transpose( wLine ) )
ax.set_xlim( [ 0, 2.5 ] )
ax.set_ylim( [ 1, 3.5 ] )
ax.set_zlim( [ -1.25, 1.25 ] )
plt.show()
which returns
>> [-0.68236386 -1.3057938 ]
>> [-0.70928735 -1.4617517 ]
The fixed point is shown in black, the original line in blue. The unweighted and weighted fits are in orange and green, respectively. The data is colored according to its distance from the line.
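If the fitted angles are needed as a direction vector downstream, a minimal sketch (an editorial addition, reusing bestFitValuesW from above):
theta, phi = bestFitValuesW
direction = np.array( [ np.sin( theta ) * np.cos( phi ),
                        np.sin( theta ) * np.sin( phi ),
                        np.cos( theta ) ] )  # unit vector along the fitted line
print( direction )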

planar fit with odrpack

I am trying to use scipy.odr to get a best fit plane for some x, y, z points.
I define the plane equation implicitly as ax + by + cz + d = 0 and I perform a least squares (with scipy.linalg.lstsq) to provide the odr with an initial estimation.
The components of the beta vector (where beta = [a, b, c, d]) returned by the odr are of a magnitude between 1e-172 and 1e-167... Is such a result trustworthy? I find these numbers absurd...
Note that the points come from 3D scanning of a relatively flat face which is almost parallel to the xz plane (nearly vertical).
Here is the pprint() of the odr result:
'
Beta: [ 3.14570111e-170 3.21821458e-169 4.49232028e-172 4.49374557e-167]
Beta Std Error: [ 0. 0. 0. 0.]
Beta Covariance: [[ 6.37459471e-10 -8.57690019e-09 -2.18092934e-11 -1.13009384e-06]
[ -8.57690019e-09 5.11732570e-07 1.30123070e-09 6.74263262e-05]
[ -2.18092934e-11 1.30123070e-09 5.22674068e-12 1.70799469e-07]
[ -1.13009384e-06 6.74263262e-05 1.70799469e-07 8.88444676e-03]]
Residual Variance: 0.0
Inverse Condition #: 0.0010484041422201213
Reason(s) for Halting:
Sum of squares convergence
None
'
The code I am using :
import numpy as np
import scipy.linalg
from scipy import odr
import pickle
def planar_fit(points):
    # best-fit linear plane
    a = np.c_[points[:, 0], points[:, 1], np.ones(points.shape[0])]
    c, _, _, _ = scipy.linalg.lstsq(a, points[:, 2])  # coefficients
    # The coefficients are returned as an array beta=[a, b, c, d] from the implicit form 'a*x + b*y + c*z + d = 0'.
    beta = np.r_[c[0], c[1], -1, c[2]] / c[2]
    return beta
def odr_planar_fit(points):
    def f_3(beta, xyz):
        """ implicit definition of the plane"""
        return beta[0] * xyz[0] + beta[1] * xyz[1] + beta[2] * xyz[2] + beta[3]
    # Coordinates of the points
    x = points[:, 0]
    y = points[:, 1]
    z = points[:, 2]
    # Use least squares for initial estimate.
    beta0 = planar_fit(points)
    # Create the data object for the odr. The equation is given in the implicit form 'a*x + b*y + c*z + d = 0' and
    # beta=[a, b, c, d] (beta is the vector to be fitted). The positional argument y=1 means that the dimensionality
    # of the fitting is 1.
    lsc_data = odr.Data(np.row_stack([x, y, z]), y=1)
    # Create the odr model
    lsc_model = odr.Model(f_3, implicit=True)
    # Create the odr object based on the data, the model and the first estimation vector.
    lsc_odr = odr.ODR(lsc_data, lsc_model, beta0)
    # run the regression.
    lsc_out = lsc_odr.run()
    return lsc_out, beta0
def main():
    # import from pickle.
    with open('./points.pkl', 'rb') as f:
        points = np.array(pickle.load(f))
    # Perform the ODR
    odr_out, lstsq = odr_planar_fit(points)
    print(lstsq)
    print(odr_out.pprint())
main()
The pickle containing my points.
ODR is completely fine with multidimensional data; you were going in the correct direction.
You just made a bad choice in using the implicit version of ODR with your f_3 definition. The problem is that you have a function A*X = 0 which you try to minimize without any additional constraints. Of course, the best the optimizer can do is minimize the magnitude of A towards zero - that minimizes the error best! For the implicit optimization to work, you need to somehow introduce a constraint on the magnitude of A, e.g. by dividing by the last number:
def f_3(beta, xyz):
    """ implicit definition of the plane"""
    return beta[0]/beta[3] * xyz[0] + beta[1]/beta[3] * xyz[1] + beta[2]/beta[3] * xyz[2] + 1.0
This way, the optimizer has no other option than to do what you wanted it to do :)
Alternatively, you can also convert your model to the explicit form y = ax + cz + d, which doesn't suffer from the magnitude problems (as b == 1 all the time).
Of course, you could get additional precision by shifting your points to origin and scaling them to have a unit variance in distance.
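A minimal sketch of that explicit variant (an editorial addition, assuming points is an (N, 3) numpy array as in the question):
import numpy as np
from scipy import odr
def f_explicit( beta, xz ):
    # y = a*x + c*z + d, with the coefficient of y fixed to 1
    a, c, d = beta
    return a * xz[0] + c * xz[1] + d
def odr_planar_fit_explicit( points ):
    x, y, z = points.T
    data = odr.Data( np.vstack( [ x, z ] ), y )  # y is the response here
    model = odr.Model( f_explicit )              # explicit model, no magnitude degeneracy
    out = odr.ODR( data, model, beta0=[ 0., 0., 0. ] ).run()
    return out.beta                              # a, c, d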
Since I'm also about to use ODR, I was curious about its properties, so I played around to find out how precise and sensitive it is, and here's the result: https://gist.github.com/peci1/fb1cea77c41fe8ace6c0db8ef82539a3 .
I tested both implicit and explicit ODR, with and without normalization, and with the initial guess either from LSQ or a bad one (to see how sensitive it is to the guess). It looked like this on your data:
Basically, the yellow and grey planes are the implicit fits without normalization, which came out pretty bad, and the rest of the ODR fits are more or less the same. You can see the ODR fits differ a bit from the (faint blue) LSQ fit (which is expected).
As far as I understand the odr, it is not made for 3D data, but I might be wrong here. As this is a simple plane fit, I suggest using simple leastsq. Moreover, note that you do not really have 4 free parameters, as you can divide a*x + b*y + c*z + d = 0 e.g. by d, providing a'*x + b'*y + c'*z + 1 = 0 (if d is not zero).
If instead we write the plane in the form "all points P for which (P - p0) * n = 0", we already get the odr function for free. One can simplify by assuming that the plane offset vector p0 = s*n is a scaled normal vector. This leaves 3 free parameters: the scale s and the direction angles of the normal vector (theta, phi).
The corresponding fit looks as follows:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.optimize import leastsq
from random import random
# for rotating test data
def y_mx( theta ):
    out = np.array( [ np.cos( theta ), 0, np.sin( theta ), 0, 1, 0, -np.sin( theta ), 0, np.cos( theta ) ] )
    return out.reshape( 3, 3 )
# for rotating test data
def z_mx( theta ):
    out = np.array( [ np.cos( theta ), np.sin( theta ), 0, -np.sin( theta ), np.cos( theta ), 0, 0, 0, 1 ] )
    return out.reshape( 3, 3 )
# for test data
def make_plane( theta, phi, px, py, pz, n=100 ):
    points = []
    for i in range( n ):
        x = 1 - 2 * random()
        y = 1 - 2 * random()
        z = 0.15 * ( 1 - 2 * random() )
        points += [ np.array( [ x, y, z ] ) ]
    points = np.array( points )
    points = [ np.array( [ px, py, pz ] ) + np.dot( z_mx( phi ), np.dot( y_mx( theta ), p ) ) for p in points ]
    return np.array( points )
# residual function for leastsq
# note the plane equation is (P - p0) n = 0 if P is member of plane
# and n is normal vector of plane directly provides the normal distance function
# moreover p0 can be chosen to be s * n
def residuals( params, points ):
    scale, theta, phi = params
    nVector = np.array( [ np.sin( theta ) * np.cos( phi ), np.sin( theta ) * np.sin( phi ), np.cos( theta ) ] )
    p0 = scale * nVector
    diff = [ np.dot( p - p0, nVector ) for p in points ]
    return diff
# some test data
pnts = make_plane( 1.5, 1.49, .15, .2, .33)
#and the fit
guess = [ 0, 0, 0 ]
bestfit, err = leastsq( residuals, guess, args=( pnts, ) )
# the resulting normal vector and offset
nVectorFit = np.array( [ np.sin( bestfit[1] ) * np.cos( bestfit[2] ), np.sin( bestfit[1] ) * np.sin( bestfit[2] ), np.cos( bestfit[1] ) ] )
p0Fit = bestfit[0] * nVectorFit
# converting to standard plane equation
a = nVectorFit[0] / nVectorFit[1]
c = nVectorFit[2] / nVectorFit[1]
d = bestfit[0] / nVectorFit[1]
# plane equation data
X = np.linspace( -.6, .6, 20 )
Z = np.linspace( -.6, .6, 20 )
XX, ZZ = np.meshgrid( X, Z )
YY = -a * XX - c * ZZ + d
#plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1, projection='3d')
# original data
ax.scatter( pnts[:,0], pnts[:,1] , pnts[:,2])
# offset vector
ax.plot( [0, p0Fit[0] ], [0, p0Fit[1] ], [0, p0Fit[2] ], color = 'r')
# fitted plane
ax.plot_wireframe(XX, YY, ZZ , color = '#9900bb')
ax.set_xlim( [-1,1] )
ax.set_ylim( [-1,1] )
ax.set_zlim( [-1,1] )
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
plt.show()
Providing
Blue points is noisy data, purple is the fitted plane, and red the offset vector.
It is easy to see that for the explicit form y = a * x + c * z + d used here, the parameters a, c, d are calculated straightforwardly from the fit result.
