Least Squares Fit on Cubic Bezier Curve - python

I would like to fit a cubic Bezier curve to a set of 500 random points.
Here's the code I have for the bezier curve:
import numpy as np

try:
    from scipy.special import comb
except ImportError:  # fallback for old SciPy releases that only expose it in scipy.misc
    from scipy.misc import comb
def bernstein_poly(i, n, t):
    """Evaluate one Bernstein-type basis term at ``t``.

    Computes ``comb(n, i) * t**(n - i) * (1 - t)**i``.

    NOTE: compared with the textbook definition
    ``B_{i,n}(t) = comb(n, i) * t**i * (1 - t)**(n - i)`` the exponents are
    swapped, so this returns ``B_{n-i,n}(t)``.  Term ``i = 0`` is therefore
    1 at ``t = 1`` and term ``i = n`` is 1 at ``t = 0``.

    Args:
        i: Index of the basis term, ``0 <= i <= n``.
        n: Degree of the polynomial.
        t: Scalar or NumPy array of parameter values.

    Returns:
        The basis term evaluated at ``t`` (same shape as ``t``).
    """
    return comb(n, i) * t ** (n - i) * (1 - t) ** i
def bezier_curve(points, nTimes=1000):
    """Sample the Bezier curve defined by a sequence of control points.

    Args:
        points: Sequence of control points; element 0 of each point is used
            as x and element 1 as y.
        nTimes: Number of evenly spaced parameter values in ``[0, 1]``.

    Returns:
        Tuple ``(xvals, yvals)`` holding the sampled curve coordinates.
    """
    degree = len(points) - 1
    ctrl_x = np.array([p[0] for p in points])
    ctrl_y = np.array([p[1] for p in points])

    t = np.linspace(0.0, 1.0, nTimes)
    # One row per control point, one column per sampled parameter value.
    basis = np.array([bernstein_poly(i, degree, t) for i in range(degree + 1)])

    xvals = np.dot(ctrl_x, basis)
    yvals = np.dot(ctrl_y, basis)
    return xvals, yvals
if __name__ == "__main__":
    from matplotlib import pyplot as plt

    # Demo: draw a cubic Bezier curve through four random control points
    # and label each control point with its index.
    nPoints = 4
    points = np.random.rand(nPoints, 2) * 200
    xpoints, ypoints = points[:, 0], points[:, 1]

    xvals, yvals = bezier_curve(points, nTimes=1000)
    plt.plot(xvals, yvals)
    plt.plot(xpoints, ypoints, "ro")
    for nr, (px, py) in enumerate(points):
        plt.text(px, py, nr)
    plt.show()
I'm aware that Numpy and Scipy have least squares methods: numpy.linalg.lstsq and scipy.optimize.least_squares
But I'm not sure how I can use them to fit the curve to the 500 points. Can someone offer some assistance?
Thank you

Use the function curve_fit in scipy, https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
import numpy as np
from scipy.optimize import curve_fit
def func(x, a, b, c):
    """Exponential-decay model ``a * exp(-b * x) + c``.

    Args:
        x: Scalar or array of sample positions.
        a: Amplitude.
        b: Decay rate.
        c: Constant offset.

    Returns:
        Model value(s) at ``x``.
    """
    return a * np.exp(-b * x) + c
# Synthetic measurements: the exact model plus Gaussian noise.
xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
ydata = y + 0.2 * np.random.normal(size=len(xdata))

# Unconstrained fit.
popt, pcov = curve_fit(func, xdata, ydata)

# Same fit, constrained to 0 < a < 3, 0 < b < 2 and 0 < c < 1.
lower_bounds = 0
upper_bounds = [3., 2., 1.]
popt, pcov = curve_fit(func, xdata, ydata, bounds=(lower_bounds, upper_bounds))

The scipy documentation itself has a most excellent tutorial on using splines here:
https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
with lots of code, examples and cool graphs comparing different types of splines.

Related

How to find the best values for a and b using optimization.curve_fit. in python. I keep getting an error for my func

I am trying to find the best values for my model y = aX +b by using optimization.curve_fit but keep getting the following error:
func() missing 1 required positional argument: 'b'
My code is as follows:
# Model function as posted in the question.
# NOTE(review): curve_fit calls the model as func(xdata, *params); with the
# two-element initial guess below that is three arguments, so this
# four-parameter signature fails with the "missing ... 'b'" error quoted above.
# The body also returns a boolean comparison rather than the model value.
def func(x, y, a, b):
return y == a*x + b
# Observed data; time1_list / phi1_list are defined outside this snippet.
xdata = np.array(time1_list)
ydata = np.array(phi1_list)
# Initial guess.
x0 = np.array([0.0, 0.0])
# NOTE(review): `optimization` is not imported in the visible snippet --
# presumably scipy.optimize; confirm.
print(optimization.curve_fit(func, xdata, ydata, x0))
I have tried using this optimization method but cannot seem to get past this error. The data consists of 2 lists, xdata and ydata. My model is y = ax+b.
Eventually I will be using a least squares fit to find the optimum values of a and b.
You need to change the definition of your function so that it takes only the independent variable and the fit parameters, and returns the model value:
def func(x, a, b):
    """Straight-line model ``y = a*x + b``.

    Args:
        x: Scalar or array of the independent variable.
        a: Slope.
        b: Intercept.

    Returns:
        Model value(s) at ``x``.
    """
    return a * x + b
Here is a full example:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b):
    """Straight-line model ``y = a*x + b``.

    Args:
        x: Scalar or array of the independent variable.
        a: Slope.
        b: Intercept.

    Returns:
        Model value(s) at ``x``.
    """
    return a * x + b
# Synthetic data: the line y = 2x + 5 with Gaussian noise (sigma = 0.2).
xdata = np.linspace(1, 10, 100)
y = func(xdata, 2, 5)
rng = np.random.default_rng()
y_noise = 0.2 * rng.normal(size=xdata.size)
ydata = y + y_noise

# Initial guess for (a, b).
x0 = np.array([0.0, 0.0])
popt, pcov = curve_fit(func, xdata, ydata, p0=x0)

# Overlay the fitted line on the noisy data.
fit_label = 'fit: a=%5.3f, b=%5.3f' % tuple(popt)
plt.figure()
plt.plot(xdata, ydata, 'b-', label='data')
plt.plot(xdata, func(xdata, *popt), 'r-', label=fit_label)
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
You will get the figure :

How to plot lower boundary with a scatter plot with curve_fit and a linear line function?

I use the following code to plot a scatter plot. I have been trying to plot the lower boundary for it. I tried following the other question but I was unable to replicate it for my objective function and data. The code is as follows :
from numpy import arange
import pandas as pd
from pandas import read_csv
from scipy.optimize import curve_fit
from matplotlib import pyplot
def objective(x, a, b):
    """Linear model ``a * x + b`` used as the curve_fit objective.

    Args:
        x: Scalar or array of the independent variable.
        a: Slope.
        b: Intercept.

    Returns:
        Model value(s) at ``x``.
    """
    return a * x + b
# The snippet only imports `arange` from numpy, yet uses `np.argmin` /
# `np.argmax` below; import numpy here so those names resolve.
import numpy as np

events = pd.read_excel('values.xlsx')
x = events.loc[:, 'Value']
y = events.loc[:, 'Frame']

# Least-squares straight line through all points.
popt, _ = curve_fit(objective, x, y)
a, b = popt

pyplot.scatter(x, y)
x_line = arange(min(x), max(x), 1)
y_line = objective(x_line, a, b)
pyplot.plot(x_line, y_line, '--', color='purple')

xmin, xmax = pyplot.xlim()  # the limits of the x-axis for drawing the line
ymin, ymax = pyplot.ylim()

# argmin/argmax return positions, so index the underlying array by position
# rather than indexing the Series by label.
x_values = x.to_numpy()
pos_min = int(np.argmin(x_values))
pos_max = int(np.argmax(x_values))
alpha_min = x_values[pos_min]
alpha_max = x_values[pos_max]
pyplot.show()
I want to plot the lower boundary of the points like .
As you have a linear function, your upper and lower bound will have the same slope a but different b-values. So, we calculate them for all points and choose the lowest and highest:
import numpy as np
from scipy.optimize import curve_fit
from matplotlib import pyplot
def objective(x, a, b):
    """Linear model ``a * x + b``.

    Args:
        x: Scalar or array of the independent variable.
        a: Slope.
        b: Intercept.

    Returns:
        Model value(s) at ``x``.
    """
    return a * x + b
# Sample data: the line y = -2x + 3 shifted upwards by uniform noise in [0, 5).
rng = np.random.default_rng(123)
x = np.linspace(2, 10, 150)
y = objective(x, -2, 3) + 5 * rng.random(len(x))

popt, _ = curve_fit(objective, x, y)
a, b = popt

pyplot.scatter(x, y, label="raw data")

# A straight line only needs its two end points.
x_line = np.array([x.min(), x.max()])
pyplot.plot(x_line, objective(x_line, a, b), '--', color='purple',
            label=f"y={a:.2f}x+{b:.2f}")

# Intercept every point would need to lie on a line of slope `a`; the
# extreme intercepts give the lower and upper bounding lines.
b_values = y - a * x
lowest_b = b_values.min()
highest_b = b_values.max()
pyplot.plot(x_line, objective(x_line, a, lowest_b), '--', color='red', label="lower bound")
pyplot.plot(x_line, objective(x_line, a, highest_b), '--', color='orange', label="upper bound")
pyplot.legend()
pyplot.show()
Sample output:

How do i get the curve_fit to fit the data?

I can't figure out why my curve_fit is not following the data?
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import math
from scipy.stats import binom, poisson, norm
from scipy.optimize import curve_fit
# Load the MATLAB file into a dict keyed by variable name.
AD1 = sio.loadmat('ATLAS_DATA1' ,squeeze_me=True,mat_dtype = True)
# Copy the 'n' and 'e' entries of the loaded dict into the current namespace
# (at module level locals() is the module's global dict).
locals().update({k :AD1[k]for k in ['n','e']})
xData = e
yData = n
yErr = 0
# Plot the raw data as points only (no connecting line).
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
plt.errorbar(xData,yData,yErr,marker = '.',linestyle = '')
plt.show()
# Two-parameter exponential model a * exp(-b * e).
def func(e, a, b):
return a * np.exp(-b * e)
xDat = e
# NOTE(review): yDat is generated from the model itself with a=2, b=1; it is
# not the measured yData, so the fit below is made against these synthetic
# values instead of the measurements.
yDat = func(xDat, 2, 1)
popt, pcov = curve_fit(func, xDat, yDat)
plt.plot(xDat, func(xDat, *popt))
plt.show()
Below is my data for n at the top and e at the bottom.
Data for n and e
Graph for the data that i want to fit
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as optimize
from scipy.optimize import curve_fit
Write your xData and yData as numpy arrays as follows:
I used a sample from it
# Sample of the measurements, stored as NumPy arrays.
xData = np.array([
    383, 358, 326, 366, 335, 331, 308, 299, 303, 325, 306, 299, 270, 282,
    253, 265, 248, 256, 220, 208, 252, 215, 220, 237, 204, 213, 224, 212,
])
yData = np.array([
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
    115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 124, 128,
])

# Scatter plot of the raw sample.
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
plt.scatter(xData, yData)
plt.show()
Here's the original data.
It's better to use plt.scatter than plt.errorbar here.
I found that this equation (an exponential with a constant offset) fits your curve better:
def func(x, a, c, d):
    """Exponential model with constant offset, ``a * exp(-c * x) + d``.

    Args:
        x: Scalar or array of the independent variable.
        a: Amplitude.
        c: Decay rate.
        d: Constant offset.

    Returns:
        Model value(s) at ``x``.
    """
    return a * np.exp(-c * x) + d
Same thing goes for xDat (write it as np.array)
xDat = xData
# The original evaluated func(xDat, 2, 1), which raises TypeError because the
# model takes three parameters (a, c, d).  Plot the measurements instead so
# the fitted curve is compared with the data it was fitted to.
yDat = yData
plt.scatter(xDat, yDat)

# curve_fit's keyword for the initial guess is `p0`; `p=` is not accepted.
popt, pcov = curve_fit(func, xData, yData, p0=(10, 1e-6, 100))

# xData is not sorted; draw the fitted curve in increasing-x order so the
# line does not zig-zag across the plot.
order = np.argsort(xDat)
plt.plot(xDat[order], func(xDat[order], *popt))
plt.show()
Tip: Don't use lower-case e as a variable name, because it usually denotes Euler's number, e ≈ 2.718.
UPDATE :
if you want to use your original function heres the code:
def func(e, a, b):
    """Two-parameter exponential model ``a * exp(-b * e)``.

    Args:
        e: Scalar or array of the independent variable.
        a: Amplitude.
        b: Decay rate.

    Returns:
        Model value(s) at ``e``.
    """
    return a * np.exp(-b * e)


# Fit the measured data, starting from the initial guess a=10, b=-1e-5.
popt, pcov = curve_fit(func, xData, yData, p0=[10, -0.00001])

Is there a way to write a math formula on matplotlib plot dynamically?

I'm doing a template for my lab work in Python. To summarize the purpose of it, it's to plot data points and fit a pre-defined model with scipy curve_fit. Usually I fit polynomials or exponential curves. I managed to print the fitting params dynamically on the plot, but I have to manually type in the relevant equation every time. I'm wondering, is there an elegant way to do this dynamically? I've read about sympy, but for the time being I couldn't make it.
Here's the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from datetime import datetime
#two example functions
def f(x, p0, p1):
    """Linear model ``p0 * x + p1``."""
    return p0 * x + p1


def g(x, p0, p1):
    """Exponential model ``p0 * exp(x * p1)``."""
    return p0 * np.exp(x * p1)
# example data
xval = np.array([0, 1, 2, 3, 4, 5, 6])
yval = np.array([0, 2, 3.95, 5.8, 8.1, 10.2, 12.4])

# curve fitting
popt, pcov = curve_fit(f, xval, yval)

plt.rcParams.update({'font.size': 12})
plt.figure(figsize=(9, 7))
plt.plot(xval, yval, 'ko', label='Data points', markersize=7)
plt.title('TITLE', fontsize=15)
plt.grid()
plt.plot(xval, f(xval, *popt), 'r-', label='Fit')

# printing the params on plot, one line per parameter in axes coordinates
for idx, value in enumerate(popt):
    plt.text(0.8, 0.05 + 0.05 * (idx + 1), 'p' + str(idx) + ' = {0:.5f}'.format(value),
             transform=plt.gca().transAxes)

# manually writing the equation, that's what I want to print dynamically
# (raw string: '\c' is not a valid escape sequence in a normal string literal)
plt.text(0.8, 0.05, r'$y = p0 \cdot x + p1 $', transform=plt.gca().transAxes)
plt.text(0.86, 1.01, datetime.today().strftime('%Y.%m.%d.'), transform=plt.gca().transAxes)
plt.text(0, 1.01, 'NAME', transform=plt.gca().transAxes)
plt.ylabel('Y axis title')
plt.xlabel('X axis title')
plt.legend()
plt.show()
The expected result is:
if I use a function for fitting - let's say g(x, p0, p1) which returns p0 * np.exp(x * p1) then the returned formula itself should be printed on the plot, just like the other one in the example code :
plt.text(0.8, 0.05, '$y = p0 \cdot x + p1 $' , transform=plt.gca().transAxes)
except it's a manual solution.
I really appreciate any suggestions.
I think that you may use sympy package.
It lets you define custom symbols, build expressions from them and then evaluate those expressions. I'm not sure what the impact on performance is.
Here is your code with changes:
import numpy as np
import sympy
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from datetime import datetime
#two example functions
# Symbols shared by the model expressions and by the substitution below.
x, p0, p1 = sympy.var("x p0 p1")
f = p0 * x + p1
g = p0 * sympy.exp(x * p1)


def partial_fun(sympy_expr):
    """Wrap a sympy expression in ``x``, ``p0``, ``p1`` as a curve_fit model.

    Args:
        sympy_expr: Expression built from the module-level symbols
            ``x``, ``p0`` and ``p1``.

    Returns:
        A function ``res_fun(X, P0, P1)`` that evaluates the expression at
        every element of ``X`` and returns a float NumPy array.
    """
    def res_fun(X, P0, P1):
        # `np.float` was removed from NumPy; the builtin `float` is the
        # equivalent dtype.
        return np.array(
            [sympy_expr.evalf(subs={x: x_, p0: P0, p1: P1}) for x_ in X],
            dtype=float,
        )
    return res_fun
# example data
xval = np.array([0, 1, 2, 3, 4, 5, 6])
yval = np.array([0, 2, 3.95, 5.8, 8.1, 10.2, 12.4])

# curve fitting; build the numeric wrapper once and reuse it for the plot
model = partial_fun(f)
popt, pcov = curve_fit(model, xval, yval)

plt.rcParams.update({'font.size': 12})
plt.figure(figsize=(9, 7))
plt.plot(xval, yval, 'ko', label='Data points', markersize=7)
plt.title('TITLE', fontsize=15)
plt.grid()
plt.plot(xval, model(xval, *popt), 'r-', label='Fit')

# printing the params on plot
for idx, value in enumerate(popt):
    plt.text(0.8, 0.05 + 0.05 * (idx + 1), 'p' + str(idx) + ' = {0:.5f}'.format(value),
             transform=plt.gca().transAxes)

# Equation generated from the sympy expression: sympy.latex() emits LaTeX
# markup (subscripts, exponents), whereas str() gives Python-style text
# such as "p0*exp(p1*x)".
plt.text(0.8, 0.05, f'$y = {sympy.latex(f)} $', transform=plt.gca().transAxes)
plt.text(0.86, 1.01, datetime.today().strftime('%Y.%m.%d.'), transform=plt.gca().transAxes)
plt.text(0, 1.01, 'NAME', transform=plt.gca().transAxes)
plt.ylabel('Y axis title')
plt.xlabel('X axis title')
plt.legend()
plt.show()
I actually managed to make a solution (without sympy though), and I have to type in manually the formulas, but they are selected automatically. I use dictionary for that.
Here's the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from datetime import datetime
# Maps a model function's name to the mathtext formula shown on the plot.
fun_dict = {}


# three example functions
def f(x, p0, p1):
    """Linear model ``p0 * x + p1``."""
    return p0 * x + p1


def g(x, p0, p1):
    """Exponential model ``p0 * exp(x * p1)``."""
    return p0 * np.exp(x * p1)


def h(x, p0, p1, p2):
    """Quadratic model ``p0 * x**2 + p1 * x + p2``."""
    return p0 * x ** 2 + p1 * x + p2


# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
f_string = r'$y = p0 \cdot x + p1 $'
fun_dict['f'] = f_string
g_string = r'$y = p0 \cdot e^{p1 \cdot x} $'
fun_dict['g'] = g_string
h_string = r'$y = p0 \cdot x^2 + p1 \cdot x + p2$'
fun_dict['h'] = h_string
# example data
xval = np.array([0, 1, 2, 3, 4, 5, 6])
yval = np.array([0, 2, 3.95, 5.8, 8.1, 10.2, 12.4])


def get_fun(func):
    """Fit ``func`` to the module-level data and look up its formula string.

    Args:
        func: Model function whose ``__name__`` is a key of ``fun_dict``.

    Returns:
        Tuple ``(popt, formula, func)``: the fitted parameters, the formula
        string registered for the function, and the function itself.

    Raises:
        KeyError: If no formula is registered under the function's name.
    """
    fitted, _ = curve_fit(func, xval, yval)
    return fitted, fun_dict[func.__name__], func


popt, str_name, func = get_fun(h)
plt.rcParams.update({'font.size': 12})
plt.figure(figsize=(9, 7))
plt.plot(xval, yval, 'ko', label='Data points', markersize=7)
plt.title('TITLE', fontsize=15)
plt.grid()
plt.plot(xval, func(xval, *popt), 'r-', label='Fit')

# One text line per fitted parameter, stacked upwards in axes coordinates.
for idx, value in enumerate(popt):
    plt.text(0.8, 0.05 + 0.05 * (idx + 1), 'p' + str(idx) + ' = {0:.5f}'.format(value),
             transform=plt.gca().transAxes)

# Formula string selected automatically for the fitted function.
plt.text(0.7, 0.05, str_name, transform=plt.gca().transAxes)
plt.text(0.86, 1.01, datetime.today().strftime('%Y.%m.%d.'), transform=plt.gca().transAxes)
plt.text(0, 1.01, 'NAME', transform=plt.gca().transAxes)
plt.ylabel('Y axis title')
plt.xlabel('X axis title')
plt.legend()
plt.show()

Python Data Fitting

I am getting a horrible fit when I am trying to fit a parabola to this data.
I am initially making a histogram of the data which is the position of an object and then plotting the negative log values of the histogram bin counts to the position using a parabola fit.
the code I am using is this:
time, pos = postime()
plt.plot(time, pos)
poslen = len(pos)
plt.xlabel('Time')
plt.ylabel('Positions')
plt.show()

n, bins, patches = plt.hist(pos, bins=100)
plt.show()

counts = np.asarray(n, dtype=float)
edges = np.asarray(bins, dtype=float)

# Bin centres: midpoint of each pair of neighbouring edges.
centers = (edges[:-1] + edges[1:]) / 2

# Keep only non-empty bins (log(0) is undefined).  A boolean mask is used
# instead of writing 0 as a sentinel and filtering on `!= 0`, which also
# removed a legitimate bin centre of 0 or a log value of 0 and left `n`
# and `pbin` with different lengths.
occupied = counts > 0
sig = np.power(1 / counts[occupied], 2)

# Negative log of the normalised bin counts, paired with the bin centres.
n = -np.log(counts[occupied] / poslen)
pbin = centers[occupied]
from scipy.optimize import curve_fit
def parabola(x, a, b):
    """Parabola centred on ``x = 0``: ``a * x**2 + b``.

    Args:
        x: Scalar or array-like of the independent variable.
        a: Curvature coefficient.
        b: Vertical offset.

    Returns:
        Model value(s) at ``x``.
    """
    return a * (np.power(x, 2)) + b
# Fit the parabola to (bin centre, -log probability) and overlay the result.
popt, pcov = curve_fit(parabola, pbin, n)
print(popt)  # print() call: the bare `print popt` statement is Python 2 only
plt.plot(pbin, n)
plt.plot(pbin, parabola(pbin, *popt), 'r-')
I am not sure why you are computing the histogram... But here is a working example which does not require histogram computation.
import numpy as np
from scipy.optimize import curve_fit
from matplotlib import pyplot
# Synthetic data: a parabola plus uniform noise in [0, 5).
time_ = np.arange(-5, 5, 0.1)
pos = time_**2 + np.random.rand(len(time_))*5


def parabola(x, a, b):
    """Parabola centred on ``x = 0``: ``a * x**2 + b``.

    Args:
        x: Scalar or array-like of the independent variable.
        a: Curvature coefficient.
        b: Vertical offset.

    Returns:
        Model value(s) at ``x``.
    """
    return a * (np.power(x, 2)) + b


# Fit directly to the raw samples and draw the fitted curve over them.
popt, pcov = curve_fit(parabola, time_, pos)
yfit = parabola(time_, *popt)
pyplot.plot(time_, pos, 'o')
pyplot.plot(time_, yfit)
Also, if your time_ vector is not uniformly sampled and you want the fitted curve evaluated on a uniform grid, you can do: fittime_ = np.linspace(np.min(time_), np.max(time_)) and then yfit = parabola(fittime_, *popt).
You can also use matrix inversion.
import numpy as np
import matplotlib.pyplot as plt
# Noisy samples of y = x^2, stored as a column vector.
x = np.linspace(-5, 5, 100)
Y = (np.power(x, 2) + np.random.normal(0, 1, x.shape)).reshape(-1, 1)

# Design matrix with columns [1, x, x^2].
X = np.c_[np.ones(x.shape), x, np.power(x, 2)]

# Normal equations: A = (X^T X)^-1 X^T Y.
Xt = X.transpose()
gram_inverse = np.linalg.inv(Xt.dot(X))
A = gram_inverse.dot(Xt.dot(Y))
Yp = X.dot(A)

fig = plt.figure()
ax = fig.add_subplot()
plt.plot(x, Y, 'o', alpha=0.5)
plt.plot(x, Yp)
plt.show()
The matrix form of the problem is
$X A = Y$
and its least-squares solution is $A = (X^{T} X)^{-1} X^{T} Y$
You can have a better idea here if needed. It does not always work out and you may want to apply some form of regularization.

Categories