How to fix location=0 in a Python curve fit with scipy.optimize

Here is the sample code:
x = [1.5, 4, 10, 50, 90]
y = [6/100, 2.6/100, 1.4/100, 0.4/100, 0.2/100]

def f(x, a, loc, scale):
    loc = 0
    return gamma.pdf(x, a, loc, scale)

optimize.curve_fit(f, x, y)
The result gives me loc=1. Is there any way to force loc=0?
I noticed that when x contains non-integer elements, loc cannot be fixed at 0, otherwise the curve fit does not work. What is the reason behind that?
As an example of why my code did not work in some cases:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
from scipy.stats import gamma

def f(x, a, loc, scale):
    loc = 0
    return gamma.pdf(x, a, loc, scale)

init_guess = [0.1, 0, 0.1]
fig = plt.subplots(figsize=(5, 3))
fit_2worst = optimize.curve_fit(f, x, y, p0=init_guess)
x2 = np.linspace(0, 100, 200)
y2 = gamma.pdf(x2, a=fit_2worst[0][0], loc=fit_2worst[0][1], scale=fit_2worst[0][2])
plt.title('Gamma with k=' + "{:.2}".format(fit_2worst[0][0]) + '\nTheta=' + str(int(fit_2worst[0][2])))
plt.plot(x2, y2, "y-")
print('k:', fit_2worst[0][0], 'location:', fit_2worst[0][1], 'theta:', fit_2worst[0][2])
plt.show()
The output is:
k: 36.171512499294444 location: 0.0 theta: 3.725335489050758
The resulting (incorrect) plot:
With the code proposed by @Joe, I am able to get the correct fit:
def f(x, a, scale):
    # loc = 0
    return gamma.pdf(x, a, scale=scale, loc=0)

fig = plt.subplots(figsize=(5, 3))
opt = optimize.curve_fit(f, x, y)
x2 = np.linspace(0, 100, 200)
y2 = gamma.pdf(x2, a=opt[0][0], scale=opt[0][1])
plt.title('Gamma with k=' + "{:.2}".format(opt[0][0]) + '\nTheta=' + str(int(opt[0][1])))
plt.plot(x2, y2, "y-")
print('k:', opt[0][0], 'location:', 0, 'theta:', opt[0][1])
plt.show()
This prints:
k: 0.23311781831847955 location: 0 theta: 132.0300661365553
I am not sure why the previous code works for integer x values but not for floats.

curve_fit is just least squares: every parameter you expose is free for the optimizer to vary.
You can make loc = 0 by not making it a fit parameter at all, so the optimizer never touches it. Try:
def f(x, a, scale):
    # loc = 0
    return gamma.pdf(x, a, scale=scale, loc=0)

optimize.curve_fit(f, x, y)
Full example that also produces a plot:
import matplotlib.pyplot as plt
from scipy import optimize
from scipy.stats import gamma
import numpy as np

x = [1.5, 4, 10, 50, 90]
y = [6/100, 2.6/100, 1.4/100, 0.4/100, 0.2/100]

def f(x, a, scale):
    # loc = 0
    return gamma.pdf(x, a, scale=scale, loc=0)

opt = optimize.curve_fit(f, x, y)
print(opt)
x_0 = np.arange(0.0, 90)
y_0 = f(x_0, *(opt[0]))
plt.plot(x, y)
plt.plot(x_0, y_0, 'r.')
plt.show()
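For completeness, here is a minimal sketch (my addition, not part of the original answer) of why the original three-parameter wrapper misbehaves: because loc is overwritten inside f, the residuals never depend on the fitted loc, so the optimizer gets no signal from that parameter and simply returns its initial guess (1 by default, which is where the reported loc=1 comes from), and its covariance cannot be estimated.
import numpy as np
from scipy import optimize
from scipy.stats import gamma

x = np.array([1.5, 4, 10, 50, 90])
y = np.array([6/100, 2.6/100, 1.4/100, 0.4/100, 0.2/100])

def f_bad(x, a, loc, scale):
    loc = 0  # the fitted loc is discarded, so it has no effect on the residuals
    return gamma.pdf(x, a, loc, scale)

# loc is a "dead" parameter: its column of the Jacobian is all zeros, so it
# keeps its starting value and curve_fit typically warns that the covariance
# could not be estimated.
popt, pcov = optimize.curve_fit(f_bad, x, y)
print(popt)  # the middle entry is just the untouched default initial guess for loc
print(pcov)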

Related

How to plot the lower boundary of a scatter plot fitted with curve_fit and a linear function?

I use the following code to make a scatter plot, and I have been trying to plot its lower boundary. I tried following another question, but I was unable to replicate it for my objective function and data. The code is as follows:
from numpy import arange
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from matplotlib import pyplot

def objective(x, a, b):
    return a * x + b

events = pd.read_excel('values.xlsx')
x = events.loc[:, 'Value']
y = events.loc[:, 'Frame']
popt, _ = curve_fit(objective, x, y)
a, b = popt
pyplot.scatter(x, y)
x_line = arange(min(x), max(x), 1)
y_line = objective(x_line, a, b)
pyplot.plot(x_line, y_line, '--', color='purple')
xmin, xmax = pyplot.xlim()  # the limits of the x-axis for drawing the line
ymin, ymax = pyplot.ylim()
pos_min = np.argmin(x)
pos_max = np.argmax(x)
alpha_min = x[pos_min]
alpha_max = x[pos_max]
pyplot.show()
I want to plot the lower boundary of the points.
As you have a linear function, your upper and lower boundaries will have the same slope a but different b values. So we calculate the b value for every point and choose the lowest and the highest:
import numpy as np
from scipy.optimize import curve_fit
from matplotlib import pyplot

def objective(x, a, b):
    return a * x + b

# sample data
rng = np.random.default_rng(123)
x = np.linspace(2, 10, 150)
y = objective(x, -2, 3)
y += 5 * rng.random(len(x))

popt, _ = curve_fit(objective, x, y)
a, b = popt
pyplot.scatter(x, y, label="raw data")
x_line = np.asarray([np.min(x), np.max(x)])
y_line = objective(x_line, a, b)
pyplot.plot(x_line, y_line, '--', color='purple', label=f"y={a:.2f}x+{b:.2f}")

# intercept each data point would require if it lay on a line with slope a
b_values = y - a * x
pyplot.plot(x_line, objective(x_line, a, np.min(b_values)), '--', color='red', label="lower bound")
pyplot.plot(x_line, objective(x_line, a, np.max(b_values)), '--', color='orange', label="upper bound")
pyplot.legend()
pyplot.show()
Sample output:
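Applied to the original pandas data from the question, a sketch along the same lines could look like this (assuming values.xlsx with 'Value' and 'Frame' columns exists as described; I have not run this against the real file):
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from matplotlib import pyplot

def objective(x, a, b):
    return a * x + b

events = pd.read_excel('values.xlsx')
x = events['Value'].to_numpy()
y = events['Frame'].to_numpy()

popt, _ = curve_fit(objective, x, y)
a, b = popt
b_values = y - a * x                   # intercept implied by each point at slope a
x_line = np.array([x.min(), x.max()])

pyplot.scatter(x, y, label="raw data")
pyplot.plot(x_line, objective(x_line, a, b), '--', color='purple', label="fit")
pyplot.plot(x_line, objective(x_line, a, b_values.min()), '--', color='red', label="lower bound")
pyplot.legend()
pyplot.show()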

How do I solve the dimension mismatch error when plotting?

I am trying to fit data generated with formula 1 using formula 2. The former has 3 parameters, whereas the latter has 5 fitting parameters. But now I get an error when plotting the fitted curve, due to a shape mismatch.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def func(x, a, b, c, d, e):
    return ((a/e) * (2*x)**b) + (d * (2*x)**c)

y = []
x = []
A = 6.7
B = 2.0
C = 0.115
for N in np.logspace(1, 9., 100, base=10.):
    x.append(int(N))
    y.append(np.exp((A - np.log(int(N)))/B) + C)

plt.loglog(x, y, 'b:*', label='data')
popt, pcov = curve_fit(func, x, y)
print(popt)
plt.loglog(x, func(x, *popt))
I would like to see the fitted curve, but there is a dimension error in the last line, plt.loglog(x, func(x, *popt)).
One way to fix this is to create a list y_model to which you append the model value corresponding to each x.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def func(x, a, b, c, d, e):
    return ((a/e) * (2*x)**b) + (d * (2*x)**c)

y = []
x = []
A = 6.7
B = 2.0
C = 0.115
for N in np.logspace(1, 9., 100, base=10.):
    x.append(int(N))
    y.append(np.exp((A - np.log(int(N)))/B) + C)

popt, pcov = curve_fit(func, x, y)

y_model = []
for e in x:
    y_model.append(func(e, *popt))

plt.loglog(x, y, 'b:*', label='data')
plt.loglog(x, y_model)
Result:
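An alternative (my suggestion, not part of the original answer) is to convert x to a NumPy array; func then broadcasts element-wise and no explicit loop is needed:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def func(x, a, b, c, d, e):
    return ((a/e) * (2*x)**b) + (d * (2*x)**c)

A, B, C = 6.7, 2.0, 0.115
# same data as above, but stored as a float array instead of a list
x = np.array([int(N) for N in np.logspace(1, 9., 100, base=10.)], dtype=float)
y = np.exp((A - np.log(x))/B) + C

popt, pcov = curve_fit(func, x, y)
plt.loglog(x, y, 'b:*', label='data')
plt.loglog(x, func(x, *popt), label='fit')  # works because x is an array
plt.legend()
plt.show()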

Multiple plots at the same position in Mayavi

I'm trying to plot this "F" vector field in R3, together with the sphere of radius pi/2 centered at the origin.
I do not understand why my vectors and my sphere are not at the same position.
I do not understand the mlab.axes "axis visibility" options; I just want to display the usual x, y, z axes.
Sorry if it is obvious to some, but I went through the standard documentation, a few examples, and Google searches for a few hours, and I'm still clueless.
My current Mayavi result and code:
import numpy as np
from mayavi import mlab

# Functions
def h(t):
    return np.exp(-1/t) * (1/np.cos(t))

def F(x, y, z):
    norm = np.linalg.norm([x, y, z])
    h_norm = (h(norm)/norm)
    return [x*h_norm, y*h_norm, z*h_norm]

# Vectors
x, y, z = np.meshgrid(np.arange(-2, 2, 0.5),
                      np.arange(-2, 2, 0.5),
                      np.arange(-2, 2, 0.5))
u, v, w = F(x, y, z)
src = mlab.pipeline.vector_field(u, v, w)
mlab.pipeline.vectors(src, mask_points=20, scale_factor=.5)

# Ball
dphi, dtheta = np.pi/250.0, np.pi/250.0
[phi, theta] = np.mgrid[0:2*np.pi:dphi,
                        0:np.pi:dtheta]
r = np.pi / 2
x2 = r*np.sin(theta)*np.cos(phi)
y2 = r*np.sin(theta)*np.sin(phi)
z2 = r*np.cos(theta)
s = mlab.mesh(x2, y2, z2)

#mlab.axes(x_axis_visibility=True, y_axis_visibility=True)

# View it all.
mlab.outline()
mlab.show()
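As a heavily hedged sketch (my addition, based on my reading of the Mayavi documentation and not tested here): mlab.pipeline.vector_field places the data on plain index coordinates when it is given only u, v, w, which would explain why the vectors and the pi/2 sphere end up at different positions. Passing the x, y, z grids together with u, v, w should put both objects in the same data coordinates, and a bare mlab.axes() call should draw the usual x, y, z axes.
import numpy as np
from mayavi import mlab

def h(t):
    return np.exp(-1/t) * (1/np.cos(t))

def F(x, y, z):
    norm = np.linalg.norm([x, y, z])
    h_norm = h(norm)/norm
    return [x*h_norm, y*h_norm, z*h_norm]

# np.mgrid produces the regularly ordered grids that vector_field expects
x, y, z = np.mgrid[-2:2:0.5, -2:2:0.5, -2:2:0.5]
u, v, w = F(x, y, z)

# passing x, y, z along with u, v, w keeps the vectors in data coordinates,
# i.e. the same coordinates used by the sphere below
src = mlab.pipeline.vector_field(x, y, z, u, v, w)
mlab.pipeline.vectors(src, mask_points=20, scale_factor=.5)

# sphere of radius pi/2 centered at the origin
phi, theta = np.mgrid[0:2*np.pi:np.pi/250.0, 0:np.pi:np.pi/250.0]
r = np.pi / 2
mlab.mesh(r*np.sin(theta)*np.cos(phi), r*np.sin(theta)*np.sin(phi), r*np.cos(theta))

mlab.axes()      # plain x, y, z axes attached to the last object
mlab.outline()
mlab.show()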

SciPy curve_fit with np.log returns immediately with popt = p0, pcov = inf

I'm trying to optimize a logarithmic fit to a data set with scipy.optimize.curve_fit. Before trying it on an actual data set, I wrote code to run on a dummy data set.
def do_fitting():
    x = np.linspace(0, 4, 100)
    y = func(x, 1.1, .4, 5)
    y2 = y + 0.2 * np.random.normal(size=len(x))
    popt, pcov = curve_fit(func, x, y2, p0=np.array([2, 0.5, 1]))
    plt.figure()
    plt.plot(x, y, 'bo', label="Clean Data")
    plt.plot(x, y2, 'ko', label="Fuzzed Data")
    plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
    plt.legend()
    plt.show()
Of course, do_fitting() relies on func(), which it passes to curve_fit. Here's the problem. When I pass a func() that contains np.log, i.e. the function that I actually want to fit to, curve_fit declares that p0 (the initial condition) is the optimal solution and returns immediately with an infinite covariance.
Here's what happens if I run do_fitting() with a non-logarithmic func():
def func(x, a, b, c):
    return a * np.exp(x*b) + c

popt = [ 0.90894173  0.44279212  5.19928151]
pcov = [[ 0.02044817 -0.00471525 -0.02601574]
        [-0.00471525  0.00109879  0.00592502]
        [-0.02601574  0.00592502  0.0339901 ]]
Here's what happens when I run do_fitting() with a logarithmic func():
def func(x, a, b, c):
    return a * np.log(x*b) + c

popt = [ 2.   0.5  1. ]
pcov = inf
You'll notice that the logarithmic solution for popt is equal to the value I gave curve_fit for p0 in the above do_fitting(). This is true, and pcov is infinite, for every value of p0 I have tried.
What am I doing wrong here?
The problem is very simple - since the first value in your x array is 0, you are taking the log of 0, which is equal to -inf:
x = np.linspace(0, 4, 100)
p0 = np.array([2, 0.5, 1])
print(func(x, *p0).min())
# -inf
I was able to fit a logarithmic function just fine using the following code (hardly modified from your original):
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def func(x, a, b, c):
    return a * np.log(x+b) + c

def do_fitting():
    x = np.linspace(0, 4, 100)
    y = func(x, 1.1, .4, 5)
    y2 = y + 0.2 * np.random.normal(size=len(x))
    popt, pcov = curve_fit(func, x, y2, p0=np.array([2, 0.5, 1]))
    plt.figure()
    plt.plot(x, y, 'bo', label="Clean Data")
    plt.plot(x, y2, 'ko', label="Fuzzed Data")
    plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
    plt.legend()
    plt.show()

do_fitting()
(Unfortunately I can't post a picture of the final fit, but it agrees quite nicely with the clean data).
Likely your problem is not the logarithm itself, but some difficulty curve_fit is having with the specific function you're trying to fit. Can you edit your question to provide an example of the exact logarithmic function you're trying to fit?
EDIT: The function you provided is not well-defined for x=0, and produces a RuntimeWarning upon execution. curve_fit is not good at handling NaNs, and will not be able to fit the function in this case. If you change x to
x = np.linspace(1, 4, 100)
curve_fit performs just fine.
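If offsetting x is not an option, another workaround (my addition, a sketch rather than a tested recipe) is to drop the points where the model is undefined before fitting. Note also that with a * np.log(x*b) + c the parameters b and c are not separately identifiable, since the model equals a*log(x) + a*log(b) + c, so their covariance entries may still come back as inf even when the fit itself succeeds.
import numpy as np
from scipy.optimize import curve_fit

def func(x, a, b, c):
    return a * np.log(x * b) + c

x = np.linspace(0, 4, 100)
mask = x > 0                      # x = 0 would make log(x*b) equal to -inf
y2 = func(x[mask], 1.1, .4, 5) + 0.2 * np.random.normal(size=mask.sum())

popt, pcov = curve_fit(func, x[mask], y2, p0=np.array([2, 0.5, 1]))
print(popt)  # a should come out near 1.1; only the combination a*log(b) + c is determined for the rest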

Finding the maximum of a curve scipy

I have fitted a curve to a set of data points. I would like to know how to find the maximum point of the curve and then annotate that point (I don't want to simply take the largest y value from my data to do this). I cannot share my exact code, but here is the basic layout.
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

x = [1, 2, 3, 4, 5]
y = [1, 4, 16, 4, 1]

def f(x, p1, p2, p3):
    return p3*(p1/((x-p2)**2 + (p1/2)**2))

p0 = (8, 16, 0.1)  # guess parameters
plt.plot(x, y, "ro")
popt, pcov = curve_fit(f, x, y, p0)
plt.plot(x, f(x, *popt))
Also, is there a way to find the peak width?
Am I missing a simple built-in function that could do this? Could I differentiate the function and find the point where the derivative is zero? If so, how?
Once you have fitted the best parameters, you can find the peak of the fitted curve using minimize_scalar (or one of the other methods from scipy.optimize).
Note that below I've shifted x[2] to 3.2 so that the peak of the curve doesn't land exactly on a data point, and we can be sure we're finding the peak of the curve, not of the data.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit, minimize_scalar

x = [1, 2, 3.2, 4, 5]
y = [1, 4, 16, 4, 1]

def f(x, p1, p2, p3):
    return p3*(p1/((x-p2)**2 + (p1/2)**2))

p0 = (8, 16, 0.1)  # guess parameters
plt.plot(x, y, "ro")
popt, pcov = curve_fit(f, x, y, p0)

# find the peak
fm = lambda x: -f(x, *popt)
r = minimize_scalar(fm, bounds=(1, 5))
print("maximum:", r["x"], f(r["x"], *popt))  # maximum: 2.99846874275 18.3928199902

x_curve = np.linspace(1, 5, 100)
plt.plot(x_curve, f(x_curve, *popt))
plt.plot(r['x'], f(r['x'], *popt), 'ko')
plt.show()
Of course, rather than optimizing the function, we could just calculate it for a bunch of x-values and get close:
x = np.linspace(1, 5, 10000)
y = f(x, *popt)
imax = np.argmax(y)
print(imax, x[imax])  # 4996 2.99859985999
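The question also asked about the peak width. Building on the variables from the snippets above (f, popt, r and x), one way (my addition, offered as a sketch) is to locate where the fitted curve crosses half of its peak value with brentq and take the distance between the two crossings as the full width at half maximum:
from scipy.optimize import brentq

peak_x = r["x"]
half_max = f(peak_x, *popt) / 2.0
g = lambda x: f(x, *popt) - half_max   # zero at the half-maximum crossings

left = brentq(g, min(x), peak_x)       # crossing on the rising side
right = brentq(g, peak_x, max(x))      # crossing on the falling side
print("FWHM:", right - left)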
If you don't mind using sympy, it's pretty easy. Assuming the code you posted has already been run:
import sympy
sym_x = sympy.symbols('x', real=True)
sym_f = f(sym_x, *popt)
sym_df = sym_f.diff()
solns = sympy.solve(sym_df) # returns [3.0]
