How do I solve the error in plot variable dimension mismatch? - python

I am trying to fit data generated with formula 1 using formula 2. The former has 3 parameters, whereas the latter has 5 fitting parameters. However, I now get an error when plotting the fitted curve because of a shape mismatch.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c, d, e):
    """Two-term power-law model: (a/e)*(2x)**b + d*(2x)**c.

    ``x`` must support element-wise arithmetic (a scalar or a NumPy array).
    A plain Python list is *repeated* by ``2*x`` rather than doubled, so the
    result does not line up with the inputs (or the power fails outright).
    """
    return (((a/e) * (2*x)**b) + (d * (2*x)**c))
y = []
x = []
A = 6.7
B = 2.0
C = 0.115
# Sample 100 log-spaced points between 1e1 and 1e9, truncated to integers,
# and evaluate the generating formula at each of them.
for N in np.logspace(1, 9., 100, base = 10.):
    x.append(int(N))
    y.append(np.exp((A-np.log(int(N)))/B)+C)
plt.loglog(x, y, 'b:*', label='data')
popt, pcov = curve_fit(func, x, y)
print(popt)
# x is still a Python list here, so 2*x inside func repeats the list (200
# items) instead of doubling each value; the result no longer matches x.
plt.loglog(x, func(x, *popt))
I would like to see the fitted curve, but there is a dimension error in the last line, `plt.loglog(x, func(x, *popt))`.

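The error occurs because x is a Python list: inside func, 2*x repeats the list instead of doubling each value, so the result has twice as many elements as x. One way to avoid this is to create a list y_model to which you add the model value corresponding to each x.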
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c, d, e):
    """Two-term power-law model: (a/e)*(2x)**b + d*(2x)**c.

    ``x`` may be a scalar, a list or a NumPy array; it is converted to a
    float array first so that ``2*x`` always doubles the values (a plain
    list would be repeated instead) and large integers cannot overflow.
    """
    x = np.asarray(x, dtype=float)
    doubled = 2 * x
    return (a / e) * doubled**b + d * doubled**c
A = 6.7
B = 2.0
C = 0.115
# 100 log-spaced sample points between 1e1 and 1e9, truncated to integers,
# and the generating formula evaluated at each of them.
x = [int(N) for N in np.logspace(1, 9., 100, base=10.)]
y = [np.exp((A - np.log(n)) / B) + C for n in x]
popt, pcov = curve_fit(func, x, y)
# Evaluate the fitted model one point at a time so that func always receives
# a scalar and the result has exactly one value per element of x.
y_model = [func(x_i, *popt) for x_i in x]
plt.loglog(x, y, 'b:*', label='data')
plt.loglog(x, y_model)
Result:

Related

How to find the best values for a and b using optimization.curve_fit. in python. I keep getting an error for my func

I am trying to find the best values for my model y = aX +b by using optimization.curve_fit but keep getting the following error:
func() missing 1 required positional argument: 'b'
My code is as follows:
def func(x, y, a, b):
    # NOTE: curve_fit calls func(xdata, *params), so `y` is treated as a fit
    # parameter here, and the body returns a boolean comparison rather than
    # the model value a*x + b.
    return y == a*x + b
# Convert the two input lists to NumPy arrays for curve_fit.
xdata = np.array(time1_list)
ydata = np.array(phi1_list)
# Initial guess.
x0 = np.array([0.0, 0.0])
# x0 holds two values, so func is called as func(xdata, p0, p1); that is one
# argument short of its four-parameter signature.
print(optimization.curve_fit(func, xdata, ydata, x0))
I have tried using this optimization method but cannot seem to get past this error. The data consists of 2 lists, xdata and ydata. My model is y = ax+b.
Eventually I will be using a least squares fit to find the optimum values of a and b.
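You need to change the definition of your function: curve_fit expects a model of the form f(x, *params) that returns the predicted y, so y must not be an argument. Define it like this: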
def func(x, a, b):
    """Straight-line model: return a*x + b for the independent variable x."""
    return a * x + b
Here is a full example:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b):
    """Straight-line model: return a*x + b for the independent variable x."""
    return a * x + b
# Synthetic data: the line y = 2x + 5 at 100 points plus Gaussian noise
# scaled by 0.2.
xdata = np.linspace(1, 10, 100)
clean = func(xdata, 2, 5)
rng = np.random.default_rng()
noise = rng.normal(size=xdata.size) * 0.2
ydata = clean + noise

# Start the optimiser from a = 0, b = 0.
initial_guess = np.array([0.0, 0.0])
popt, pcov = curve_fit(func, xdata, ydata, initial_guess)

# Plot the noisy samples and the fitted line, with the fitted values in the
# legend entry.
fit_label = 'fit: a=%5.3f, b=%5.3f' % tuple(popt)
plt.figure()
plt.plot(xdata, ydata, 'b-', label='data')
plt.plot(xdata, func(xdata, *popt), 'r-', label=fit_label)
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
You will get the figure :

How to plot lower boundary with a scatter plot with curve_fit and a linear line function?

I use the following code to plot a scatter plot. I have been trying to plot the lower boundary for it. I tried following the other question but I was unable to replicate it for my objective function and data. The code is as follows :
from numpy import arange
import pandas as pd
from pandas import read_csv
from scipy.optimize import curve_fit
from matplotlib import pyplot
def objective(x, a, b):
    """Straight-line model used for the fit: slope a, intercept b."""
    return a * x + b
events = pd.read_excel('values.xlsx')
x = events.loc[:, 'Value']
y = events.loc[:, 'Frame']
# Fit the straight line and draw it over the scatter plot.
popt, _ = curve_fit(objective, x, y)
a, b = popt
pyplot.scatter(x, y)
x_line = arange(min(x), max(x), 1)
y_line = objective(x_line, a, b)
pyplot.plot(x_line, y_line, '--', color='purple')
xmin, xmax = pyplot.xlim() # the limits of the x-axis for drawing the line
ymin, ymax = pyplot.ylim()
# idxmin/idxmax return index *labels*, which is what .loc expects; this also
# avoids np.argmin/np.argmax, since `np` is never imported in this snippet.
pos_min = x.idxmin()
pos_max = x.idxmax()
alpha_min = x.loc[pos_min]
alpha_max = x.loc[pos_max]
pyplot.show()
I want to plot the lower boundary of the points like .
As you have a linear function, your upper and lower bound will have the same slope a but different b-values. So, we calculate them for all points and choose the lowest and highest:
import numpy as np
from scipy.optimize import curve_fit
from matplotlib import pyplot
def objective(x, a, b):
    """Straight-line model used for the fit: slope a, intercept b."""
    return a * x + b
# Sample data: the line y = -2x + 3 with a uniform offset in [0, 5) added to
# every point (seeded so the example is reproducible).
rng = np.random.default_rng(123)
x = np.linspace(2, 10, 150)
y = objective(x, -2, 3)
y += 5 * rng.random(len(x))

# Least-squares line through the cloud of points.
popt, _ = curve_fit(objective, x, y)
a, b = popt
pyplot.scatter(x, y, label="raw data")

# A straight line only needs its two end points.
x_line = np.asarray([np.min(x), np.max(x)])
fit_line = objective(x_line, a, b)
pyplot.plot(x_line, fit_line, '--', color='purple', label=f"y={a:.2f}x+{b:.2f}")

# Intercept each point would need in order to lie on a line of slope a; the
# smallest and largest of these give the lower and upper bounding lines.
b_values = y - a * x
lowest_b = np.min(b_values)
highest_b = np.max(b_values)
pyplot.plot(x_line, objective(x_line, a, lowest_b), '--', color='red', label="lower bound")
pyplot.plot(x_line, objective(x_line, a, highest_b), '--', color='orange', label="upper bound")
pyplot.legend()
pyplot.show()
Sample output:

How do i get the curve_fit to fit the data?

I can't figure out why my curve_fit is not following the data?
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import math
from scipy.stats import binom, poisson, norm
from scipy.optimize import curve_fit
AD1 = sio.loadmat('ATLAS_DATA1', squeeze_me=True, mat_dtype=True)
# Pull the two arrays out explicitly rather than injecting them through
# locals().update(...), which hides where `n` and `e` come from and is not
# guaranteed to create variables outside module scope.
n = AD1['n']
e = AD1['e']
xData = e
yData = n
yErr = 0
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
plt.errorbar(xData, yData, yErr, marker='.', linestyle='')
plt.show()
def func(e, a, b):
    """Exponential decay model: a * exp(-b * e)."""
    return a * np.exp(-b * e)
xDat = e
# NOTE: yDat is generated from func with a=2, b=1 instead of being taken from
# yData, so the fit below never sees the measured n values.
yDat = func(xDat, 2, 1)
popt, pcov = curve_fit(func, xDat, yDat)
# Draw the fitted model over the same x values.
plt.plot(xDat, func(xDat, *popt))
plt.show()
Below is my data for n at the top and e at the bottom.
Data for n and e
Graph for the data that i want to fit
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as optimize
from scipy.optimize import curve_fit
Write your xData and yData as numpy arrays as follows:
I used a sample from it
# Sample of the data, stored as NumPy arrays.
# NOTE(review): the question assigns e to xData and n to yData; here the
# values look swapped (383, 358, ... resemble event counts) - confirm.
xData =np.array([383,358,326,366,335,331,308,299,303,325,306,299,270,282,253,265,248,256,220,208,252,215,220,237,204,213,224,212])
yData = np.array([101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,124,128])
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
# Show the raw points as a scatter plot.
plt.scatter(xData,yData)
plt.show()
Here's the original data.
It's better to use plt.scatter than plt.errorbar here.
I found that this equation describes your curve better:
def func(x, a, c, d):
    """Exponential decay with a constant offset: a * exp(-c * x) + d."""
    return a * np.exp(-c * x) + d
Same thing goes for xDat (write it as np.array)
# Show the measured points, then overlay the fitted curve.
plt.scatter(xData, yData)
# The initial guess is passed through curve_fit's `p0` keyword; the three
# values correspond to func's parameters (a, c, d).
popt, pcov = curve_fit(func, xData, yData, p0=(10, 1e-6, 100))
# Evaluate the fit on sorted x values so the line is drawn left to right
# instead of zig-zagging between unsorted points.
xDat = np.sort(xData)
plt.plot(xDat, func(xDat, *popt))
plt.show()
Tip: Don't use lower-case e as a variable name, because e usually denotes Euler's number, e ≈ 2.718.
UPDATE:
If you want to use your original function, here's the code:
def func(e, a, b):
    """Exponential decay model: a * exp(-b * e)."""
    return a * np.exp(-b * e)
# p0 supplies the starting values for func's parameters (a, b).
popt, pcov = curve_fit(func, xData, yData,p0 = [10,-0.00001])

How to fix location=0 at python curve fit scipy.optimize

Here are the sample codes
x=[1.5,4,10,50,90]
y=[6/100,2.6/100,1.4/100,0.4/100,0.2/100]
def f(x, a, loc,scale):
    # The loc argument is discarded and replaced by 0, but because it is
    # still part of the signature the optimiser keeps it as a parameter.
    loc=0
    return gamma.pdf(x, a, loc, scale)
# f declares three parameters after x, so curve_fit optimises a, loc and
# scale, even though f ignores the loc value it is given.
optimize.curve_fit(f, x, y)
The results are giving me a loc=1. Is there any way that I can make loc=0?
I noticed that when x contains non-integer elements, loc cannot be fixed at 0; otherwise the curve fit does not work. What is the algorithm behind that?
Here is an example showing why my code did not work in some cases:
from scipy import optimize
from scipy.stats import gamma
def f(x, a, loc,scale):
    # The loc argument is discarded and replaced by 0, but because it is
    # still part of the signature the optimiser keeps it as a parameter.
    loc=0
    return gamma.pdf(x, a, loc, scale)
# Starting values for (a, loc, scale).
init_guess = [0.1, 0, 0.1]
fig = plt.subplots(figsize=(5, 3))
fit_2worst = optimize.curve_fit(f, x, y, p0=init_guess)
# The first element of curve_fit's result holds the optimised parameters.
k_fit, loc_fit, theta_fit = fit_2worst[0]
# Evaluate the fitted density on a dense grid for plotting.
x2 = np.linspace(0, 100, 200)
y2 = gamma.pdf(x2, a=k_fit, loc=loc_fit, scale=theta_fit)
plt.title('Gamma with k=' + str("{:.2}".format(k_fit)) + '\nTheta=' + str(int(theta_fit)))
plt.plot(x2, y2, "y-")
print('k:', k_fit, 'location:', loc_fit, 'theta:', theta_fit)
plt.show()
Returns are
k: 36.171512499294444 location: 0.0 theta: 3.725335489050758
The shown picture is
With the code proposed by #Joe, I am able to get the correct one
def f(x, a, scale):
    """Gamma pdf with shape a and the given scale; loc is pinned to 0.

    loc is not a parameter, so curve_fit cannot vary it.
    """
    return gamma.pdf(x, a, scale=scale, loc=0)
fig = plt.subplots(figsize=(5, 3))
opt = optimize.curve_fit(f, x, y)
# The first element of curve_fit's result holds the optimised (a, scale).
k_fit, theta_fit = opt[0]
# Evaluate the fitted density on a dense grid for plotting.
x2 = np.linspace(0, 100, 200)
y2 = gamma.pdf(x2, a=k_fit, scale=theta_fit)
plt.title('Gamma with k=' + str("{:.2}".format(k_fit)) + '\nTheta=' + str(int(theta_fit)))
plt.plot(x2, y2, "y-")
print('k:', k_fit, 'location:', 0, 'theta:', theta_fit)
plt.show()
With a return
k: 0.23311781831847955 location: 0 theta: 132.0300661365553
I am not sure why the previous code works for integers but not for floats.
This is just least squares.
You can make loc = 0 by not making it variable and so not free for the optimizer to use. Try
def f(x, a, scale):
    """Gamma pdf with shape a and the given scale; loc is pinned to 0.

    loc is not a parameter, so curve_fit cannot vary it.
    """
    return gamma.pdf(x, a, scale=scale, loc=0)
# Only a and scale are fitted; loc stays at 0 inside f.
optimize.curve_fit(f, x, y)
With image:
import matplotlib.pyplot as plt
from scipy import optimize
from scipy.stats import gamma
import numpy as np
x=[1.5,4,10,50,90]
y=[6/100,2.6/100,1.4/100,0.4/100,0.2/100]
def f(x, a, scale):
    """Gamma pdf with shape a and the given scale; loc is pinned to 0.

    loc is not a parameter, so curve_fit cannot vary it.
    """
    return gamma.pdf(x, a, scale=scale, loc=0)
# Fit a and scale, then print the full (parameters, covariance) result.
opt = optimize.curve_fit(f, x, y)
print(opt)
fitted_params = opt[0]
# Evaluate the fitted pdf at 0.0, 1.0, ..., 89.0.
x_0 = np.arange(0.0, 90)
y_0 = f(x_0, *fitted_params)
# Data as a line, fitted model as red dots.
plt.plot(x, y)
plt.plot(x_0, y_0, 'r.')
plt.show()

curve_fit() using python

def model(A, x, mu, sigma):
    # Gaussian with amplitude A, centre mu and width sigma.
    # NOTE: curve_fit passes the independent variable as the FIRST argument,
    # so with this parameter order `A` receives the x data.
    return A*exp(-((x-mu)**2)/(2*sigma**2))
from scipy.optimize import curve_fit
# Mean and standard deviation of the d-spacing values.
mu=np.mean(d_spacing_2)
sigma=np.std(d_spacing_2)
f=intensity_2
x=d_spacing_2
# curve_fit's positional arguments are (f, xdata, ydata, p0, sigma): here A is
# taken as xdata, x as ydata and mu as a one-element initial guess, so model
# is called with too few arguments. `A` is also not defined in this snippet.
popt, pcov = curve_fit(model, A, x, mu, sigma)
TypeError: model() missing 2 required positional arguments: 'mu' and 'sigma'
You are not calling curve_fit correctly. Here is a working example from the curve_fit documentation, with some additional plotting:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
    """Exponential decay with a constant offset: a * exp(-b * x) + c."""
    return a * np.exp(-b * x) + c
# Synthetic data: the model with a=2.5, b=1.3, c=0.5 at 50 points, plus
# Gaussian noise scaled by 0.2.
xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
noise = 0.2 * np.random.normal(size=len(xdata))
ydata = y + noise

# Fit, starting from the guess a=2, b=1, c=1.
start = [2, 1, 1]
popt, pcov = curve_fit(func, xdata, ydata, p0=start)

# Interactive mode, so the plot calls below do not block.
plt.ion()
plt.plot(xdata, ydata, 'o')
# Draw the fitted curve on a finer grid than the data.
xplot = np.linspace(0, 4, 100)
yplot = func(xplot, *popt)
plt.plot(xplot, yplot)
The first argument of curve_fit is the model function, the second is the x values of the data, and the third is the y values. You should normally also pass the optional argument p0, which is an initial guess for the parameters.

Categories