Overfitting with curve.fit

Overfitting with curve.fit - python

can anyone help me struggle with fitting issue from curve.fit. I would like to fit my data to a second order equation. But I obtained a result like a linear equation.
Here is my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
f = a*np.power(x, 2) + b*x + c
return f
xdata_prime=[3.0328562996216282, 3.101784841139168, 3.1707134502066894, 3.2396419917242292, 3.308570533241769, 3.3774990747593088, 3.3774990747593088, 3.4337789932367149, 3.4900589392912855, 3.5463388577686916, 3.6026187762460977, 3.6588987223006684]
ydata_prime=[6.344300000000002, 6.723900000000002, 7.080399999999999, 7.399800000000001, 7.649099999999999, 7.753100000000002, 7.753100000000002, 7.658600000000002, 7.442100000000002, 7.180100000000001, 6.902700000000001, 6.6211]
plt.plot(xdata_prime, ydata_prime, 'b-', label='data')
popt, pcov = curve_fit(func, xdata_prime, ydata_prime)
popt
plt.plot(xdata_prime, func(xdata_prime, *popt), 'r-',label='fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

Your arrays need to be numpy arrays because your function is doing vectorized operations (namely a*np.power(x, 2)). So with this your code will work:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
f = a*np.power(x, 2) + b*x + c
return f
xdata_prime=np.array([3.0328562996216282, 3.101784841139168, 3.1707134502066894, 3.2396419917242292, 3.308570533241769, 3.3774990747593088, 3.3774990747593088, 3.4337789932367149, 3.4900589392912855, 3.5463388577686916, 3.6026187762460977, 3.6588987223006684])
ydata_prime=np.array([6.344300000000002, 6.723900000000002, 7.080399999999999, 7.399800000000001, 7.649099999999999, 7.753100000000002, 7.753100000000002, 7.658600000000002, 7.442100000000002, 7.180100000000001, 6.902700000000001, 6.6211])
plt.plot(xdata_prime, ydata_prime, 'b-', label='data')
popt, pcov = curve_fit(func, xdata_prime, ydata_prime)
plt.plot(xdata_prime, func(xdata_prime, *popt), 'r-',label='fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

Related

scipy optimization according to the experimental curve

Please tell me how to determine the unknown parameters in the calculated curve, using scipy optimization, having an experimental curve at the input. I need to determine the unknown parameters a, b, c (in the code below) from the calculated curve, so that the standard deviation functional is minimal
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from math import pi
def func(a,b,c):
return -a/(2*np.tan(c*(pi/2)))+np.sqrt(b+(a**2)/(4*np.tan((c)*(pi/2))))
file=experimental curve in .txt file
pd_file=pd.read_csv(file, sep="\s+",header=None,names=['frequence', 'y'],
skiprows=1)
xdata=pd_file['frequence']
ydata=pd_file['y']
popt, pcov = curve_fit(func, xdata, ydata, p0=[0.6,1], maxfev=500000000)
print('popt',popt)

I do not think your functional form is suitable for fitting the data you have. After some experimentation may I suggest a different one:
def func2(x,b, d):
return 0.2/ (1 + b * x + d * np.log(1+x))
file='chi_strich_strich_H0.txt'
pd_file=pd.read_csv(file, sep="\s+",header=None,names=['frequence', 'y'],
skiprows=1)
xdata=pd_file['frequence']
ydata=pd_file['y']
popt, pcov = curve_fit(func2, xdata, ydata, p0=[0,0], maxfev=500000000)
print('popt',popt)
yfit = func2(xdata,popt[0], popt[1])
plt.plot(xdata, ydata, '.', label = 'data')
plt.plot(xdata, yfit, '-', label = 'fit')
plt.legend(loc = 'best')
plt.show()
popt: [ 1.84672386e-05 -7.69652828e-02]
The fit is on this plot:

How do i get the curve_fit to fit the data?

I can't figure out why my curve_fit is not following the data?
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import math
from scipy.stats import binom, poisson, norm
from scipy.optimize import curve_fit
AD1 = sio.loadmat('ATLAS_DATA1' ,squeeze_me=True,mat_dtype = True)
locals().update({k :AD1[k]for k in ['n','e']})
xData = e
yData = n
yErr = 0
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
plt.errorbar(xData,yData,yErr,marker = '.',linestyle = '')
plt.show()
def func(e, a, b):
return a * np.exp(-b * e)
xDat = e
yDat = func(xDat, 2, 1)
popt, pcov = curve_fit(func, xDat, yDat)
plt.plot(xDat, func(xDat, *popt))
plt.show()
Below is my data for n at the top and e at the bottom.
Data for n and e
Graph for the data that i want to fit

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as optimize
from scipy.optimize import curve_fit
Write your xData and yData as numpy arrays as follows:
I used a sample from it
xData =np.array([383,358,326,366,335,331,308,299,303,325,306,299,270,282,253,265,248,256,220,208,252,215,220,237,204,213,224,212])
yData = np.array([101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,124,128])
plt.xlabel("e (GeV)")
plt.ylabel("Number of events (n)")
plt.scatter(xData,yData)
plt.show()
Heres the original data
Its bettter to use plt.scatter than plt.errorbar
i found this equation better for your curve
def func(x, a, c, d):
return a*np.exp(-c*x)+d
Same thing goes for xDat (write it as np.array)
xDat = xData
yDat = func(xDat, 2, 1)
plt.scatter(xDat,yDat)
popt, pcov = curve_fit(func, xData, yData,p = (10, 1e-6, 100))
plt.plot(xDat, func(xDat, *popt))
plt.show()
Tip: Dont use lower case e as a variable, because most of the time e represents the exponential constant e=2.77
UPDATE :
if you want to use your original function heres the code:
def func(e, a, b):
return a * np.exp(-b * e)
popt, pcov = curve_fit(func, xData, yData,p0 = [10,-0.00001])

Fitted Exponential Curve Error

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
return a * np.exp(-b * x) + c
x = [333,500,1000,2000,5000,10000]
y = [195.3267, 233.0235, 264.5914,294.8728, 328.3523,345.4688]
popt, pcov = curve_fit(func, x, y)
plt.figure()
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.show()
Error:
C:\Users\Aidan\Anaconda3\lib\site-packages\scipy\optimize\minpack.py:794:
OptimizeWarning: Covariance of the parameters could not be estimated
category=OptimizeWarning)
--------------------------------------------------------------------------- TypeError Traceback (most recent call
last) in ()
14 plt.figure()
15 plt.plot(x, y, 'ko', label="Original Noised Data")
---> 16 plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
17 plt.legend()
18 plt.show()
in func(x, a, b, c)
4
5 def func(x, a, b, c):
----> 6 return a * np.exp(-b * x) + c
7
8 x = [333,500,1000,2000,5000,10000]
TypeError: 'numpy.float64' object cannot be interpreted as an integer
For some reason I am not able to get a curve fit based on my data. I am following the exponential example from here: How to do exponential and logarithmic curve fitting in Python? I found only polynomial fitting
But I am using an two arrays rather than made up random data. I am new to python!

There are a few issues with your code.
You use lists instead of numpy.ndarray: the numpy and scipy routines are meant to work with numpy.ndarray and they use them internally. You should use them as well.
You are likely to get overflow issues with your data and your function, e.g. np.exp(-1000) is already approximated to zero in Python3
You are trying to fit a function that it is unlikely to fit your data. It looks more like an exponential recovery than a decay.
The following code tentatively address all these issues:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
return a * (1 - np.exp(-b * x)) + c
x = np.array([333.0,500.0,1000.0,2000.0,5000.0,10000.0]) / 1000
y = np.array([195.3267, 233.0235, 264.5914,294.8728, 328.3523,345.4688]) / 10
popt, pcov = curve_fit(func, x, y)
print(popt)
plt.figure()
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.show()

gaussian fit..error: OptimizeWarning: Covariance of the parameters could not be estimated

I am trying to fit my data with a gaussian function. I have set the parameters right and as far as I can evaluate, my code is also correct. But I am not getting the correct fit, and there is some error about the covariance of the parameters. Could anyone pls invest their time to review this code and tell me what I am missing?
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
old_settings = np.seterr(all='ignore')
from scipy import interp
from scipy import genfromtxt
import scipy
from math import pi, sqrt
old_settings = np.seterr(all='ignore')
data= genfromtxt('steel.txt')
x= data[:,0]
y= data[:,3]
def gaus(x, amp, mu, s):
return amp *np.exp(-(x-mu)**2/(2*s**2))
amp,mu,s= 400, 0, -0.1
popt, pcov = curve_fit(gaus,x,y,p0=(400, 0, -0.1))
print(popt)
p1 = popt[0]
p2 = popt[1]
p3 = popt[2]
residuals = y - gaus(x, amp, mu, s)
fres = sum( (residuals**2)/gaus(x, amp, mu, s) )
print(fres)
curvey = gaus(x, p1,p2,p3)
plt.plot(x,y, 'b.', label='experimental data')
plt.plot(x, curvey, 'r.', label='gaussian fit')
plt.legend(loc='best')
plt.ylim(2700,4000)
plt.xlabel('velocity')
plt.ylabel('counts per seconds')
plt.legend()
plt.show()
My data is here:
https://www.dropbox.com/s/7wn34goicl8wu0l/steel.txt?dl=0

Your fit function has a range from 0 to amp. This is not the range your data set has. Solution: Add an offset to your function:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy import genfromtxt
data= genfromtxt('steel.txt')
x= data[:,0]
y= data[:,3]
#added offset parameter
def gaus(x, amp, mu, s, offset):
return amp *np.exp(-(x-mu)**2/(2*s**2)) + offset
popt, pcov = curve_fit(gaus,x,y,p0=(-4, 0, -0.1, 100))
print(popt)
#better resolution for fit curve representation
curvex = np.linspace(np.min(x), np.max(x), 1000)
curvey = gaus(curvex, *popt)
plt.plot(x,y, 'b.', label='experimental data')
plt.plot(curvex, curvey, 'r', label='gaussian fit')
plt.legend(loc='best')
plt.ylim(2700,4000)
plt.xlabel('velocity')
plt.ylabel('counts per seconds')
plt.legend()
plt.show()
Output

Non-linear curve-fitting program in python

I would like to find and plot a function f that represents a curve fitted on some number of set points that I already know, x and y.
After some research I started experimenting with scipy.optimize and curve_fit but on the reference guide I found that the program uses a function to fit the data instead and it assumes ydata = f(xdata, *params) + eps.
So my question is this: What do I have to change in my code to use the curve_fit or any other library to find the function of the curve using my set points? (note: I want to know the function as well so I can integrate later for my project and plot it). I know that its going to be a decaying exponencial function but don't know the exact parameters. This is what I tried in my program:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
return a * np.exp(-b * x) + c
xdata = np.array([0.2, 0.5, 0.8, 1])
ydata = np.array([6, 1, 0.5, 0.2])
plt.plot(xdata, ydata, 'b-', label='data')
popt, pcov = curve_fit(func, xdata, ydata)
plt.plot(xdata, func(xdata, *popt), 'r-', label='fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Am currently developing this project on a Raspberry Pi, if it changes anything. And would like to use least squares method since is great and precise, but any other method that works well is welcome.
Again, this is based on the reference guide of scipy library. Also, I get the following graph, which is not even a curve: Graph and curve based on set points

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def func(x, a, b, c):
return a * np.exp(-b * x) + c
#c is a constant so taking the derivative makes it go to zero
def deriv(x, a, b, c):
return -a * b * np.exp(-b * x)
#Integrating gives you another c coefficient (offset) let's call it c1 and set it equal to zero by default
def integ(x, a, b, c, c1 = 0):
return -a/b * np.exp(-b * x) + c*x + c1
#There are only 4 (x,y) points here
xdata = np.array([0.2, 0.5, 0.8, 1])
ydata = np.array([6, 1, 0.5, 0.2])
#curve_fit already uses "non-linear least squares to fit a function, f, to data"
popt, pcov = curve_fit(func, xdata, ydata)
a,b,c = popt #these are the optimal parameters for fitting your 4 data points
#Now get more x values to plot the curve along so it looks like a curve
step = 0.01
fit_xs = np.arange(min(xdata),max(xdata),step)
#Plot the results
plt.plot(xdata, ydata, 'bx', label='data')
plt.plot(fit_xs, func(fit_xs,a,b,c), 'r-', label='fit')
plt.plot(fit_xs, deriv(fit_xs,a,b,c), 'g-', label='deriv')
plt.plot(fit_xs, integ(fit_xs,a,b,c), 'm-', label='integ')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Overfitting with curve.fit - python

Related

scipy optimization according to the experimental curve

How do i get the curve_fit to fit the data?

Fitted Exponential Curve Error

gaussian fit..error: OptimizeWarning: Covariance of the parameters could not be estimated

Non-linear curve-fitting program in python

Categories

Resources