Unable to fit a curve using scipy.optimize curve_fit - python

Hi everyone, I am trying to fit a curve using Python's scipy.optimize.curve_fit, but the result of the fit is very poor: after obtaining the parameters and reconstructing the curve with them, the fitted curve matches the data very badly.
I should be expecting a downward sloping curve, instead of an upward sloping one.
The code is attached below.
import pandas as pd
import numpy as np
import datetime as dt
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
# Variance values keyed by a month label in "%b%Y" format (e.g. "Nov2022").
variance = {"Nov2022":0.166092943386744, "May2023":0.119276119381951, "Jun2023":0.113809061614305}
variance = pd.Series(variance)
time = list(variance.index)
# dt.datetime.strptime(time[0], "%b%Y")
# Parse every month label into a datetime object.
time = [dt.datetime.strptime(x, "%b%Y") for x in time]
# print(time)
# Express each date as a number of 365-day years measured from the moment the
# script runs; any date earlier than "today" therefore yields a negative value.
today = dt.datetime.today()
time_years = [(x - today).days/365 for x in time]
def Var(T, sigma, alpha):
    """Return the model variance at maturities T.

    Evaluates sigma**2 / (2*alpha) * (1 - exp(-2*alpha*(T - t))) with the
    reference time t fixed at one day (1/365 of a year).

    Parameters
    ----------
    T : scalar or array-like
        Times, in the same units as t (years).
    sigma, alpha : float
        Model parameters; alpha must be non-zero.

    Returns
    -------
    numpy scalar or ndarray with the same shape as T.
    """
    t = 1/365
    T = np.asarray(T, dtype=float)
    # The prefactor is sigma squared over 2*alpha; the original `sigma*2`
    # reduced it to sigma/alpha.
    var = sigma**2 / (2*alpha) * (1 - np.exp(-2*alpha * (T - t)))
    return var
#the fitting is done here with both parameters
# popt, pcov = curve_fit(ModelVar, time_years, list(variance), bounds=([0,0],[np.inf, np.inf]))
# Unbounded least-squares fit of Var to the data. No p0 is passed, so
# curve_fit starts every parameter from its default initial value of 1.
popt, pcov = curve_fit(Var, time_years, list(variance))
sigma = popt[0]
alpha = popt[1]
#after fitting, reconstruct with the given alpha and sigma
pd.options.display.float_format = "{:.15f}".format
fitted_model = Var(time_years, sigma, alpha)
# Put the input values ("HistoricalVar") and the fitted values side by side,
# indexed by the month labels.
df = pd.DataFrame(variance).rename(columns= {0:"HistoricalVar"}, errors = "raise").assign(fitted_var = fitted_model)
plt.plot(df)
plt.show()
print(df)

Related

Scipy Optimize Curve fit not properly fitting with real data

I am trying to fit a decaying exponential function to real world data. I'm having a problem with aligning the function to the actual data.
Here's my code:
def test_func(x, a, b, c):
    """Exponentially damped sine: a * exp(-b*x) * sin(c*x).

    x is the independent variable (a NumPy array when called by curve_fit);
    a, b and c are the parameters being fitted.
    """
    return a*np.exp(-b*x)*np.sin(c*x)


# Time axis: 25000 samples from 0 to 2.5e-6.
my_time = np.linspace(0,2.5e-6,25000)
# Initial guess for (a, b, c).
p0 = [60000, 700000, 2841842]
# my_amp is the measured signal; it is not defined in this snippet.
params, params_covariance = curve_fit(test_func, my_time, my_amp,p0)
My signal and fitted function
My question: why doesn't the fitted function start where my data starts increasing in amplitude?
As I said in my comment, the problem is that your function does not take into account that the exponential curve can be shifted. If you include this shift as an additional parameter, the fit will probably converge.
from scipy.optimize import curve_fit
from matplotlib import pyplot as plt
import numpy as np
def test_func(x, a, b, c, d):
    """Damped sine shifted along x: a * exp(-b*(x+d)) * sin(c*(x+d)).

    d is the shift applied to x before both the exponential and the sine are
    evaluated; a, b and c play the same role as in the unshifted model.
    """
    return a*np.exp(-b*(x+d))*np.sin(c*(x+d))


my_time = np.linspace(0,2.5e-6,25000)
#generate fake data
testp0 = [66372, 765189, 2841842, -1.23e-7]
test_amp = test_func(my_time, *testp0)
my_amp = test_func(my_time, *testp0)
# Hold the first 2222 samples at the value of sample 2222 so the synthetic
# signal starts flat.
my_amp[:2222] = my_amp[2222]
# Initial guess for (a, b, c, d).
p0 = [600, 700000, 2000, -2e-7]
# NOTE(review): the fit is run on test_amp (the unmodified curve) while
# my_amp is what gets plotted as "data" below; confirm this is intended.
params, params_covariance = curve_fit(test_func, my_time, test_amp, p0)
print(params)
fit_amp = test_func(my_time, *params)
plt.plot(my_time, my_amp, label="data")
plt.plot(my_time, fit_amp, label="fit")
plt.legend()
plt.show()
Sample output

Multiple gaussian auto-fitting in Python

The problem is to fit a Gaussian to each of my wavelength peaks, in order to make the fit as accurate as possible.
My question is how to fit Gaussians to all my peaks automatically, without having to manually specify the coordinates of the peaks.
So far I have fitted Gaussians to the brightest peaks, but I would like to generalize this to the remaining peaks. The Gaussian fits will then let me obtain a polynomial fit accurate enough to calibrate the pixels in wavelength.
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt
from scipy import interpolate
from tqdm import tqdm
from scipy import ndimage
import peakutils
from scipy.optimize import curve_fit
def gauss(x, x0, amp, wid):
    """Single peak amp * exp(-((x - x0) / wid)**2).

    x may be a scalar or any array-like; x0 is the peak position, amp its
    height and wid its width parameter (must be non-zero).
    """
    x = np.asarray(x, dtype=float)
    return amp * np.exp( -((x - x0)/wid)**2)


def multi_gauss(x, *params):
    """Sum of several `gauss` peaks.

    params is a flat sequence of (x0, amp, wid) triples, one per peak.
    Returns a float array shaped like x (all zeros when no triple is given).

    Raises
    ------
    ValueError
        If the number of parameters is not a multiple of three.
    """
    if len(params) % 3:
        raise ValueError(
            "multi_gauss expects (x0, amp, wid) triples, got %d parameters"
            % len(params)
        )
    x = np.asarray(x, dtype=float)
    # Float accumulator even when x holds integer pixel indices.
    y = np.zeros_like(x)
    for i in range(0, len(params), 3):
        x0, amp, wid = params[i:i+3]
        y = y + gauss(x, x0, amp, wid)
    return y
# Load the frame and average it along axis 0 to get a 1-D spectrum.
# data_directory and wave_filename are expected to be defined beforehand.
neon = fits.getdata(data_directory + wave_filename + '.fits')
neon_sp = np.mean(neon, axis= 0)
n_pix = len(neon_sp)
peaks_index = peakutils.peak.indexes(neon_sp, thres=0.05, min_dist=2)
### peaks around the brightest peak
bright_index = peaks_index[np.argmax(neon_sp[peaks_index])]
delta_pix = 20
ind_min = bright_index - delta_pix
ind_max = bright_index + delta_pix
peak_select = peaks_index[np.where((peaks_index > ind_min) & (peaks_index < ind_max))]
# Selected peaks ordered from brightest to faintest.
peak_select_sort = peak_select[np.argsort(-neon_sp[peak_select])]
# Widen the window to 40 pixels on the side where the second-brightest peak
# lies relative to the brightest one, then redo the selection.
if peak_select_sort[1] > peak_select_sort[0] :
    ind_max = bright_index + 40
else :
    ind_min = bright_index - 40
peak_select = peaks_index[np.where((peaks_index > ind_min) & (peaks_index < ind_max))]
peak_select_sort = peak_select[np.argsort(-neon_sp[peak_select])]
plt.figure(num=0)
plt.clf()
plt.plot(neon_sp)
plt.plot(peaks_index,neon_sp[peaks_index], 'r+')
plt.plot(peak_select,neon_sp[peak_select], 'ro')
### Gaussian fit
x = np.arange(n_pix)
# Finer grid (0.1 pixel step) used only to draw the fitted curves.
xx = np.arange(0, n_pix, .1)
n_peak = 4
bright_index_fit = np.zeros(n_peak)
# Fit one Gaussian per selected peak, brightest first, seeding each fit with
# the detected peak position and height.
for i in range(n_peak):
    p = peak_select_sort[i]
    guess = [p, neon_sp[p], .5]
    popt, pcov = curve_fit(gauss, x, neon_sp, p0=guess)
    fit = gauss(xx, *popt)
    bright_index_fit[i] = popt[0]
    plt.plot(xx,fit, '--')
bright_wave = [703.2, 724.5, 693.0, 743.9]

Fitting peaks with Scipy curve_fit, error optimal parameters not found

I recently started with Python because I have an enormous amount of data where I want to automatically fit a Gaussian to the peaks in spectra. Below is an example of three peaks that I want to fit with three individual peaks.
I have found a question where someone is looking for something very similar, "How can I fit multiple Gaussian curved to mass spectrometry data in Python?", and adapted it to my script.
I have added my code at the bottom and when I run the last section I get the error "RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 800." What am I missing?
The data can be downloaded at https://www.dropbox.com/s/zowawljcjco70yh/data_so.h5?dl=0
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
from scipy.sparse.linalg import spsolve
from scipy.optimize import curve_fit
#%% Read data
path = 'D:/Python/data_so.h5'
f = pd.read_hdf(path, mode = 'r')
t = f.loc[:, 'Time stamp']
# Keep only the remaining columns; their labels are used as the x axis below.
d = f.drop(['Time stamp', 'Name spectrum'], axis = 1)
#%% Extract desired wavenumber range
wn_st=2000
wn_ed=2500
# Positions of the column labels closest to the requested limits.
ix_st=np.argmin(abs(d.columns.values-wn_st))
ix_ed=np.argmin(abs(d.columns.values-wn_ed))
d = d.iloc[:, ix_st:ix_ed+1]
#%% AsLS baseline correction
spectrum = 230  # row index of the spectrum to process
y = d.iloc[spectrum]
niter = 10
lam = 200000
p = 0.005
L = len(y)
# Second-difference operator used for the smoothness penalty.
D = sparse.diags([1,-2,1],[0,-1,-2], shape=(L,L-2))
w = np.ones(L)
# Iteratively re-weighted penalised least squares: points above the current
# baseline z get weight p, points below it get weight 1-p.
for i in range(niter):
    W = sparse.spdiags(w, 0, L, L)
    Z = W + lam * D.dot(D.transpose())
    z = spsolve(Z, w*y)
    w = p * (y > z) + (1-p) * (y < z)
# Baseline-corrected spectrum.
corr = d.iloc[spectrum,:] - z
#%% Plot spectrum, baseline and corrected spectrum
plt.clf()
plt.plot(d.columns, d.iloc[spectrum,:])
plt.plot(d.columns, z)
plt.plot(d.columns, corr)
plt.gca().invert_xaxis()
plt.show()
#%%
x = d.columns.values
def gauss(x, a, mu, sig):
    """Gaussian a * exp(-(x - mu)**2 / (2 * sig**2)).

    x is converted to float first. Objects that provide `.astype` (NumPy
    arrays, pandas objects) are converted with it, exactly as before; plain
    lists and Python scalars, which have no `.astype`, go through
    `np.asarray` instead of raising AttributeError.
    """
    if hasattr(x, "astype"):
        x = x.astype(float)
    else:
        x = np.asarray(x, dtype=float)
    return a*np.exp(-(x-mu)**2/(2*sig**2))
# Restrict the data to 2232 < x < 2252 (multiplying the two boolean masks acts
# as a logical AND).
fitx = x[(x>2232)*(x<2252)]
# NOTE(review): fity is taken from y, the spectrum before baseline
# subtraction, not from corr; confirm this is intended.
fity = y[(x>2232)*(x<2252)]
# Intensity-weighted mean and standard deviation of the window, used as the
# initial guesses for mu and sig.
mu=np.sum(fitx*fity)/np.sum(fity)
sig=np.sqrt(np.sum(fity*(fitx-mu)**2)/np.sum(fity))
popt, pcov = curve_fit(gauss, fitx, fity, p0=[max(fity),mu, sig])
# The fitted curve is drawn over the full x range, not just the window.
plt.plot(x, gauss(x, popt[0],popt[1],popt[2]), 'r-', label='fit')

Non linear regression using curve_fit

I was trying to fit my data to the function that is written below, but when using curve_fit the results don't match the data at all.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# nu is not referenced anywhere else in the visible code.
nu=[0.00,0.03,0.01,-0.02,0.00,-0.06]
# Load the text table and transpose it so that each original column can be
# picked out by its index.
data=np.loadtxt('impedancia.txt')
use=np.transpose(data)
Z=use[0]      # first column of the file
omega=use[1]  # second column of the file
def func(x,a,b,c):
    """Model a/x**2 + b + c*x**2, with a, b and c as the fit parameters.

    x must be non-zero wherever the a/x**2 term is evaluated.
    """
    return a/(x**2)+b+c*x**2
# Fit func to Z as a function of omega. No p0 is given, so curve_fit starts
# every parameter from its default initial value of 1.
popt,poc=curve_fit(func,omega,Z)
plt.plot(omega,Z,'bo',markersize=3.5)
# The stray trailing backtick that followed this call was a syntax error.
plt.plot(omega,func(omega,*popt))
I was wondering if anyone could help me with this.
Here is my code and plotted result, with the scipy.optimize.differential_evolution module used to estimate initial parameters for the non-linear solver. Note that this code uses a variation on the Lorentzian peak equation similar to yours, however lines 20 and 21 allow you to select the equation. The peak equation in your code does not appear to fit the narrow peak of the data as well as the recommended peak equation currently selected.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings
from scipy.optimize import differential_evolution
# bounds on parameters are set in generate_Initial_Parameters() below
def func_original(x,a,b,c):
    """Model from the question: a/x**2 + b + c*x**2.

    x must be non-zero wherever the a/x**2 term is evaluated.
    """
    return a/(x**2)+b+c*x**2
# bounds on parameters are set in generate_Initial_Parameters() below
def func_recommended(x,a,b,c):
    """Peak model a / (b + (x - c)**2).

    c is the x position where the denominator is smallest; a scales the
    height and b is the constant term of the denominator.
    """
    return a / (b + (x-c)**2)
# select peak function here
# (comment/uncomment so that exactly one assignment to func is active; the
# error function and the curve_fit call below both go through func)
#func = func_original
func = func_recommended
# function for genetic algorithm to minimize (sum of squared error)
# bounds on parameters are set in generate_Initial_Parameters() below
def sumOfSquaredError(parameterTuple):
    """Sum of squared residuals of func against the module-level data.

    parameterTuple holds one candidate set of model parameters; it is
    unpacked into func(xData, ...) and compared with yData.
    """
    # Silence warnings only while the model is evaluated. The previous
    # warnings.filterwarnings("ignore") call stayed in force afterwards and
    # hid every later warning in the process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return np.sum((yData - func(xData, *parameterTuple)) ** 2)
def generate_Initial_Parameters():
    """Estimate starting parameters for curve_fit with differential evolution.

    Minimises sumOfSquaredError over one search interval shared by all three
    parameters and returns the best parameter vector found (result.x).
    """
    # data min and max used for bounds: one interval spanning both the x and
    # the y ranges of the data
    minSearch = min(min(xData), min(yData))
    maxSearch = max(max(xData), max(yData))

    # identical bounds for a, b and c
    parameterBounds = [[minSearch, maxSearch] for _ in range(3)]

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# load data from text file
data=np.loadtxt('impedancia.txt')
use=np.transpose(data)
yData=use[0]  # first column of the file
xData=use[1]  # second column of the file
# generate initial parameter values
initialParameters = generate_Initial_Parameters()
# curve fit the data
# (the second return value, niepewnosci, is the covariance matrix returned by
# curve_fit)
fittedParameters, niepewnosci = curve_fit(func, xData, yData, initialParameters)
# create values for display of fitted peak function
a, b, c = fittedParameters
y_fit = func(xData, a, b, c)
plt.plot(xData, yData, 'D') # plot the raw data
plt.plot(xData, y_fit) # plot the equation using the fitted parameters
plt.show()
print(fittedParameters)

Integration of a Gaussian function to count the number of particle under area

I need to count the number of particles under the fitted Gaussian curve. The area under the fitted curve can be found by integrating the function between the limits (mean-3*sigma) and (mean+3*sigma). Could you please help me solve this? Thanks for your kind consideration.
import pylab as py
import numpy as np
from scipy import optimize
from scipy.stats import stats
import matplotlib.pyplot as plt
import pandas as pd
BackPFT='T067.csv'
# header=1: the second line of the file supplies the column names.
df_180 = pd.read_csv(BackPFT, error_bad_lines=False, header=1)
# Columns 3 and 4 (0-based) are the two inputs to the formula for s.
x_180=df_180.iloc[:,3]
y_180=df_180.iloc[:,4]
#want to plot the distribution of s calculated by the following equation
s=np.sqrt((((16*x_180**2*38.22**2)/((4*38.22**2-y_180**2)**2))+1))-1
#Shape of this distribution is Gaussian
#I need to fit this distribution by following parameter
# mean and sigma are read as globals by f(); they are held fixed, not fitted.
mean=0.433
sigma=0.014
draw=s
#Definition of bin number
bi=np.linspace(0.01,8, 1000)
# data[0] holds the bin counts and data[1] the bin edges (both used below).
data = py.hist(draw.dropna(), bins = bi)
#Definition of Gaussian function
def f(x, a, b, c):
    """Gaussian of amplitude a with fixed centre and width.

    The centre and width come from the module-level names mean and sigma.
    b and c are accepted but never used, so a is the only parameter that
    influences the result.
    """
    return (a * py.exp(-(x - mean)**2.0 / (2 *sigma**2)))
# Bin centres: midpoint of every pair of consecutive bin edges.
x = [0.5 * (data[1][i] + data[1][i+1]) for i in xrange(len(data[1])-1)]
y = data[0]
#Fitting the peak of the distribution
popt, pcov = optimize.curve_fit(f, x, y)
# NOTE(review): chisquare is applied to the fitted parameter vector popt, not
# to observed versus expected counts; confirm this is what is wanted.
chi2, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
# The bare name plot is never imported in this script, so the call goes
# through plt like the calls that follow.
plt.plot(x_fit, y_fit, lw=3, color="r",ls="--")
plt.xlim(0,2)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.show()
The problem is how to integrate the defined function (f) and count the number of particles under the curve. The file T067.csv is attached here. Thanks in advance for your kind consideration.
BackPFT='T061.csv'
# Skip the first line of the file and read the rest without a header row.
df_180 = pd.read_csv(BackPFT, skip_blank_lines=True ,skiprows=1,header=None,skipfooter=None,engine='python')
# Columns 3 and 4 (0-based) are the two inputs to the formula for s.
x_180=df_180.iloc[:,3]
y_180=df_180.iloc[:,4]
b=42.4
# E, LET and REL are not referenced again in the visible code.
E=109.8
LET=24.19
REL=127.32
# mean and sigma are read as globals by f(); m1 and s1 are not used in the
# visible code.
mean=0.339; m1=0.259
sigma=0.012; s1=0.015
s=np.sqrt((((16*x_180**2*b**2)/((4*b**2-y_180**2)**2))+1))-1
draw=s
bi=np.linspace(0,8, 2000)
# Nominal bin width used later when converting the area into a count.
binwidth=0.004
#I want to plot the distribution of s. This distribution has three gaussian peaks
# data[0] holds the bin counts and data[1] the bin edges (both used below).
data = py.hist(draw.dropna(), bins = bi,color='gray',)
#first Gaussian function for the first peak (peaks counted from the right)
def f(x, a, b, c):
    """Gaussian of amplitude a with fixed centre and width.

    The centre and width come from the module-level names mean and sigma.
    b and c are accepted but never used, so a is the only parameter that
    influences the result.
    """
    return (a * py.exp(-(x - mean)**2.0 / (2 *sigma**2)))
# fitting the function (Gaussian)
x = [0.5 * (data[1][i] + data[1][i+1]) for i in xrange(len(data[1])-1)]
y = data[0]
popt, pcov = optimize.curve_fit(f, x, y)
chi, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
plot(x_fit, y_fit, lw=5, color="r",ls="--")
#integration of first function f
gaussF = lambda x, a: f(x, a, sigma, mean)
bins=((6*sigma)/(binwidth))
delta = ((mean+3*sigma) - (mean-3*sigma))/bins
f1 = lambda x : f(x, popt[0], sigma, mean)
result = quad(f1,mean-3*sigma,mean+3*sigma)
area = result[0] # this give the area after integration of the gaussian
numPar = area / delta # this gives the number of particle under the integrated area
print"\n\tArea under curve = ", area, "\n\tNumber of particel= ", numPar
The file T061.csv is here. Thanks to Dr. I Putu Susila for his kind co-operation and interest.

Categories