How to get confidence intervals from curve_fit - python

My question involves statistics and python and I am a beginner in both. I am running a simulation, and for each value for the independent variable (X) I produce 1000 values for the dependent variable (Y). What I have done is that I calculated the average of Y for each value of X and fitted these averages using scipy.optimize.curve_fit. The curve fits nicely, but I want to draw also the confidence intervals. I am not sure if what I am doing is correct or if what I want to do can be done, but my question is how can I get the confidence intervals from the covariance matrix produced by curve_fit. The code reads the averages from files first then it just simply uses curve_fit.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def readTDvsTx(L, B, P, fileformat):
# L should be '_Fixed_' or '_'
TD = []
infile = open(fileformat.format(L, B, P), 'r')
infile.readline() # To remove header
for line in infile:
l = line.split() # each line contains TxR followed by CD followed by TD
if eval(l[0]) >= 70 and eval(l[0]) <=190:
td = eval(l[2])
TD.append(td)
infile.close()
tdArray = np.array(TD)
return tdArray
def rec(x, a, b):
return a * (1 / (x**2)) + b
fileformat = 'Densities_file{}BS{}_PRNTS{}.txt'
txR = np.array(range(70, 200, 20))
parents = np.array(range(1,6))
disc_p1 = readTDvsTx('_Fixed_', 5, 1, fileformat)
popt, pcov = curve_fit(rec, txR, disc_p1)
plt.plot(txR, rec(txR, popt[0], popt[1]), 'r-')
plt.plot(txR, disc_p1, '.')
print(popt)
plt.show()
And here is the resulting fit:

Here's a quick and wrong answer: you can approximate the errors from the covariance matrix for your a and b parameters as the square root of its diagonals: np.sqrt(np.diagonal(pcov)). The parameter uncertainties can then be used to draw the confidence intervals.
The answer is wrong because you before you fit your data to a model, you'll need an estimate of the errors on your averaged disc_p1 points. When averaging, you have lost the information about the scatter of the population, leading curve_fit to believe that the y-points you feed it are absolute and undisputable. This might cause an underestimation of your parameter errors.
For an estimate of the uncertainties of your averaged Y values, you need to estimate their dispersion measure and pass it along to curve_fit while saying that your errors are absolute. Below is an example of how to do this for a random dataset where each of your points consists of a 1000 samples drawn from a normal distribution.
from scipy.optimize import curve_fit
import matplotlib.pylab as plt
import numpy as np
# model function
func = lambda x, a, b: a * (1 / (x**2)) + b
# approximating OP points
n_ypoints = 7
x_data = np.linspace(70, 190, n_ypoints)
# approximating the original scatter in Y-data
n_nested_points = 1000
point_errors = 50
y_data = [func(x, 4e6, -100) + np.random.normal(x, point_errors,
n_nested_points) for x in x_data]
# averages and dispersion of data
y_means = np.array(y_data).mean(axis = 1)
y_spread = np.array(y_data).std(axis = 1)
best_fit_ab, covar = curve_fit(func, x_data, y_means,
sigma = y_spread,
absolute_sigma = True)
sigma_ab = np.sqrt(np.diagonal(covar))
from uncertainties import ufloat
a = ufloat(best_fit_ab[0], sigma_ab[0])
b = ufloat(best_fit_ab[1], sigma_ab[1])
text_res = "Best fit parameters:\na = {}\nb = {}".format(a, b)
print(text_res)
# plotting the unaveraged data
flier_kwargs = dict(marker = 'o', markerfacecolor = 'silver',
markersize = 3, alpha=0.7)
line_kwargs = dict(color = 'k', linewidth = 1)
bp = plt.boxplot(y_data, positions = x_data,
capprops = line_kwargs,
boxprops = line_kwargs,
whiskerprops = line_kwargs,
medianprops = line_kwargs,
flierprops = flier_kwargs,
widths = 5,
manage_ticks = False)
# plotting the averaged data with calculated dispersion
#plt.scatter(x_data, y_means, facecolor = 'silver', alpha = 1)
#plt.errorbar(x_data, y_means, y_spread, fmt = 'none', ecolor = 'black')
# plotting the model
hires_x = np.linspace(50, 190, 100)
plt.plot(hires_x, func(hires_x, *best_fit_ab), 'black')
bound_upper = func(hires_x, *(best_fit_ab + sigma_ab))
bound_lower = func(hires_x, *(best_fit_ab - sigma_ab))
# plotting the confidence intervals
plt.fill_between(hires_x, bound_lower, bound_upper,
color = 'black', alpha = 0.15)
plt.text(140, 800, text_res)
plt.xlim(40, 200)
plt.ylim(0, 1000)
plt.show()
Edit:
If you are not considering the intrinsic errors on the data points, you are probably fine with using the "qiuck and wrong" case I mentioned before. The square root of the diagonal entries of covariance matrix can then be used to calculate your confidence intervals. However, note that the confidence intervals have shrunk now that we've dropped the uncertainties:
from scipy.optimize import curve_fit
import matplotlib.pylab as plt
import numpy as np
func = lambda x, a, b: a * (1 / (x**2)) + b
n_ypoints = 7
x_data = np.linspace(70, 190, n_ypoints)
y_data = np.array([786.31, 487.27, 341.78, 265.49,
224.76, 208.04, 200.22])
best_fit_ab, covar = curve_fit(func, x_data, y_data)
sigma_ab = np.sqrt(np.diagonal(covar))
# an easy way to properly format parameter errors
from uncertainties import ufloat
a = ufloat(best_fit_ab[0], sigma_ab[0])
b = ufloat(best_fit_ab[1], sigma_ab[1])
text_res = "Best fit parameters:\na = {}\nb = {}".format(a, b)
print(text_res)
plt.scatter(x_data, y_data, facecolor = 'silver',
edgecolor = 'k', s = 10, alpha = 1)
# plotting the model
hires_x = np.linspace(50, 200, 100)
plt.plot(hires_x, func(hires_x, *best_fit_ab), 'black')
bound_upper = func(hires_x, *(best_fit_ab + sigma_ab))
bound_lower = func(hires_x, *(best_fit_ab - sigma_ab))
# plotting the confidence intervals
plt.fill_between(hires_x, bound_lower, bound_upper,
color = 'black', alpha = 0.15)
plt.text(140, 630, text_res)
plt.xlim(60, 200)
plt.ylim(0, 800)
plt.show()
If you're unsure whether to include the absolute errors or how to estimate them in your case, you'd be better off asking for advice at Cross Validated, as Stack Overflow is mainly for discussion on implementations of regression methods and not for discussion on the underlying statistics.

Related

Can one create a distribution characterizing the multiplication of two distributions in Python?

I have two distributions and I would like to know the properties of the multiplication of these distributions.
For example, if I had the distribution of properties velocity and time, I want the characteristics of the probability distribution of distance.
With reasonable estimates for the inegration bounds, I can calculate the probability density function from the product of two random variables:
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below
V = np.linspace(0,20,201)
D = np.linspace(0,120,201)
P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2) # probability distribution for speed
# complete integration
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]
plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')
I would like to be able to compute things like P_d.sf(d), P_d.cdf(d), etc., for arbitrary values of d. Can I create a new distribution (perhaps using scipy.stats.rv_continuous) to characterize distance?
The solution took a bit of time to understand the rv_continuous. Cobbling together knowledge from a bunch of examples (I should have documented them--sorry) I think I got a working solution.
The only issue is that the domain needs to be known in advance, but I can work with that. If someone has ideas for how to fix that, please let me know.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp
interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz
# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)
# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)
class MultiplicativeModel(stats.rv_continuous):
def __init__(self, Tmodel, Vmodel, *args, **kwargs):
super().__init__(*args, **kwargs)
self.Tmodel = Tmodel # The time-domain probability function
self.Vmodel = Vmodel # The velocity-domain probability function
# Create vectors for interpolation of distributions
self.pdf_vec = np.array([trapz(self.Tmodel.pdf(T) * \
self.Vmodel.pdf(_ / T) / T, dx = dt) \
for _ in D])
self.cdf_vec = np.cumsum(self.pdf_vec) * dd
self.sf_vec = 1 - self.cdf_vec
# define key functions for rv_continuous class
self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)
self._sf = interp1d(D, self.sf_vec, assume_sorted=True)
self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)
# Extraolation option below is necessary because sometimes rvs picks
# a number really really close to 1 or 0 and this spits out an error if it
# is outside of the interpolation range.
self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True,
fill_value = 'extrapolate')
# Moments
self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)
With the above defined, we get results like:
dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)
model = MultiplicativeModel(stats.norm(3, 1),
stats.uniform(loc=2, scale = 2))
# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))
plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')
x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])

How can I make my 2D Gaussian fit to my image

I am trying to fit a 2D Gaussian to an image to find the location of the brightest point in it. My code looks like this:
import numpy as np
import astropy.io.fits as fits
import os
from astropy.stats import mad_std
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from lmfit.models import GaussianModel
from astropy.modeling import models, fitting
def gaussian(xycoor,x0, y0, sigma, amp):
'''This Function is the Gaussian Function'''
x, y = xycoor # x and y taken from fit function. Stars at 0, increases by 1, goes to length of axis
A = 1 / (2*sigma**2)
eq = amp*np.exp(-A*((x-x0)**2 + (y-y0)**2)) #Gaussian
return eq
def fit(image):
med = np.median(image)
image = image-med
image = image[0,0,:,:]
max_index = np.where(image >= np.max(image))
x0 = max_index[1] #Middle of X axis
y0 = max_index[0] #Middle of Y axis
x = np.arange(0, image.shape[1], 1) #Stars at 0, increases by 1, goes to length of axis
y = np.arange(0, image.shape[0], 1) #Stars at 0, increases by 1, goes to length of axis
xx, yy = np.meshgrid(x, y) #creates a grid to plot the function over
sigma = np.std(image) #The standard dev given in the Gaussian
amp = np.max(image) #amplitude
guess = [x0, y0, sigma, amp] #The initial guess for the gaussian fitting
low = [0,0,0,0] #start of data array
#Upper Bounds x0: length of x axis, y0: length of y axis, st dev: max value in image, amplitude: 2x the max value
upper = [image.shape[0], image.shape[1], np.max(image), np.max(image)*2]
bounds = [low, upper]
params, pcov = curve_fit(gaussian, (xx.ravel(), yy.ravel()), image.ravel(),p0 = guess, bounds = bounds) #optimal fit. Not sure what pcov is.
return params
def plotting(image, params):
fig, ax = plt.subplots()
ax.imshow(image)
ax.scatter(params[0], params[1],s = 10, c = 'red', marker = 'x')
circle = Circle((params[0], params[1]), params[2], facecolor = 'none', edgecolor = 'red', linewidth = 1)
ax.add_patch(circle)
plt.show()
data = fits.getdata('AzTECC100.fits') #read in file
med = np.median(data)
data = data - med
data = data[0,0,:,:]
parameters = fit(data)
#generates a gaussian based on the parameters given
plotting(data, parameters)
The image is plotting and the code is giving no errors but the fitting isn't working. It's just putting an x wherever the x0 and y0 are. The pixel values in my image are very small. The max value is 0.0007 and std dev is 0.0001 and the x and y are a few orders of magnitude larger. So I believe my problem is that because of this my eq is going to zero everywhere so the curve_fit is failing. I'm wondering if there's a better way to construct my gaussian so that it plots correctly?
I do not have access to your image. Instead I have generated some test "image" as follows:
y, x = np.indices((51,51))
x -= 25
y -= 25
data = 3 * np.exp(-0.7 * ((x+2)**2 + (y-1)**2))
Also, I have modified your code for plotting to increase the radius of the circle by 10:
circle = Circle((params[0], params[1]), 10 * params[2], ...)
and I commented out two more lines:
# image = image[0,0,:,:]
# data = data[0,0,:,:]
The result that I get is shown in the attached image and it looks reasonable to me:
Could it be that the issue is in how you access data from the FITS file? (e.g., image = image[0,0,:,:]) Are the data 4D array? Why do you have 4 indices?
I also saw that you have asked a similar question here: Astropy.model 2DGaussian issue in which you tried to use just astropy.modeling. I will look into that question.
NOTE: you can replace code such as
max_index = np.where(image >= np.max(image))
x0 = max_index[1] #Middle of X axis
y0 = max_index[0] #Middle of Y axis
with
y0, x0 = np.unravel_index(np.argmax(data), data.shape)

Fitting exponential decay with scipy negative log likelihood minimization not converging

I'm trying to fit exponential decay functions using negative log likelihood minimization, but even with good starting parameters x0 for the minimizer I can't seem to make this converge. Why? Did I write this up wrong?
edited to include conventional binned likelihood aka a "curve" fit
import numpy as np
from scipy.optimize import minimize, curve_fit
import matplotlib.pyplot as plt
np.random.seed(1)
def exp_fit(x, N, L):
return N * np.exp(- L * x)
def negloglik(args, func, data):
"""Negative log likelihood"""
return - np.sum(np.log(func(data, *args)))
def histpoints_w_err(X, bins):
counts, bin_edges = np.histogram(X, bins = bins, normed = False)
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
bin_err = np.sqrt(counts)
# Generate fitting points
x = bin_centers[counts > 0] # filter along counts, to remove any value in the same position as an empty bin
y = counts[counts > 0]
sy = bin_err[counts > 0]
return x, y, sy
data = np.random.exponential(0.5, 1000)
bins = np.arange(0, 3, 0.1)
x, y, sy = histpoints_w_err(data, bins)
popt, pcov = curve_fit(exp_fit, x, y, sigma = sy)
xpts = np.linspace(0, 3, 100)
# All variables must be positive
bnds = ((0, None),
(0, None))
result = minimize(negloglik,
args = (exp_fit, data),
x0 = (popt[0], popt[1]), # Give it the parameters of the fit that worked
method = "SLSQP",
bounds = bnds)
jac = result.get("jac")
plt.hist(data, bins = bins)
plt.plot(xpts, exp_fit(xpts, *popt), label = "Binned fit: {:.2f}exp(-{:.2f}x)".format(*popt))
plt.plot(xpts, exp_fit(xpts, *jac), label = "Unbinned fit: {:.2f}exp(-{:.2f}x)".format(*jac))
plt.text(s = result, x = 0.8, y = popt[0]*0.2)
plt.legend()
plt.show()
I have removed the method like this:
# minimize the negative log-Likelihood
result = minimize(negloglik, args = (exp_fit, data), x0 = (15, 0.5))
and what I got was:
fun: -1601.1177190942697
hess_inv: array([[2.16565679e+11, 5.15680574e+09],
[5.15680574e+09, 1.22792520e+08]])
jac: array([0., 0.])
message: 'Optimization terminated successfully.'
nfev: 120
nit: 28
njev: 30
status: 0
success: True
x: array([8986009.32851534, 213973.66153225])
where previously success was False. Maybe the method should be changed to other? The default one, if not specified is BFGS, L-BFGS-B, SLSQP, depending if the problem has constraints or bounds.
So, redid the whole thing using iminuit and probfit instead of scipy. The syntax is a little weird (especially how iminuit changes input argument names to match fitting parameters), but once you get going it's quite simple to use. It's a shame that there's very little community documentation out there, on this.
Here I've done the unbinned likelihood fit:
import iminuit, probfit
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
np.random.seed(1)
data = np.random.exponential(0.5, 500)
def unbinned_exp_LLH(data, loc_init, scale_init, limit_loc, limit_scale):
# Define function to fit
def exp_func(x, loc, scale):
return scipy.stats.expon.pdf(x, loc, scale)
# Define initial parameters
init_params = dict(loc = loc_init, scale = scale_init)
# Create an unbinned likelihood object with function and data.
unbin = probfit.UnbinnedLH(exp_func, data)
# Minimizes the unbinned likelihood for the given function
m = iminuit.Minuit(unbin,
**init_params,
limit_scale = limit_loc,
limit_loc = limit_scale,
pedantic=False,
print_level=0)
m.migrad()
params = m.values.values() # Get out fit values
errs = m.errors.values()
return params, errs
params, errs = unbinned_exp_LLH(data, loc_init = 0, scale_init = 0.5, limit_loc = (-1, 1), limit_scale = (-1, 1))
loc, scale = params
# Plot
x_pts = np.linspace(0, 3, 100)
plt.plot(x_pts, scipy.stats.expon.pdf(x_pts, *params), label = "exp(-{1:.2f}x)".format(*params), color = "black")
plt.hist(data, color = "lightgrey", bins = 20, label = "generated data", normed = True)
plt.xlim(0, 3)
plt.legend()
plt.show()

Integration of a Gaussian function to count the number of particle under area

I need to count the number of particle under the fitted Gaussian curve. The area of the fitted curve can be found by integrating the function within the limit (mean-3*sigma) to (mean+3*sigma). Would you please help me to solve this. Thanks for your kind consideration.
import pylab as py
import numpy as np
from scipy import optimize
from scipy.stats import stats
import matplotlib.pyplot as plt
import pandas as pd
BackPFT='T067.csv'
df_180 = pd.read_csv(BackPFT, error_bad_lines=False, header=1)
x_180=df_180.iloc[:,3]
y_180=df_180.iloc[:,4]
#want to plot the distribution of s calculated by the following equation
s=np.sqrt((((16*x_180**2*38.22**2)/((4*38.22**2-y_180**2)**2))+1))-1
#Shape of this distribution is Gaussian
#I need to fit this distribution by following parameter
mean=0.433
sigma=0.014
draw=s
#Definition of bin number
bi=np.linspace(0.01,8, 1000)
data = py.hist(draw.dropna(), bins = bi)
#Definition of Gaussian function
def f(x, a, b, c):
return (a * py.exp(-(x - mean)**2.0 / (2 *sigma**2)))
x = [0.5 * (data[1][i] + data[1][i+1]) for i in xrange(len(data[1])-1)]
y = data[0]
#Fitting the peak of the distribution
popt, pcov = optimize.curve_fit(f, x, y)
chi2, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
plot(x_fit, y_fit, lw=3, color="r",ls="--")
plt.xlim(0,2)
plt.tick_params(axis='both', which='major', labelsize=20)
plt.show()
The problem is how to integrate the defined function (f) and count the number under the area. Here I attach the file T067.csv. Thanks in advance for your kind consideration.
BackPFT='T061.csv'
df_180 = pd.read_csv(BackPFT, skip_blank_lines=True ,skiprows=1,header=None,skipfooter=None,engine='python')
x_180=df_180.iloc[:,3]
y_180=df_180.iloc[:,4]
b=42.4
E=109.8
LET=24.19
REL=127.32
mean=0.339; m1=0.259
sigma=0.012; s1=0.015
s=np.sqrt((((16*x_180**2*b**2)/((4*b**2-y_180**2)**2))+1))-1
draw=s
bi=np.linspace(0,8, 2000)
binwidth=0.004
#I want to plot the dsitribution of s. This distribution has three gaussian peaks
data = py.hist(draw.dropna(), bins = bi,color='gray',)
#first Gaussian function for the first peak (peaks counted from the right)
def f(x, a, b, c):
return (a * py.exp(-(x - mean)**2.0 / (2 *sigma**2)))
# fitting the function (Gaussian)
x = [0.5 * (data[1][i] + data[1][i+1]) for i in xrange(len(data[1])-1)]
y = data[0]
popt, pcov = optimize.curve_fit(f, x, y)
chi, p = stats.chisquare(popt)
x_fit = py.linspace(x[0], x[-1], 80000)
y_fit = f(x_fit, *popt)
plot(x_fit, y_fit, lw=5, color="r",ls="--")
#integration of first function f
gaussF = lambda x, a: f(x, a, sigma, mean)
bins=((6*sigma)/(binwidth))
delta = ((mean+3*sigma) - (mean-3*sigma))/bins
f1 = lambda x : f(x, popt[0], sigma, mean)
result = quad(f1,mean-3*sigma,mean+3*sigma)
area = result[0] # this give the area after integration of the gaussian
numPar = area / delta # this gives the number of particle under the integrated area
print"\n\tArea under curve = ", area, "\n\tNumber of particel= ", numPar
The file T061.csv here. Thanks Dr. I Putu Susila for his kind co-operation and interest.

Separating gaussian components of a curve using python

I am trying to deblend the emission lines of low resolution spectrum in order to get the gaussian components. This plot represents the kind of data I am using:
After searching a bit, the only option I found was the application of the gauest function from the kmpfit package (http://www.astro.rug.nl/software/kapteyn/kmpfittutorial.html#gauest). I have copied their example but I cannot make it work.
I wonder if anyone could please offer me any alternative to do this or how to correct my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
def CurveData():
x = np.array([3963.67285156, 3964.49560547, 3965.31835938, 3966.14111328, 3966.96362305,
3967.78637695, 3968.60913086, 3969.43188477, 3970.25463867, 3971.07714844,
3971.89990234, 3972.72265625, 3973.54541016, 3974.36791992, 3975.19067383])
y = np.array([1.75001533e-16, 2.15520995e-16, 2.85030769e-16, 4.10072843e-16, 7.17558032e-16,
1.27759917e-15, 1.57074192e-15, 1.40802933e-15, 1.45038722e-15, 1.55195653e-15,
1.09280316e-15, 4.96611341e-16, 2.68777266e-16, 1.87075114e-16, 1.64335999e-16])
return x, y
def FindMaxima(xval, yval):
xval = np.asarray(xval)
yval = np.asarray(yval)
sort_idx = np.argsort(xval)
yval = yval[sort_idx]
gradient = np.diff(yval)
maxima = np.diff((gradient > 0).view(np.int8))
ListIndeces = np.concatenate((([0],) if gradient[0] < 0 else ()) + (np.where(maxima == -1)[0] + 1,) + (([len(yval)-1],) if gradient[-1] > 0 else ()))
X_Maxima, Y_Maxima = [], []
for index in ListIndeces:
X_Maxima.append(xval[index])
Y_Maxima.append(yval[index])
return X_Maxima, Y_Maxima
def GaussianMixture_Model(p, x, ZeroLevel):
y = 0.0
N_Comps = int(len(p) / 3)
for i in range(N_Comps):
A, mu, sigma = p[i*3:(i+1)*3]
y += A * np.exp(-(x-mu)*(x-mu)/(2.0*sigma*sigma))
Output = y + ZeroLevel
return Output
def Residuals_GaussianMixture(p, x, y, ZeroLevel):
return GaussianMixture_Model(p, x, ZeroLevel) - y
Wave, Flux = CurveData()
Wave_Maxima, Flux_Maxima = FindMaxima(Wave, Flux)
EmLines_Number = len(Wave_Maxima)
ContinuumLevel = 1.64191e-16
# Define initial values
p_0 = []
for i in range(EmLines_Number):
p_0.append(Flux_Maxima[i])
p_0.append(Wave_Maxima[i])
p_0.append(2.0)
p1, conv = optimize.leastsq(Residuals_GaussianMixture, p_0[:],args=(Wave, Flux, ContinuumLevel))
Fig = plt.figure(figsize = (16, 10))
Axis1 = Fig.add_subplot(111)
Axis1.plot(Wave, Flux, label='Emission line')
Axis1.plot(Wave, GaussianMixture_Model(p1, Wave, ContinuumLevel), 'r', label='Fit with optimize.leastsq')
print p1
Axis1.plot(Wave, GaussianMixture_Model([p1[0],p1[1],p1[2]], Wave, ContinuumLevel), 'g:', label='Gaussian components')
Axis1.plot(Wave, GaussianMixture_Model([p1[3],p1[4],p1[5]], Wave, ContinuumLevel), 'g:')
Axis1.set_xlabel( r'Wavelength $(\AA)$',)
Axis1.set_ylabel('Flux' + r'$(erg\,cm^{-2} s^{-1} \AA^{-1})$')
plt.legend()
plt.show()
A typical simplistic way to fit:
def model(p,x):
A,x1,sig1,B,x2,sig2 = p
return A*np.exp(-(x-x1)**2/sig1**2) + B*np.exp(-(x-x2)**2/sig2**2)
def res(p,x,y):
return model(p,x) - y
from scipy import optimize
p0 = [1e-15,3968,2,1e-15,3972,2]
p1,conv = optimize.leastsq(res,p0[:],args=(x,y))
plot(x,y,'+') # data
#fitted function
plot(arange(3962,3976,0.1),model(p1,arange(3962,3976,0.1)),'-')
Where p0 is your initial guess. By the looks of things, you might want to use Lorentzian functions...
If you use full_output=True, you get all kind of info about the fitting. Also check out curve_fit and the fmin* functions in scipy.optimize. There are plenty of wrappers around these around, but often, like here, it's easier to use them directly.

Categories