Separating gaussian components of a curve using python

Separating gaussian components of a curve using python - python

I am trying to deblend the emission lines of low resolution spectrum in order to get the gaussian components. This plot represents the kind of data I am using:
After searching a bit, the only option I found was the application of the gauest function from the kmpfit package (http://www.astro.rug.nl/software/kapteyn/kmpfittutorial.html#gauest). I have copied their example but I cannot make it work.
I wonder if anyone could please offer me any alternative to do this or how to correct my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
def CurveData():
    """Return the sample spectrum used in the deblending example.

    Returns:
        tuple: ``(x, y)`` where ``x`` holds 15 wavelength samples and ``y``
        the corresponding flux values (the plotting code labels them in
        Angstrom and erg cm^-2 s^-1 A^-1 respectively).
    """
    x = np.array([3963.67285156, 3964.49560547, 3965.31835938, 3966.14111328, 3966.96362305,
                  3967.78637695, 3968.60913086, 3969.43188477, 3970.25463867, 3971.07714844,
                  3971.89990234, 3972.72265625, 3973.54541016, 3974.36791992, 3975.19067383])
    y = np.array([1.75001533e-16, 2.15520995e-16, 2.85030769e-16, 4.10072843e-16, 7.17558032e-16,
                  1.27759917e-15, 1.57074192e-15, 1.40802933e-15, 1.45038722e-15, 1.55195653e-15,
                  1.09280316e-15, 4.96611341e-16, 2.68777266e-16, 1.87075114e-16, 1.64335999e-16])
    return x, y
def FindMaxima(xval, yval):
    """Locate the local maxima of a sampled curve.

    The samples are ordered by ascending ``xval`` first. A point is reported
    as a maximum when the slope changes from rising to not rising there; the
    first point also counts when the curve starts by descending, and the last
    point when the curve ends while still rising.

    Args:
        xval: sequence of abscissa values.
        yval: sequence of ordinate values, same length as ``xval``.

    Returns:
        tuple: ``(X_Maxima, Y_Maxima)``, two lists with the coordinates of
        the detected maxima in ascending ``x`` order.
    """
    xval = np.asarray(xval)
    yval = np.asarray(yval)

    # With fewer than two samples there is no slope to inspect; report the
    # data unchanged instead of failing on ``gradient[0]``.
    if yval.size < 2:
        return list(xval.ravel()), list(yval.ravel())

    # Reorder BOTH arrays: sorting only yval would pair every maximum with
    # the wrong x value whenever the input is not already sorted.
    sort_idx = np.argsort(xval)
    xval = xval[sort_idx]
    yval = yval[sort_idx]

    gradient = np.diff(yval)
    rising = (gradient > 0).astype(np.int8)
    # -1 marks a transition from a rising segment to a non-rising one.
    turning = np.diff(rising)

    pieces = []
    if gradient[0] < 0:
        pieces.append([0])
    pieces.append(np.where(turning == -1)[0] + 1)
    if gradient[-1] > 0:
        pieces.append([len(yval) - 1])
    indices = np.concatenate(pieces).astype(int)

    X_Maxima = [xval[index] for index in indices]
    Y_Maxima = [yval[index] for index in indices]
    return X_Maxima, Y_Maxima
def GaussianMixture_Model(p, x, ZeroLevel):
    """Evaluate a sum of Gaussians on top of a constant level.

    Args:
        p: flat sequence ``[A0, mu0, sigma0, A1, mu1, sigma1, ...]``; any
            trailing values that do not form a complete triple are ignored.
        x: point(s) at which to evaluate the model (scalar or NumPy array).
        ZeroLevel: constant added to the sum of the components.

    Returns:
        The model value(s) at ``x``.
    """
    y = 0.0
    N_Comps = len(p) // 3
    for i in range(N_Comps):
        A, mu, sigma = p[i * 3:(i + 1) * 3]
        y += A * np.exp(-(x - mu) * (x - mu) / (2.0 * sigma * sigma))
    return y + ZeroLevel
def Residuals_GaussianMixture(p, x, y, ZeroLevel):
    """Return model-minus-data residuals for ``optimize.leastsq``.

    Args:
        p: flat parameter sequence passed on to ``GaussianMixture_Model``.
        x: abscissa values of the data.
        y: observed values at ``x``.
        ZeroLevel: constant level passed on to ``GaussianMixture_Model``.

    Returns:
        ``GaussianMixture_Model(p, x, ZeroLevel) - y``.
    """
    return GaussianMixture_Model(p, x, ZeroLevel) - y
Wave, Flux = CurveData()
Wave_Maxima, Flux_Maxima = FindMaxima(Wave, Flux)
EmLines_Number = len(Wave_Maxima)
ContinuumLevel = 1.64191e-16

# Define initial values: one (amplitude, centre, sigma) triple per detected
# maximum, in the flat layout expected by GaussianMixture_Model.
p_0 = []
for i in range(EmLines_Number):
    p_0.append(Flux_Maxima[i])
    p_0.append(Wave_Maxima[i])
    p_0.append(2.0)

p1, conv = optimize.leastsq(Residuals_GaussianMixture, p_0[:], args=(Wave, Flux, ContinuumLevel))

Fig = plt.figure(figsize=(16, 10))
Axis1 = Fig.add_subplot(111)
Axis1.plot(Wave, Flux, label='Emission line')
Axis1.plot(Wave, GaussianMixture_Model(p1, Wave, ContinuumLevel), 'r', label='Fit with optimize.leastsq')
print(p1)

# Draw every fitted component, however many maxima were detected; only the
# first one carries the legend label so the legend has a single entry.
for i in range(EmLines_Number):
    component = p1[3 * i:3 * i + 3]
    Axis1.plot(Wave, GaussianMixture_Model(component, Wave, ContinuumLevel), 'g:',
               label='Gaussian components' if i == 0 else None)

Axis1.set_xlabel( r'Wavelength $(\AA)$',)
Axis1.set_ylabel('Flux' + r'$(erg\,cm^{-2} s^{-1} \AA^{-1})$')
plt.legend()
plt.show()

A typical simplistic way to fit:
def model(p, x):
    """Sum of two Gaussian-shaped profiles.

    Args:
        p: six values ``(A, x1, sig1, B, x2, sig2)``: amplitude, centre and
            width of the first and of the second component.
        x: point(s) at which to evaluate the model.

    Returns:
        ``A*exp(-(x-x1)**2/sig1**2) + B*exp(-(x-x2)**2/sig2**2)``.

    Note:
        The exponent has no factor 2 in the denominator, so each ``sig`` is
        sqrt(2) times the standard deviation of the usual Gaussian form.
    """
    A, x1, sig1, B, x2, sig2 = p
    return A*np.exp(-(x-x1)**2/sig1**2) + B*np.exp(-(x-x2)**2/sig2**2)
def res(p, x, y):
    """Return the residuals ``model(p, x) - y`` minimised by ``leastsq``."""
    return model(p, x) - y
from scipy import optimize
# Initial guess, in the order model() unpacks it: A, x1, sig1, B, x2, sig2.
p0 = [1e-15,3968,2,1e-15,3972,2]
# Least-squares fit of the two-component model; p1 holds the fitted parameters.
# NOTE(review): `x` and `y` are not defined in this snippet — presumably the
# wavelength and flux arrays from the question; confirm.
p1,conv = optimize.leastsq(res,p0[:],args=(x,y))
# NOTE(review): `plot` and `arange` are used unqualified — presumably from a
# pylab-style namespace; confirm.
plot(x,y,'+') # data
#fitted function
plot(arange(3962,3976,0.1),model(p1,arange(3962,3976,0.1)),'-')
Where p0 is your initial guess. By the looks of things, you might want to use Lorentzian functions...
If you use full_output=True, you get all kinds of information about the fit. Also check out curve_fit and the fmin* functions in scipy.optimize. There are plenty of wrappers around these functions, but often, as here, it is easier to use them directly.

Related

Can one create a distribution characterizing the multiplication of two distributions in Python?

I have two distributions and I would like to know the properties of the multiplication of these distributions.
For example, if I had the distribution of properties velocity and time, I want the characteristics of the probability distribution of distance.
With reasonable estimates for the integration bounds, I can calculate the probability density function of the product of two random variables:
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
# Grids for time (T), velocity (V) and distance (D); dt is the spacing of T.
T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below
V = np.linspace(0,20,201)
D = np.linspace(0,120,201)
P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2) # probability distribution for speed
# complete integration
# For each distance d, integrate P_t(t) * P_v(d / t) / t over the T grid with
# the trapezoidal rule; this is the density of the product of time and speed.
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]
# Plot the two input densities and the resulting distance density.
plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')
I would like to be able to compute things like P_d.sf(d), P_d.cdf(d), etc., for arbitrary values of d. Can I create a new distribution (perhaps using scipy.stats.rv_continuous) to characterize distance?

It took me a while to understand rv_continuous well enough to build a solution. Cobbling together knowledge from a bunch of examples (I should have documented them, sorry), I think I have a working solution.
The only issue is that the domain needs to be known in advance, but I can work with that. If someone has ideas for how to fix that, please let me know.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp
interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz
# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)
# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)
class MultiplicativeModel(stats.rv_continuous):
    """Distribution of the product of a "time" and a "velocity" variable.

    The density is tabulated once on the module-level distance grid ``D``
    (spacing ``dd``) by integrating over the module-level time grid ``T``
    (spacing ``dt``). The pdf, cdf, sf and ppf of the distribution are then
    linear interpolations of those tables, so the domain must be known in
    advance through ``T`` and ``D``.

    Args:
        Tmodel: frozen distribution (anything with a ``pdf`` method) for time.
        Vmodel: frozen distribution (anything with a ``pdf`` method) for
            velocity.
        *args, **kwargs: forwarded unchanged to ``stats.rv_continuous``.
    """

    def __init__(self, Tmodel, Vmodel, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Tmodel = Tmodel  # The time-domain probability function
        self.Vmodel = Vmodel  # The velocity-domain probability function

        # Create vectors for interpolation of distributions:
        # pdf(d) = integral over t of P_T(t) * P_V(d / t) / t.
        self.pdf_vec = np.array([
            trapz(self.Tmodel.pdf(T) * self.Vmodel.pdf(d / T) / T, dx=dt)
            for d in D
        ])
        # Running sum of the density times the grid step approximates the CDF.
        self.cdf_vec = np.cumsum(self.pdf_vec) * dd
        self.sf_vec = 1 - self.cdf_vec

        # define key functions for rv_continuous class
        self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)
        self._sf = interp1d(D, self.sf_vec, assume_sorted=True)
        self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)
        # Extrapolation option below is necessary because sometimes rvs picks
        # a number really really close to 1 or 0 and this spits out an error
        # if it is outside of the interpolation range.
        self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True,
                             fill_value='extrapolate')
        # Moments: n-th raw moment from the tabulated density.
        self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)
With the above defined, we get results like:
dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)
model = MultiplicativeModel(stats.norm(3, 1),
stats.uniform(loc=2, scale = 2))
# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))
plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')
x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])

Avoiding plotting ODEs divergent solutions ODEint

I'm trying to plot a phase plane and I want it to look nice. However, some solutions of the system of equations diverge because of the initial conditions. Is there some way to use a try/except block so that a solution is not plotted when it diverges? Here is my code:
import matplotlib.pyplot as plt
import numpy as np
from scipy.integrate import odeint
import pylab as pl
def aux_func(x):
    """Piecewise nonlinearity applied to ``y = x[0] - x[1]``.

    Args:
        x: state vector; only ``x[0]`` and ``x[1]`` are read.

    Returns:
        ``y**3 + 0.5*y`` when ``|y| <= 1``, otherwise ``2*y - sign(y)``.

    Note:
        The two branches do not meet at ``|y| = 1`` (1.5 from the inside,
        1 from the outside), so the function has a jump there.
    """
    y = x[0] - x[1]
    if np.abs(y) <= 1:
        return y**3 + 0.5*y
    return 2*y - np.sign(y)
def function(x, t):
    """Right-hand side of the two-dimensional system passed to ``odeint``.

    Args:
        x: state vector ``(x1, x2)``.
        t: time; accepted because ``odeint`` supplies it, but not used.

    Returns:
        list: ``[x1_dot, x2_dot]`` with ``x1_dot = x2`` and
        ``x2_dot = -x2 - aux_func(x)``.
    """
    x1_dot = x[1]
    x2_dot = -x[1] - aux_func(x)
    return [x1_dot, x2_dot]
ts = np.linspace(0, 20, 300)
ic_1 = np.linspace(-1,1,10)
ic_2 = np.linspace(-1,1,10)

# Integrate one trajectory per initial condition on a 10 x 10 grid.
for r1 in ic_1:
    for r2 in ic_2:
        x0 = (r1, r2)
        try:
            xs = odeint(function, x0, ts)
        except Exception:
            # Best effort: an initial condition that cannot be integrated is
            # skipped instead of aborting the whole figure.
            continue
        # odeint usually signals integration trouble with warnings rather
        # than exceptions, so diverged solutions are detected explicitly.
        if not np.all(np.isfinite(xs)):
            continue
        plt.plot(xs[:,0], xs[:,1],"r-",linewidth=.8)

# Axis labels and limits
plt.xlabel("$x_1$", fontsize=12)
plt.ylabel("$x_2$", fontsize=12)
# plt.tick_params(labelsize=10)
# plt.xticks(np.linspace(0,1,11))
# plt.yticks(np.linspace(0,1,11))
plt.xlim(-1, 1)
plt.ylim(-1, 1)

# Plot the vector field
X1, X2 = np.mgrid[-1:1:20j,-1:1:20j]
u = X2
d = X1 - X2
# NOTE(review): the outer branch here subtracts 0.5*sign(d) whereas aux_func
# subtracts sign(y); confirm which of the two is intended.
t = np.where(np.abs(d) > 1, 2*d - 0.5*np.sign(d), d**3 + 0.5*d)
v = -X2 - t
pl.quiver(X1, X2, u, v, color = 'b',width = .002)
plt.grid()
plt.title('Plano de Fase Punto 1')
#plt.savefig('FasePunto4.png')
plt.show()
The code is plotting the following:
Appreciate the help.

This can be solved by avoiding the wrong divergences at all, so that there is no need for exception handling.
This is a discontinuous ODE which can lead to unusual effects like a sliding mode. One way to quickly work around that is to mollify the jump by implementing a blending zone where the vector field changes quickly but continuously from one phase to the other (see Unsure about how to use event function in Matlab for other generic work-arounds). The changes for that can be implemented as
def aux_func(x):
    """Smoothed version of the piecewise nonlinearity on ``y = x[0] - x[1]``.

    The hard switch at ``|y| = 1`` is replaced by a narrow blending zone so
    that the vector field changes quickly but continuously between branches.

    Args:
        x: state vector; only ``x[0]`` and ``x[1]`` are read.

    Returns:
        ``(1 - h) * f1 + h * f2`` where ``h`` blends from the inner branch
        ``f1`` to the outer branch ``f2``.
    """
    def softsign(u):
        # Steep tanh: a continuous stand-in for sign(u).
        return np.tanh(1e4*u)

    y = x[0]-x[1]
    h = 0.5*(1+softsign(y**2-1))
    # h is about zero for |y|<1 and about 1 for |y|>1
    f1 = y**3 + 0.5*y  # for |y|<1
    f2 = 2*y - softsign(y)  # for |y|>1, note the second mollification
    return (1-h)*f1+h*f2
With no further changes to the code this gives the plot
Note that pylab is obsolete, all its functionality can also be accessed via plt=matplotlib.pyplot.

Graphing polynomials

With some help I have produced the following code. Below are some of the desired outputs for given inputs. However I am having some trouble completing the last task of this code. Looking for some help with this, any guidance or help is greatly appreciated, thanks!
# Number of arithmetic operations performed by the most recent poly_* call.
flops = 0


def add(x1, x2):
    """Return ``x1 + x2`` and count one operation in ``flops``."""
    global flops
    flops += 1
    return x1 + x2


def multiply(x1, x2):
    """Return ``x1 * x2`` and count one operation in ``flops``."""
    global flops
    flops += 1
    return x1 * x2


def poly_horner(A, x):
    """Evaluate a polynomial at ``x`` with Horner's scheme.

    Args:
        A: coefficients in ascending order, ``A[i]`` multiplying ``x**i``.
        x: evaluation point.

    Returns:
        The polynomial value; 0 for an empty coefficient list, matching
        ``poly_naive``. The global ``flops`` is reset and then holds the
        number of additions and multiplications used by this call.
    """
    global flops
    flops = 0
    if len(A) == 0:
        return 0
    p = A[-1]
    # Work from the second-highest coefficient down to the constant term.
    for i in range(len(A) - 2, -1, -1):
        p = add(multiply(p, x), A[i])
    return p


def poly_naive(A, x):
    """Evaluate a polynomial at ``x`` term by term.

    Each power ``x**i`` is rebuilt from scratch with ``i`` multiplications,
    which is what makes this variant more expensive than ``poly_horner``.

    Args:
        A: coefficients in ascending order, ``A[i]`` multiplying ``x**i``.
        x: evaluation point.

    Returns:
        The polynomial value (0 for an empty coefficient list). The global
        ``flops`` is reset and then holds the operation count of this call.
    """
    global flops
    p = 0
    flops = 0
    for i, a in enumerate(A):
        xp = 1
        for _ in range(i):
            xp = multiply(xp, x)
        p = add(p, multiply(xp, a))
    return p
Given the following inputs, I got the following outputs:
poly_horner([1,2,3,4,5], 2)
129
print(flops)
8
poly_naive([1,2,3,4,5], 2)
129
print(flops)
20
np.polyval([5,4,3,2,1], 2)
129

I assume you want to create a figure, though your question is quite vague...but I have a few minutes to kill while my code runs. Anyway, it seems you MIGHT be having difficulty plotting.
import numpy as np
import pylab as pl
x = np.arange(10)
y = x * np.pi
# you can calculate a line of best fit (lobf) using numpy's polyfit function
lobf1 = np.polyfit(x, y, 1) # first degree polynomial
lobf2 = np.polyfit(x, y, 2) # second degree polynomial
lobf3 = np.polyfit(x, y, 3) # third degree polynomial
# you can now use the lines of best fit to calculate the
# value anywhere within the domain using numpy's polyval function
# FIRST, create a figure and a plotting axis within the fig
fig = pl.figure(figsize=(3.25, 2.5))
ax0 = fig.add_subplot(111)
# now use polyval to calculate your y-values at every x
x = np.arange(0, 20, 0.1)
ax0.plot(x, np.polyval(lobf1, x), 'k')
ax0.plot(x, np.polyval(lobf2, x), 'b')
ax0.plot(x, np.polyval(lobf3, x), 'r')
# add a legend for niceness
ax0.legend(('Degree 1', 'Degree 2', 'Degree 3'), fontsize=8, loc=2)
# you can label the axes whatever you like
ax0.set_ylabel('My y-label', fontsize=8)
ax0.set_xlabel('My x-label', fontsize=8)
# you can show the figure on your screen
fig.show()
# and you can save the figure to your computer in different formats
# specifying bbox_inches='tight' helps eliminate unnecessary whitespace around
# the axis when saving...it just looks better this way.
pl.savefig('figName.png', dpi=500, bbox_inches='tight')
pl.savefig('figName.pdf', bbox_inches='tight')
# don't forget to close the figure
pl.close('all')

Python curve_fit with multiple independent variables

Python's curve_fit calculates the best-fit parameters for a function with a single independent variable, but is there a way, using curve_fit or something else, to fit for a function with multiple independent variables? For example:
def func(x, y, a, b, c):
    """Return ``log(a) + b*log(x) + c*log(y)``.

    ``x`` and ``y`` are the independent variables; ``a``, ``b`` and ``c``
    are the parameters to be fitted.
    """
    # Imported locally so that the snippet is self-contained; the natural
    # logarithm from NumPy also accepts arrays for x and y.
    import numpy as np
    return np.log(a) + b*np.log(x) + c*np.log(y)
where x and y are the independent variable and we would like to fit for a, b, and c.

You can pass curve_fit a multi-dimensional array for the independent variables, but then your func must accept the same thing. For example, calling this array X and unpacking it to x, y for clarity:
import numpy as np
from scipy.optimize import curve_fit
def func(X, a, b, c):
    """Model ``log(a) + b*log(x) + c*log(y)`` in the form curve_fit expects.

    Args:
        X: pair ``(x, y)`` holding the two independent variables; it is
            passed as a single argument because ``curve_fit`` hands the
            independent data to the model as one object.
        a, b, c: parameters to be fitted.

    Returns:
        The model evaluated element-wise on ``x`` and ``y``.
    """
    x, y = X
    return np.log(a) + b*np.log(x) + c*np.log(y)
# some artificially noisy data to fit
x = np.linspace(0.1,1.1,101)
y = np.linspace(1.,2., 101)
a, b, c = 10., 4., 6.
z = func((x,y), a, b, c) * 1 + np.random.random(101) / 100
# initial guesses for a,b,c:
p0 = 8., 2., 7.
print(curve_fit(func, (x,y), z, p0))
Gives the fit:
(array([ 9.99933937, 3.99710083, 6.00875164]), array([[ 1.75295644e-03, 9.34724308e-05, -2.90150983e-04],
[ 9.34724308e-05, 5.09079478e-06, -1.53939905e-05],
[ -2.90150983e-04, -1.53939905e-05, 4.84935731e-05]]))

optimizing a function with multiple input dimensions and a variable number of parameters
This example shows how to fit a polynomial with a two dimensional input (R^2 -> R) by an increasing number of coefficients. The design is very flexible so that the callable f from curve_fit is defined once for any number of non-keyword arguments.
minimal reproducible example
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def poly2d(xy, *coefficients):
    """Evaluate a polynomial in ``x + y`` for two-dimensional input points.

    Args:
        xy: array with one point per row; column 0 is ``x``, column 1 is
            ``y``.
        *coefficients: polynomial coefficients in ascending order; the
            ``k``-th one multiplies ``(x + y)**k``. Any number may be given,
            which lets ``curve_fit`` fit different orders with one callable.

    Returns:
        One value per row of ``xy`` (the integer 0 when no coefficient is
        passed).
    """
    x = xy[:, 0]
    y = xy[:, 1]
    proj = x + y
    res = 0
    for order, coef in enumerate(coefficients):
        res += coef * proj ** order
    return res
# Sampling grid and the "unknown" polynomial that the fits should recover.
nx = 31
ny = 21
range_x = [-1.5, 1.5]
range_y = [-1, 1]
target_coefficients = (3, 0, -19, 7)
xs = np.linspace(*range_x, nx)
ys = np.linspace(*range_y, ny)
im_x, im_y = np.meshgrid(xs, ys)
# One (x, y) point per row, as poly2d expects.
xdata = np.c_[im_x.flatten(), im_y.flatten()]
im_target = poly2d(xdata, *target_coefficients).reshape(ny, nx)

fig, axs = plt.subplots(2, 3, figsize=(29.7, 21))
axs = axs.flatten()

# First panel: the target image.
ax = axs[0]
ax.set_title('Unknown polynomial P(x+y)\n[secret coefficients: ' + str(target_coefficients) + ']')
sm = ax.imshow(
    im_target,
    cmap = plt.get_cmap('coolwarm'),
    origin='lower'
)
fig.colorbar(sm, ax=ax)

# The fit target does not depend on the order, so flatten it only once.
ydata = im_target.flatten()

# Remaining panels: fits with 1 to 5 coefficients (orders 0 to 4). The length
# of p0 tells curve_fit how many coefficients to pass to poly2d.
for order in range(5):
    popt, pcov = curve_fit(poly2d, xdata=xdata, ydata=ydata, p0=[0]*(order+1) )
    im_fit = poly2d(xdata, *popt).reshape(ny, nx)
    ax = axs[1+order]
    title = 'Fit O({:d}):'.format(order)
    for o, p in enumerate(popt):
        # Start a new title line every two terms.
        if o%2 == 0:
            title += '\n'
        if o == 0:
            title += ' {:=-{w}.1f} (x+y)^{:d}'.format(p, o, w=int(np.log10(max(abs(p), 1))) + 5)
        else:
            title += ' {:=+{w}.1f} (x+y)^{:d}'.format(p, o, w=int(np.log10(max(abs(p), 1))) + 5)
    title += '\nrms: {:.1f}'.format( np.mean((im_fit-im_target)**2)**.5 )
    ax.set_title(title)
    sm = ax.imshow(
        im_fit,
        cmap = plt.get_cmap('coolwarm'),
        origin='lower'
    )
    fig.colorbar(sm, ax=ax)

for ax in axs:
    ax.set_xlabel('x')
    ax.set_ylabel('y')
plt.show()
P.S. The concept of this answer is identical to my other answer here, but the code example is way more clear. At the time given, I will delete the other answer.

Fitting to an unknown number of parameters
In this example, we try to reproduce some measured data measData.
In this example measData is generated by the function measuredData(x, a=.2, b=-2, c=-.8, d=.1). In practice, we might have obtained measData from a measurement, so we have no idea how it is described mathematically. Hence the fit.
We fit by a polynomial, which is described by the function polynomFit(inp, *args). As we want to try out different orders of polynomials, it is important to be flexible in the number of input parameters.
The independent variables (x and y in your case) are encoded in the 'columns'/second dimension of inp.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
def measuredData(inp, a=.2, b=-2, c=-.8, d=.1):
    """Generate the synthetic "measured" data for the fitting example.

    Args:
        inp: array with one sample per row; column 0 is ``x``, column 1 is
            ``y``.
        a, b, c, d: coefficients of the cubic in ``x``.

    Returns:
        ``a + b*x + c*x**2 + d*x**3 + y`` for every row of ``inp``.
    """
    x = inp[:, 0]
    y = inp[:, 1]
    return a + b*x + c*x**2 + d*x**3 + y
def polynomFit(inp, *args):
    """Polynomial in ``x`` of arbitrary order, plus ``y``.

    Args:
        inp: array with one sample per row; column 0 is ``x``, column 1 is
            ``y``.
        *args: polynomial coefficients in ascending order; the ``k``-th one
            multiplies ``x**k``. Their number sets the polynomial order.

    Returns:
        ``sum(args[k] * x**k) + y`` for every row of ``inp``.
    """
    x = inp[:, 0]
    y = inp[:, 1]
    res = 0
    # The leftover debug print was dropped: curve_fit calls this function
    # many times per fit and the output drowned the actual results.
    for order, coef in enumerate(args):
        res += coef * x**order
    return res + y
# Ten samples with two independent variables each (columns x and y).
inpData = np.linspace(0,10,20).reshape(-1,2)
inpDataStr = ['({:.1f},{:.1f})'.format(a,b) for a,b in inpData]
measData = measuredData(inpData)
sampleIndex = np.arange(inpData.shape[0])

fig, ax = plt.subplots()
ax.plot(sampleIndex, measData, label='measuered', marker='o', linestyle='none' )

# Fit polynomials with 1 to 5 coefficients; the length of p0 tells curve_fit
# how many parameters to pass to polynomFit.
for order in range(5):
    popt, pcov = curve_fit(polynomFit, xdata=inpData, ydata=measData, p0=[0]*(order+1) )
    fitData = polynomFit(inpData,*popt)
    ax.plot(sampleIndex, fitData, label='polyn. fit, order '+str(order), linestyle='--' )
    print(order, popt)

ax.legend( loc='upper left', bbox_to_anchor=(1.05, 1))
# Fix the tick positions first so that each label lands on its own sample.
ax.set_xticks(sampleIndex)
ax.set_xticklabels(inpDataStr, rotation=90)
Result:

Yes. We can pass multiple variables for curve_fit. I have written a piece of code:
import numpy as np
x = np.random.randn(2,100)
w = np.array([1.5,0.5]).reshape(1,2)
esp = np.random.randn(1,100)
y = np.dot(w,x)+esp
y = y.reshape(100,)
In the above code I have generated x a 2D data set in shape of (2,100) i.e, there are two variables with 100 data points. I have fit the dependent variable y with independent variables x with some noise.
def model_func(x, w1, w2, b):
    """Linear model ``w1*x[0] + w2*x[1] + b`` for use with ``curve_fit``.

    Args:
        x: array of shape ``(2, n)``: two independent variables with ``n``
            samples each.
        w1, w2: weights of the first and second variable.
        b: constant offset.

    Returns:
        One-dimensional array with ``n`` predictions.
    """
    w = np.array([w1, w2]).reshape(1, 2)
    b = np.array([b]).reshape(1, 1)
    y_p = np.dot(w, x) + b
    # Flatten to (n,) for any sample count instead of hard-coding 100.
    return y_p.reshape(-1)
We have defined a model function that establishes relation between y & x.
Note: The shape of output of the model function or predicted y should be (length of x,)
popt, pcov = curve_fit(model_func,x,y)
The popt is an 1D numpy array containing predicted parameters. In our case there are 3 parameters.

Yes, there is: simply give curve_fit a multi-dimensional array for xData.

How can I implement bivariate normal Gaussian noise?

I want to implement complex standard Gaussian noise in python or C. This figure shows what I want to implement.
And first I implement it in python, like this.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pylab as pl
size = 100000
BIN = 70
x = np.random.normal(0.0,1.0,size)
y = np.random.normal(0.0,1.0,size)

# Normalised histograms of the two samples; `density=True` replaces the
# `normed=True` keyword that newer Matplotlib versions no longer accept.
xhist = pl.hist(x,bins = BIN,range=(-3.5,3.5),density = True)
yhist = pl.hist(y,bins = BIN,range=(-3.5,3.5),density = True)
xmesh = np.arange(-3.5,3.5,0.1)
ymesh = np.arange(-3.5,3.5,0.1)

# Z[i][j] = xhist[0][i] + yhist[0][j], built in one step.
# NOTE(review): the joint density of two independent variables is the product
# of the marginals, not their sum; the sum is kept here because it is what
# produces the figure this snippet is meant to show.
Z = np.add.outer(xhist[0], yhist[0])

X,Y = np.meshgrid(xmesh,ymesh)
fig = plt.figure()
# Create the 3D axes through the figure so that they are attached to it.
ax = fig.add_subplot(111, projection='3d')
ax.plot_wireframe(X,Y,Z)
plt.show()
However, it is not standard complex Gaussian noise.
The output figure become:
I think Gaussian noise is additive; so why does the result look so different?
I already tried to change the parts of code
x = np.random.normal(0.0,1.0,size)
y = np.random.normal(0.0,1.0,size)
to
r = np.random.normal(0.0,1.0,size)
theta = np.random.uniform(0.0,2*np.pi,size)
x = r * np.cos(theta)
y = r * np.sin(theta)
however, the result was same.
Please tell me the correct implementation or equation of bivariate standard Gaussian noise.

So sorry.It's my mistake.
Joint probability is defined by the product, not summation. I was a perfect fool!
So
Z[i][j] = xhist[0][i] + yhist[0][j]
term must become
Z[i][j] = xhist[0][i] * yhist[0][j]
And I checked
# Sum the tabulated joint density over all cells. 0.01 is the area of one
# cell: the histogram bins are 0.1 wide in each direction.
integral = 0.0
for i in range(BIN):
    for j in range(BIN):
        integral = integral + Z[i][j] * 0.01
will be 1.0.
So if we need complex standard Gaussian noise, we should add independent real standard Gaussian noise to the real part and to the imaginary part.
This is the graph for comparing.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Separating gaussian components of a curve using python - python

Related

Can one create a distribution characterizing the multiplication of two distributions in Python?

Avoiding plotting ODEs divergent solutions ODEint

Graphing polynomials

Python curve_fit with multiple independent variables

How can I implement bivariate normal Gaussian noise?

Categories

Resources