How to fit 2 gauss in python - python

I am a new user of Python. I am trying to fit 2 Gaussians to my data, but there are some errors in the results.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy as scipy
from scipy import optimize
from matplotlib.ticker import AutoMinorLocator
from matplotlib import gridspec
import matplotlib.ticker as ticker
%matplotlib inline
data = np.loadtxt('csv/test_run09.csv', encoding="utf-8", delimiter=',',skiprows=1)
x = data[:,1]
y1 = data[:,2]
y2 = data[:,3]
y3 = data[:,4]
y4 = data[:,5]
y5 = data[:,6]
y6 = data[:,7]
y7 = data[:,8]
y8 = data[:,9]
y9 = data[:,10]
y10 = data[:,11]
y11 = data[:,12]
y12 = data[:,13]
y13 = data[:,14]
y14 = data[:,15]
amp1 = 2
sigma1 = 0.1
x_array = x[(x>33)&(x<34)]
y_array = y14[(x>33)&(x<34)]
amp2 = np.max(y_array)
sigma2 = np.std(x_array)
def _1gaussian1(x_array, amp1, sigma1):
return amp1*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-\ 33.49290958)/sigma1)**2))) + 0.2
def _1gaussian2(x_array, amp2, sigma2):
return amp2*(1/(sigma2*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-\ 33.6312849)/sigma2)**2))) + 0.2
popt_gauss1, pcov_gauss1 = scipy.optimize.curve_fit(_1gaussian1, x_array, y_array, p0=[amp1, sigma1])
popt_gauss2, pcov_gauss2 = scipy.optimize.curve_fit(_1gaussian2, x_array, y_array, p0=[np.max(y_array), np.std(x_array)])
def _2gaussian(x_array, amp1, sigma1, amp2, sigma2):
return amp1*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array- 33.49290958)/sigma1)**2))) + amp2*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x_array-33.6312849)/sigma2)**2))) + 0.3
popt_2gauss, pcov_2gauss = scipy.optimize.curve_fit(_2gaussian, x_array, y_array, p0=[amp1, sigma1, np.max(y_array), np.std(x_array)])
perr_2gauss = np.sqrt(np.diag(pcov_2gauss))
print(popt_2gauss)
pars_1 = popt_2gauss[0:2]
pars_2 = popt_2gauss[2:4]
gauss_peak_1 = _1gaussian1(x_array, *pars_1)
gauss_peak_2 = _1gaussian2(x_array, *pars_2)
# Single-axes figure showing the data, the combined fit and each peak.
fig = plt.figure(figsize=(7, 5))
gs = gridspec.GridSpec(1, 1)
ax1 = fig.add_subplot(gs[0])
plt.grid()
# Measured points (red dots) and the two-peak model (dashed black line).
ax1.plot(x_array, y_array, "ro")
ax1.plot(x_array, _2gaussian(x_array, *popt_2gauss), 'k--')
# peak 1
ax1.plot(x_array, gauss_peak_1, "g")
ax1.fill_between(x_array, gauss_peak_1.min(), gauss_peak_1, facecolor="green", alpha=0.5)
# peak 2
ax1.plot(x_array, gauss_peak_2, "y")
ax1.fill_between(x_array, gauss_peak_2.min(), gauss_peak_2, facecolor="yellow", alpha=0.5)
# prints the fitting parameters with their errors
# The area is integrated against x_array; without it np.trapz assumes a
# sample spacing of 1 and the result is not in the units of the x axis.
# NOTE(review): the integrated curves still include their constant baseline.
print("-------------Peak 1-------------")
print("amplitude = %0.2f (+/-) %0.2f" % (pars_1[0], perr_2gauss[0]))
print("sigma = %0.2f (+/-) %0.2f" % (pars_1[1], perr_2gauss[1]))
print("area = %0.2f" % np.trapz(gauss_peak_1, x_array))
print("-------------Peak 2-------------")
print("amplitude = %0.2f (+/-) %0.2f" % (pars_2[0], perr_2gauss[2]))
print("sigma = %0.2f (+/-) %0.2f" % (pars_2[1], perr_2gauss[3]))
print("area = %0.2f" % np.trapz(gauss_peak_2, x_array))
This is the result. I can plot the gauss fitting but the 2nd gauss seems to be wrong because the shape is much larger than the data. What should I do in this case?

Don't rewrite _1gaussian2 as a near-identical implementation of _1gaussian1.
Don't hard-code your time offsets - unless these come from real experimental settings, leave them as degrees of freedom for your fit. Same for the 0.2 vertical offset.
Don't throw away your power data (the column headings). If I interpret them correctly, they're positive or negative power in milliwatts.
Do a parametric fit for every power level; you'll see that the parameters you had fixed should actually be variable.
Don't hard-code the slice 33-34. Use the whole data. Model the function as a simultaneous sum of two Gaussians.
import re
from typing import Iterator
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
sqrt2pi = np.sqrt(2 * np.pi)
def parse_power(filename: str) -> Iterator[float]:
    """Yield the signed power encoded in each column name of a CSV header.

    Only the first line of ``filename`` is read. Every occurrence of
    ``neg_<int>_<frac>mW`` or ``pos_<int>_<frac>mW`` is turned into
    ``<int>.<frac>`` multiplied by 1e-3, negated for ``neg``. An empty file
    yields nothing.

    Parameters
    ----------
    filename : str
        Path of the CSV file whose header line is parsed.

    Yields
    ------
    float
        One value per matching header token, in header order.
    """
    pat = re.compile(r'(neg|pos)_(\d+)_(\d+)mW')
    # Same encoding as the np.loadtxt call that reads the body of this file.
    with open(filename, encoding="utf-8") as f:
        # readline() returns '' at EOF; next(f) would raise StopIteration,
        # which surfaces as RuntimeError inside a generator.
        header = f.readline()
    for match in pat.finditer(header):
        magnitude = 1e-3 * float(match.expand(r'\2.\3'))
        yield -magnitude if match.group(1) == 'neg' else magnitude
def gaussian(theta: np.ndarray, amp: float, offx: float, sigma: float) -> np.ndarray:
    """Evaluate an un-normalised Gaussian pulse.

    The pulse has peak height ``amp``, is centred at ``offx`` and has width
    parameter ``sigma``.
    """
    scaled = (theta - offx) / sigma
    exponent = -0.5 * scaled**2
    return amp * np.exp(exponent)
def double_gaussian(
    theta: np.ndarray,
    amp1: float, amp2: float,
    offx1: float, offx2: float,
    sigma1: float, sigma2: float,
    offy: float,
) -> np.ndarray:
    """Evaluate two Gaussian pulses on a shared vertical offset.

    Each ``(amp, offx, sigma)`` triple is forwarded to ``gaussian``; ``offy``
    is added to the sum of both pulses.
    """
    first_pulse = gaussian(theta, amp1, offx1, sigma1)
    second_pulse = gaussian(theta, amp2, offx2, sigma2)
    return offy + first_pulse + second_pulse
def main() -> None:
    """Fit the double-Gaussian model to every data column and plot the trends.

    Reads ``test_run09.csv``, orders the data columns by the power parsed from
    the header, runs one ``curve_fit`` per column from a common initial guess,
    and plots each group of fitted parameters against power.
    """
    filename = 'test_run09.csv'

    # Power levels from the header, in ascending order.
    powers = np.array(tuple(parse_power(filename)))
    order = powers.argsort()
    powers = powers[order]

    data = np.loadtxt(filename, encoding="utf-8", delimiter=',', skiprows=1)
    theta = data[:, 1]
    # Sort columns by power
    data[:, 2:] = data[:, 2:][:, order]

    # Initial guess, in the argument order of double_gaussian:
    # (amp1, amp2, offx1, offx2, sigma1, sigma2, offy)
    init = (3, 9, 32.2, 33.5, 0.1, 0.1, 0.2)
    parameters = np.empty((len(powers), len(init)))

    for row, column in enumerate(range(2, data.shape[1])):
        y = data[:, column]
        result, _ = curve_fit(
            f=double_gaussian,
            xdata=theta,
            ydata=y,
            p0=init,
        )
        parameters[row, :] = result

        def each_fit_plot():
            # Optional per-column diagnostic: data points and fitted curve.
            plt.figure()
            plt.scatter(theta, y)
            plt.plot(theta, double_gaussian(theta, *result), 'orange')
        # each_fit_plot()

    def plot_param(ylabel, start, end):
        # Plot parameter columns start:end against power in a new figure.
        plt.figure()
        plt.plot(powers, parameters[:, start:end])
        plt.title('Parameters, first and second Gaussian pulse')
        plt.xlabel('Power')
        plt.ylabel(ylabel)

    parameter_groups = (
        ('Amplitude', 0, 2),
        ('Peak time', 2, 4),
        ('Sigma', 4, 6),
        ('Shared vertical offset', 6, 7),
    )
    for ylabel, start, end in parameter_groups:
        plot_param(ylabel, start, end)
    plt.show()
if __name__ == '__main__':
main()
If you generalise to a pseudogaussian pulse replacing **2 with abs()**p, the fit improves greatly:

Related

How to stop the roots of a cubic from becoming mixed up when plotting the 3 roots as contour plots?

I have a function which determines the roots of a complex cubic. I am solving the cubic for a variety of k0 and k1 values and showing the solutions as contour plots. Since the cubic has three roots, I produce 3 contour plots for the real parts and 3 for the imaginary parts. However, sometimes you can clearly see that sections of the contour plot for one root really should be swapped with a different contour plot - all the contours should be continuous. I have tried various "sorting methods", which you can see in the code, but none of them fully fix it. What can I do so that the roots don't get mixed up, which results in non-continuous contours?
import numpy as np
import matplotlib.pyplot as plt
# Constants
Ra = 2e4
Pr = 0.1
Omega = 1e5
zeta = 1e-4
deltaN = 0.05
L = 55
def polynomial(k):
    """Return the three roots of the cubic for the pair ``k = (k[0], k[1])``.

    The coefficients are built from the module-level constants ``Ra``, ``Pr``,
    ``Omega``, ``zeta``, ``deltaN`` and ``L``. The roots from ``np.roots`` are
    ordered by ascending imaginary part, after which the first two are swapped
    when the first has the larger (or equal) real part.

    Returns
    -------
    list of complex
        The three roots in the order described above.
    """
    ky, qz = k[0], k[1]
    m = 1
    delta_k = m**2 * np.pi**2 + ky**2

    # Coefficients of a_3*s**3 + a_2*s**2 + a_1*s + a_0.
    a_3 = delta_k
    a_2 = 1j*(Ra * Pr * delta_k * ky)/Omega + (Pr + zeta + 1)*delta_k**2
    a_1 = 1j*(Ra * Pr * delta_k**2 * ky * (Pr + zeta)/Omega) + qz * Pr * zeta * (delta_k**2/L**2 + delta_k) - deltaN * Ra * Pr * ky**2 + (Pr * zeta + Pr + zeta) * delta_k**3
    a_0 = 1j*(Pr * zeta * ky * (Ra * Pr * delta_k**3/Omega + qz * Omega * deltaN * delta_k / L**2)) + Pr * zeta * (qz * (Pr * delta_k**3 / L**2 + delta_k**2) - deltaN * Ra * delta_k * ky**2 + delta_k**4)

    # Other orderings that were tried and left disabled:
    # x_K = np.sort_complex(x_K)
    # x_K = sorted(x_K, key=lambda x: x.real)
    # if x_K[2].imag >= 0:
    #     x_K[-1], x_K[-2] = x_K[-2], x_K[-1]
    # if x_K[0].imag >= x_K[2].imag:
    #     x_K[0], x_K[-1] = x_K[-1], x_K[0]
    # if x_K[1].real >= x_K[2].real:
    #     x_K[1], x_K[2] = x_K[2], x_K[1]
    ordered = sorted(np.roots([a_3, a_2, a_1, a_0]), key=lambda root: root.imag)
    first, second = ordered[0], ordered[1]
    if first.real >= second.real:
        ordered[0], ordered[1] = second, first
    return ordered
# Create arrays of k[0] and k[1] values for contour plot
k0, k1 = np.linspace(0, 5, 100), np.linspace(0, 5e2, 100)
K0, K1 = np.meshgrid(k0, k1)
# Get roots for each pair of k[0], k[1] value
roots = np.array([polynomial([K0[i, j], K1[i, j]]) for i in range(100) for j in range(100)], dtype=complex)
ky_max = []
Qz_max = []
# Plot real and imaginary parts of roots separately in one figure
fig, axs = plt.subplots(2, 3, figsize=(13.6, 7.6), constrained_layout=True)
axs = axs.ravel()
for i in range(3):
cnt = axs[i].contourf(K0, K1, roots[:, i].real.reshape(K0.shape), levels=20, cmap='coolwarm')
axs[i].set_title(f'Real part of root {i+1}')
axs[i].set_xlabel('$k_y$')
axs[i].set_ylabel('$Q_z$')
# axs[i].set_yscale('log')
fig.colorbar(cnt, ax=axs[i])
cnt = axs[i+3].contourf(K0, K1, roots[:, i].imag.reshape(K0.shape), levels=20, cmap='coolwarm')
axs[i+3].set_title(f'Imaginary part of root {i+1}')
axs[i+3].set_xlabel('$k_y$')
axs[i+3].set_ylabel('$Q_z$')
# axs[i+3].set_yscale('log')
cbar1 = fig.colorbar(cnt, ax=axs[i+3])
cbar1.formatter.set_powerlimits((0, 0))
max_val = np.max(roots[:, i].real)
print(f'Maximum value for real part of root {i+1} is: {max_val}')
max_val = np.max(roots[:, i].real)
max_index = np.argmax(roots[:, i].real)
k0_max, k1_max = K0.flatten()[max_index], K1.flatten()[max_index]
axs[i].scatter(k0_max, k1_max, s=150, color='yellow', marker='x', label=f'Max value {max_val:.4f}')
axs[i].legend(loc=0)
ky_max.append(K0.flatten()[max_index])
Qz_max.append(K1.flatten()[max_index])
print(f'k_y for root {i+1} is: {k0_max}')
print(f'Q_z for root {i+1} is: {k1_max}')
for axis in ['top','bottom','left','right']:
axs[2].spines[axis].set_linewidth(3)
axs[2].spines[axis].set_color("green")
axs[5].spines[axis].set_linewidth(3)
axs[5].spines[axis].set_color("green")
# Create a caption
# Backslashes are doubled: '\D', '\z', '\O' and '\l' are invalid escape
# sequences in a normal string literal. The resulting text is unchanged.
caption = f'Contour plot showing the real and imaginary components of the roots of the cubic for a range of $k_y$ and $Q_z$ values. Where the other variables are given by: Ra$^* = $ {Ra:.1e}, $\\Delta N =$ {deltaN}, Pr = {Pr:.1e}, $\\zeta =$ {zeta:.1e}, $\\Omega =$ {Omega:.1e}, $L$ = {L}.'
# Create a file name
figure_name = f'decay_contour_Ra={Ra:.1e}_Pr={Pr:.1e}_dN={deltaN}'
pdf_file = f'{figure_name}.pdf'
tex_file = f'{figure_name}.tex'
# save the plot as a PDF
plt.savefig(pdf_file)
# create a text file containing the LaTeX code to include the figure
with open(tex_file, 'w') as f:
    f.write("\\begin{figure}[h]\n")
    f.write("\\centering\n")
    f.write("\\includegraphics[width=0.85\\linewidth]{" + pdf_file + "}\n")
    f.write("\\caption{" + caption + "}\n")
    f.write("\\end{figure}\n")
fig2, axs2 = plt.subplots(2, 3, figsize=(11, 8), constrained_layout=True)
for idx_1 in range(3):
k1_slice = 0
indices = np.where(K1.flatten() == k1_slice)
root_slice = roots[indices][:,idx_1].real
k1_slice = K0.flatten()[indices]
root_slice = roots[indices][:,idx_1].real
axs2[0][idx_1].plot(k1_slice, root_slice, color = 'red')
k1_slice_imag = K0.flatten()[indices]
root_slice_imag = roots[indices][:,idx_1].imag
axs2[1][idx_1].plot(k1_slice, root_slice_imag, color = 'red')
axs2[1][idx_1].set_xlabel('$k_y$')
axs2[0][0].set_ylabel('Re$(s)$')
axs2[1][0].set_ylabel('Im$(s)$')
for idx_1 in range(3):
axs2[0][idx_1].plot(k0, -zeta*(np.pi**2 + k0**2), 'x', markevery=10, color = 'black')
# Create a caption
# Backslashes are doubled: '\D', '\z', '\O' and '\l' are invalid escape
# sequences in a normal string literal. The resulting text is unchanged.
caption = f'Profiles at the $k_y$ at $Q_z = 0$ showing the real and imaginary components of the roots of the cubic for a range of $k_y$ and $Q_z$ values. Where the other variables are given by: Ra$^* = $ {Ra:.1e}, $\\Delta N =$ {deltaN}, Pr = {Pr:.1e}, $\\zeta =$ {zeta:.1e}, $\\Omega =$ {Omega:.1e}, $L$ = {L}.'
# Create a file name
figure_name = f'decay_profiles_Ra={Ra:.1e}_Pr={Pr:.1e}_dN={deltaN}'
pdf_file = f'{figure_name}.pdf'
tex_file = f'{figure_name}.tex'
# create a text file containing the LaTeX code to include the figure
with open(tex_file, 'w') as f:
    f.write("\\begin{figure}[h]\n")
    f.write("\\centering\n")
    f.write("\\includegraphics[width=0.99\\linewidth]{" + pdf_file + "}\n")
    f.write("\\caption{" + caption + "}\n")
    f.write("\\end{figure}\n")
for axis in ['top','bottom','left','right']:
axs2[0][2].spines[axis].set_linewidth(3)
axs2[0][2].spines[axis].set_color("green")
axs2[1][2].spines[axis].set_linewidth(3)
axs2[1][2].spines[axis].set_color("green")
# save the plot as a PDF
plt.savefig(pdf_file)
plt.show()
I've tried np.sort, sorted, swapping the roots using if statements, etc., but nothing works 100%.
For two successive polynomials P and Q, I suggest simply solving the assignment problem to pair each root of P to the closest root of Q.
You can use scipy's linear_sum_assignment along with distance_matrix to find the best assignment of P's roots with Q's roots.
import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt
def get_root_sequence(sequence_of_polynomials):
    """Return the roots of successive polynomials with a consistent ordering.

    For every polynomial after the first, each root is paired with a root of
    the previous polynomial so that the total distance between paired roots is
    minimal (a linear assignment problem). Column ``i`` of the result then
    follows the same root along the sequence.

    Works for any polynomial degree, as long as every polynomial in the
    sequence has the same number of roots.

    Parameters
    ----------
    sequence_of_polynomials : sequence of array_like
        Coefficient vectors, highest power first, as accepted by ``np.roots``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequence_of_polynomials), degree)``.
    """
    previous = np.roots(sequence_of_polynomials[0])
    tracked = [previous]
    for coefficients in sequence_of_polynomials[1:]:
        current = np.roots(coefficients)
        # cost[i, j] = |previous[i] - current[j]| in the complex plane.
        cost = np.abs(previous[:, np.newaxis] - current[np.newaxis, :])
        # Row indices come back sorted, so the column indices list, for each
        # previous root in order, the matching root of the current polynomial.
        _, matched = linear_sum_assignment(cost)
        current = current[matched]
        tracked.append(current)
        previous = current
    return np.array(tracked)
sequence_of_polynomials = np.linspace((1,0,0,-1), (1,-7-2j,15+9j,-10-10j), 100)
roots = get_root_sequence(sequence_of_polynomials)
plt.axes().set_aspect('equal')
for i in range(3):
r = roots[:, i]
ordinal = ('first', 'second', 'third')[i]
plt.plot(r.real, r.imag, label=f'{ordinal} root')
for triangle, label in zip((roots[0], roots[-1]), ('x³-1', '(x-2)(x-2-i)(x-3-i)')):
triangle = triangle[[0,1,2,0]]
plt.plot(triangle.real, triangle.imag, label=label)
plt.legend(loc='best')
plt.xlabel('Real part')
plt.ylabel('Imaginary part')
plt.show()

Low pass filter with gaussian weighting in Python

I am trying to apply a low pass filter to a signal in the frequency domain, with gaussian weighting/roll off curve.
I got the first version of the for loop from my colleagues and I admit that I do not understand it. Why is sigma defined the way it is, with the logarithm? I would just write the Gauss function as I read it on Wikipedia (second version).
Why is the resulting filter characteristic a little offset (by approx. 0.2?) for both versions and why is the result 0 for the second?
the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.fftpack import ifft
data = np.loadtxt("profil.txt")
t = data[:,0]
x = data[:,1]
amplitudes = fft(x)
c = np.abs(amplitudes)
c_norm = 2 * c/len(t)
c[0] /= 2
c_half = c[0:len(amplitudes)//2]
f = np.fft.fftfreq(len(t), d = t[len(t)-1]/(len(t)-1))
# Gaussian low-pass weights, one per FFT bin.
# FIRST VERSION: sigma is chosen so that exp(-2*pi**2*sigma**2*f**2) equals
# 0.5 at f = 10 (solve -2*pi**2*sigma**2*10**2 = log(0.5) for sigma).
sigma = np.sqrt(np.log(0.5)/(-2.))/np.pi/10 #FIRST VERSION
s = 0.7 #SECOND VERSION
gain = np.exp(-2*np.pi**2*sigma**2*f**2) #FIRST VERSION
#gain = np.e**(-f**2/2/s**2)/s/np.sqrt(2*np.pi) #SECOND VERSION
# Weights at or below 0.1 are cut to zero. The weight depends only on f**2,
# so positive and negative frequencies get identical values and no separate
# mirrored assignment is needed.
p_t = np.where(gain > 0.1, gain, 0.0)
x_filtered = ifft(amplitudes * p_t)
fig = plt.figure(constrained_layout = True)
gs = fig.add_gridspec(2, 1)
a00 = fig.add_subplot(gs[0, 0])
a00.plot(t, x,color='blue', label="original")
a00.plot(t,x_filtered, color='red', label="filtered")
a00.legend(bbox_to_anchor=(0,1,1,0), loc="lower left", ncol = 1)
a01 = fig.add_subplot(gs[1, 0])
a01.plot(f,c, label="amplitudes")
a01.set_ylim([0,1000])
a01.legend()
a03 = a01.twinx()
a03.plot(f, p_t, color='red', label='filter characteristic')
plt.show()
thanks in advance for any idea or explanation. The resulting plot (first version):
second version:

Reconstructing polynomials from scipy.interpolate.RectBivariateSpline

I have fitted a 2-D cubic spline using scipy.interpolate.RectBivariateSpline. I would like to access/reconstruct the underlying polynomials within each rectangular cell. How can I do this? My code so far is written below.
I have been able to get the knot points and the coefficients with get_knots() and get_coeffs() so it should be possible to build the polynomials, but I do not know the form of the polynomials that the coefficients correspond to. I tried looking at the SciPy source code but I could not locate the underlying dfitpack.regrid_smth function.
A code demonstrating the fitting:
import numpy as np
from scipy.interpolate import RectBivariateSpline
# Evaluate a demonstration function Z(x, y) = sin(sin(x * y)) on a mesh
# of points.
x0 = -1.0
x1 = 1.0
n_x = 11
x = np.linspace(x0, x1, num = n_x)
y0 = -2.0
y1 = 2.0
n_y = 21
y = np.linspace(y0, y1, num = n_y)
X, Y = np.meshgrid(x, y, indexing = 'ij')
Z = np.sin(np.sin(X * Y))
# Fit the sampled function using SciPy's RectBivariateSpline.
order_spline = 3
smoothing = 0.0
spline_fit_func = RectBivariateSpline(x, y, Z,
kx = order_spline, ky = order_spline, s = smoothing)
And to plot it:
import matplotlib.pyplot as plt
# Make axes.
fig, ax_arr = plt.subplots(1, 2, sharex = True, sharey = True, figsize = (12.0, 8.0))
# Plot the input function.
ax = ax_arr[0]
ax.set_aspect(1.0)
d_x = x[1] - x[0]
x_edges = np.zeros(n_x + 1)
x_edges[:-1] = x - (d_x / 2.0)
x_edges[-1] = x[-1] + (d_x / 2.0)
d_y = y[1] - y[0]
y_edges = np.zeros(n_y + 1)
y_edges[:-1] = y - (d_y / 2.0)
y_edges[-1] = y[-1] + (d_y / 2.0)
ax.pcolormesh(x_edges, y_edges, Z.T)
ax.set_title('Input function')
# Plot the fitted function.
ax = ax_arr[1]
ax.set_aspect(1.0)
n_x_span = n_x * 10
x_span_edges = np.linspace(x0, x1, num = n_x_span)
x_span_centres = (x_span_edges[1:] + x_span_edges[:-1]) / 2.0
#
n_y_span = n_y * 10
y_span_edges = np.linspace(y0, y1, num = n_y_span)
y_span_centres = (y_span_edges[1:] + y_span_edges[:-1]) / 2.0
Z_fit = spline_fit_func(x_span_centres, y_span_centres)
ax.pcolormesh(x_span_edges, y_span_edges, Z_fit.T)
x_knot, y_knot = spline_fit_func.get_knots()
X_knot, Y_knot = np.meshgrid(x_knot, y_knot)
# Plot the knots.
ax.scatter(X_knot, Y_knot, s = 1, c = 'r')
ax.set_title('Fitted function and knots')
plt.show()

How to plot same function with many different values in subplots in numpy/matplotlib python?

I have following python code, and would like to:
Plot the same function in 1 (only one) figure with many different (lets say 4) 'v0' and 'theta' values, each trajectory in a different color.
Make 4 plots in 4 different figures, so that it looks like a square with 4 plots of 4 different 'v0' and 'theta' values
Make a widget to vary the v0 and theta values as the user wants with the mouse.
import numpy as np
import scipy.integrate as integrate
import matplotlib.pyplot as plt
%matplotlib inline
# Launch parameters and constants for the trajectory integration.
theta = 45.
theta = theta * np.pi/180.  # degrees -> radians
v0 = 20.0
g = 9.81
# NOTE(review): R, m, rho and C look like radius, mass, air density and drag
# coefficient of the projectile - confirm units with the author.
R = 0.035
m = 0.057
rho = 1.2041
C = 0.5
# Coefficient of the velocity-squared term used in f_func.
k = (0.5*np.pi*R**2*C*rho)/m
x0 = 0
y0 = 10
# Initial velocity components; sin is used for x and cos for y.
vx0 = v0*np.sin(theta)
vy0 = v0*np.cos(theta)
print(vx0)
print(vy0)
def f_func(X_vek, time):
    """Right-hand side of the first-order system integrated by ``odeint``.

    ``X_vek`` holds ``[x, y, vx, vy]``. The returned array holds the time
    derivatives ``[vx, vy, ax, ay]``, where both accelerations contain a term
    proportional to ``k * speed * velocity_component`` and ``ay`` additionally
    contains ``-g``. ``time`` is not used.
    """
    vel_x = X_vek[2]
    vel_y = X_vek[3]
    acc_x = - k*(vel_x**2 + vel_y**2)**(0.5)*vel_x
    acc_y = -g - k*(vel_x**2 + vel_y**2)**(0.5)*vel_y
    return np.array([vel_x, vel_y, acc_x, acc_y], dtype=float)
# Integrate the trajectory from the initial state on a fixed time grid.
X0 = [x0, y0, vx0, vy0]
t0 = 0.
tf = 10
tau = 0.05
t = np.arange(t0, tf, tau)
X = integrate.odeint(f_func, X0, t)
# Columns of the solution: position (x, y) and velocity (vx, vy).
x = X[:, 0]
y = X[:, 1]
vx = X[:, 2]
vy = X[:, 3]
# Only plot samples at or above y = 0.
mask = y >= 0
plt.scatter(x[mask], y[mask])
plt.xlabel('x')
plt.ylabel('y')
plt.show()
I could do point 1 and 2 of my question with changing the values after plotting, then calculate vx0 and vy0 again and then call the integrate function and finally plot again, but that's kinda weird and not clean. Is there any better way to do that? like an array of different v0 and theta values or something?
Thanks!
Make your code as a function:
def func(theta=45, v0=20):
    """Integrate one trajectory and return its points with ``y >= 0``.

    Parameters
    ----------
    theta : float, optional
        Launch angle in degrees. The x velocity uses ``sin(theta)`` and the
        y velocity uses ``cos(theta)``.
    v0 : float, optional
        Initial speed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` positions sampled every 0.05 time units over ``[0, 10)``,
        restricted to samples where ``y >= 0``.
    """
    theta = theta * np.pi/180.
    g = 9.81
    R = 0.035
    m = 0.057
    rho = 1.2041
    C = 0.5
    k = (0.5*np.pi*R**2*C*rho)/m
    x0 = 0
    y0 = 10
    vx0 = v0*np.sin(theta)
    vy0 = v0*np.cos(theta)

    def f_func(X_vek, time):
        # State is [x, y, vx, vy]; return its time derivative.
        f0, f1 = X_vek[2:4].tolist()
        drag = k*(f0**2 + f1**2)**(0.5)
        return [f0, f1, -drag*f0, -g - drag*f1]

    X0 = [x0, y0, vx0, vy0]
    t0 = 0.
    tf = 10
    tau = 0.05
    t = np.arange(t0, tf, tau)
    X = integrate.odeint(f_func, X0, t)
    x = X[:, 0]
    y = X[:, 1]
    mask = y >= 0
    return x[mask], y[mask]
then you can plot it with different parameters:
plt.plot(*func())
plt.plot(*func(theta=30))
plt.xlabel('x')
plt.ylabel('y')
plt.show()
I suggest you use Holoviews to make dynamic graph:
import holoviews as hv
hv.extension("bokeh")
hv.DynamicMap(
lambda theta, v0:hv.Curve(func(theta, v0)).redim.range(x=(0, 50), y=(0, 50)),
kdims=[hv.Dimension("theta", range=(0, 80), default=40),
hv.Dimension("v0", range=(1, 40), default=20)])
Here is the result:

emcee walkers burn in but then remain the same

I'm having an issue using emcee. It's a simple enough 3-parameter fit, but occasionally (it has only occurred in two scenarios so far despite much use) my walkers burn in just fine but then do not move (see figure below). The acceptance fraction reported is 0.
Has anyone else encountered this issue before? I have tried varying my initial conditions, the number of walkers and iterations, etc. This piece of code has been running well on very similar data sets. It's not a challenging parameter space and it seems unlikely that the walkers would be getting "stuck".
Any ideas? I'm stumped... my walkers are lazy it seems...
Sample code below (and sample data file). This code + data file fail when I run it.
`
import numpy as n
import math
import pylab as py
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import curve_fit
from scipy import ndimage
import pyfits
from scipy import stats
import emcee
import corner
import scipy.optimize as op
import matplotlib.pyplot as pl
from matplotlib.ticker import MaxNLocator
def sersic(x, B, r_s, m):
    """Evaluate the profile ``B * exp(-b * ((x / r_s)**(1/m) - 1))``.

    The coefficient ``b`` is computed as ``1.9992*m - 0.3271``.
    """
    b_m = 1.9992*m - 0.3271
    radial_term = (x/r_s)**(1.0/m) - 1.
    return B * n.exp(-1.0 * b_m * radial_term)
def lnprior(theta):
    """Flat log-prior: 0.0 inside the allowed box, ``-inf`` outside.

    ``theta`` unpacks to ``(B, r_s, m, lnf)``. The box is ``0 < B < 500``,
    ``0.5 < m < 10``, ``r_s > 0`` and ``-10 < lnf < 1``.
    """
    B, r_s, m, lnf = theta
    inside = (
        (0.0 < B < 500.0)
        and (0.5 < m < 10.)
        and (r_s > 0.)
        and (-10.0 < lnf < 1.0)
    )
    return 0.0 if inside else -n.inf
def lnlike(theta, x, y, yerr):
    """Gaussian log-likelihood of the data under the ``sersic`` model.

    The variance of each point is ``yerr**2 + model**2 * exp(2*lnf)``, i.e.
    the quoted error plus a term proportional to the model value.
    """
    B, r_s, m, lnf = theta
    model = sersic(x, B, r_s, m)
    variance = yerr**2 + model**2*n.exp(2*lnf)
    inv_sigma2 = 1.0/variance
    residual = y - model
    return -0.5*(n.sum(residual**2*inv_sigma2 - n.log(inv_sigma2)))
def lnprob(theta, x, y, yerr):
    """Log-posterior: log-prior plus log-likelihood.

    Returns ``-inf`` without evaluating the likelihood when the prior is not
    finite.
    """
    lp = lnprior(theta)
    if n.isfinite(lp):
        return lp + lnlike(theta, x, y, yerr)
    return -n.inf
# Read the data file: whitespace-separated columns x, y, yerr.
# The with-block closes the file as soon as the lines are read.
with open("testprofile.dat", 'r') as profile:
    profilelines = profile.readlines()
x = n.empty(len(profilelines))
y = n.empty(len(profilelines))
yerr = n.empty(len(profilelines))
for i, line in enumerate(profilelines):
    col = line.split()
    x[i] = col[0]
    y[i] = col[1]
    yerr[i] = col[2]
# Find the maximum likelihood value.
chi2 = lambda *args: -2 * lnlike(*args)
result = op.minimize(chi2, [50,2.0,0.5,0.5], args=(x, y, yerr))
B_ml, rs_ml,m_ml, lnf_ml = result["x"]
print("""Maximum likelihood result:
B = {0}
r_s = {1}
m = {2}
""".format(B_ml, rs_ml,m_ml))
# Set up the sampler.
ndim, nwalkers = 4, 4000
pos = [result["x"] + 1e-4*n.random.randn(ndim) for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))
# Clear and run the production chain.
print("Running MCMC...")
Niter = 2000 #2000
sampler.run_mcmc(pos, Niter, rstate0=n.random.get_state())
print("Done.")
# Print out the mean acceptance fraction.
af = sampler.acceptance_fraction
# print() call, matching the rest of the script; the bare print statement is
# a syntax error on Python 3.
print("Mean acceptance fraction:", n.mean(af))
# Plot sampler chain
pl.clf()
fig, axes = pl.subplots(3, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$B$")
axes[1].plot(sampler.chain[:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$r_s$")
axes[2].plot(n.exp(sampler.chain[:, :, 2]).T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_xlabel("step number")
fig.tight_layout(h_pad=0.0)
fig.savefig("line-time_test.png")
# plot MCMC fit
burnin = 100
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim-1))
B_mcmc, r_s_mcmc, m_mcmc = map(lambda v: (v[0]),
zip(*n.percentile(samples, [50],
axis=0)))
print("""MCMC result:
B = {0}
r_s = {1}
m = {2}
""".format(B_mcmc, r_s_mcmc, m_mcmc))
pl.close()
# Make the triangle plot.
burnin = 50
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim-1))
fig = corner.corner(samples, labels=["$B$", "$r_s$", "$m$"])
fig.savefig("line-triangle_test.png")
Here's a better result. I made the random initial samples not so close to the maximum likelihood value and run the chain for a lot more steps with fewer walkers/chains. Notice that I'm plotting the m parameter and not its exponential, as you did.
The mean acceptance fraction is ~0.48, and it took about 1 min to run on my laptop. You can of course add more steps and get an even better result.
import numpy as n
import emcee
import corner
import scipy.optimize as op
import matplotlib.pyplot as pl
from matplotlib.ticker import MaxNLocator
def sersic(x, B, r_s, m):
    """Evaluate the profile ``B * exp(-b * ((x / r_s)**(1/m) - 1))``.

    The coefficient ``b`` is computed as ``1.9992 * m - 0.3271``.
    """
    coefficient = 1.9992 * m - 0.3271
    shape = (x / r_s)**(1.0 / m) - 1.
    exponent = -1.0 * coefficient * shape
    return B * n.exp(exponent)
def lnprior(theta):
    """Flat log-prior over ``(B, r_s, m, lnf)``.

    Returns 0.0 when ``0 < B < 500``, ``0.5 < m < 10``, ``r_s > 0`` and
    ``-10 < lnf < 1`` all hold, and ``-inf`` otherwise.
    """
    B, r_s, m, lnf = theta
    checks = (
        0.0 < B < 500.0,
        0.5 < m < 10.,
        r_s > 0.,
        -10.0 < lnf < 1.0,
    )
    if all(checks):
        return 0.0
    return -n.inf
def lnlike(theta, x, y, yerr):
    """Gaussian log-likelihood of the data under the ``sersic`` model.

    Each point's variance is ``yerr**2 + model**2 * exp(2 * lnf)``.
    """
    B, r_s, m, lnf = theta
    model = sersic(x, B, r_s, m)
    inv_sigma2 = 1.0 / (yerr**2 + model**2 * n.exp(2 * lnf))
    per_point = (y - model)**2 * inv_sigma2 - n.log(inv_sigma2)
    return -0.5 * (n.sum(per_point))
def lnprob(theta, x, y, yerr):
    """Log-posterior used by the sampler: ``lnprior + lnlike``.

    The likelihood is skipped and ``-inf`` returned when the prior is not
    finite.
    """
    prior = lnprior(theta)
    if n.isfinite(prior):
        return prior + lnlike(theta, x, y, yerr)
    return -n.inf
# Read the data file: whitespace-separated columns x, y, yerr.
# The with-block closes the file as soon as the lines are read.
with open("testprofile.dat", 'r') as profile:
    profilelines = profile.readlines()
x = n.empty(len(profilelines))
y = n.empty(len(profilelines))
yerr = n.empty(len(profilelines))
for i, line in enumerate(profilelines):
    col = line.split()
    x[i] = col[0]
    y[i] = col[1]
    yerr[i] = col[2]
# Find the maximum likelihood value.
chi2 = lambda *args: -2 * lnlike(*args)
result = op.minimize(chi2, [50, 2.0, 0.5, 0.5], args=(x, y, yerr))
B_ml, rs_ml, m_ml, lnf_ml = result["x"]
print("""Maximum likelihood result:
B = {0}
r_s = {1}
m = {2}
lnf = {3}
""".format(B_ml, rs_ml, m_ml, lnf_ml))
# Set up the sampler.
ndim, nwalkers = 4, 10
pos = [result["x"] + 1e-1 * n.random.randn(ndim) for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))
# Clear and run the production chain.
print("Running MCMC...")
Niter = 50000
sampler.run_mcmc(pos, Niter, rstate0=n.random.get_state())
print("Done.")
# Print out the mean acceptance fraction.
af = sampler.acceptance_fraction
print("Mean acceptance fraction:", n.mean(af))
# Plot sampler chain
pl.clf()
fig, axes = pl.subplots(3, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$B$")
axes[1].plot(sampler.chain[:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$r_s$")
# axes[2].plot(n.exp(sampler.chain[:, :, 2]).T, color="k", alpha=0.4)
axes[2].plot(sampler.chain[:, :, 2].T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_ylabel("$m$")
axes[2].set_xlabel("step number")
fig.tight_layout(h_pad=0.0)
fig.savefig("line-time_test.png")
# plot MCMC fit
burnin = 10000
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim - 1))
B_mcmc, r_s_mcmc, m_mcmc = map(
lambda v: (v[0]), zip(*n.percentile(samples, [50], axis=0)))
print("""MCMC result:
B = {0}
r_s = {1}
m = {2}
""".format(B_mcmc, r_s_mcmc, m_mcmc))
pl.close()
# Make the triangle plot.
burnin = 50
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim - 1))
fig = corner.corner(samples, labels=["$B$", "$r_s$", "$m$"])
fig.savefig("line-triangle_test.png")

Categories