2D Gaussian Fitting to Histogram - python

I have the following function definition of a 2D Gaussian:
# Return a gaussian distribution at an angle alpha from the x-axis
# from astroML for use with curve_fit
def mult_gaussFun_Fit((x,y),*m):
A,x0,y0,varx,vary,rho,alpha = m
X,Y = np.meshgrid(x,y)
assert rho != 1
a = 1/(2*(1-rho**2))
Z = A*np.exp(-a*((X-x0)**2/(varx)+(Y-y0)**2/(vary)-(2*rho/(np.sqrt(varx*vary)))*(X-x0)*(Y-y0)))
return Z.ravel()
I use the following code to attempt a curve_fit of data drawn from a bivariate gaussian that is converted to a 2D histogram. I am receiving broadcast errors and I am not sure as to why this is happening.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import gauss
import plotutils
# Produce a number of points in x-y from 1 distribution.
mean = [0,0]
cov = [[3,0],[0,1]]
N = 3000
x,y = np.random.multivariate_normal(mean,cov,N).T
# Prep bins for histogram
bin_size = 0.2
max_edge = 2.5*(np.sqrt(cov[0][0])+np.sqrt(cov[1][1]))
min_edge = -max_edge
bin_num = (max_edge-min_edge)/bin_size
bin_numPlus1 = bin_num + 1
bins = np.linspace(min_edge,max_edge,bin_numPlus1)
# Produce 2D histogram
H,xedges,yedges = np.histogram2d(x,y,bins,normed=False)
bin_centers_x = (xedges[:-1]+xedges[1:])/2.0
bin_centers_y = (yedges[:-1]+yedges[1:])/2.0
# Initial Guess
p0 = (H.max(),mean[0],mean[1],cov[0][0],cov[1][1],0.5,np.pi/4)
# Curve Fit parameters
coeff, var_matrix = curve_fit(gauss.mult_gaussFun_Fit,(bin_centers_x,bin_centers_y),H,p0=p0)
The error is:
Traceback (most recent call last):
File "/home/luis/Documents/SRC2014/galsim_work/2D_Gaussian_Estimate.py", line 44, in <module>
coeff, var_matrix = curve_fit(gauss.mult_gaussFun_Fit,(bin_centers_x,bin_centers_y),H,p0=p0)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 555, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 369, in leastsq
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 20, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 445, in _general_function
return function(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (4624) (68,68)

I simply needed to perform
H = H.ravel()
and that solves it.

Related

Problem with plotting multiple functions with a for loop in matplotlib

I am using a for loop to plot a curve for each parameter-value (k) - this works just fine for all the negative k-values, but when the loop reaches the k values = 0 or greater, the lambdify function seems to collapse and I get an error stating the dimensions of x and y are not equal.
This is my code:
import sympy as sym
import numpy as np
import matplotlib.pyplot as plt
eta = np.logspace(-1,2,21) #defines eta values, 21 decades from 0.1 to 100
relrho = np.logspace(-2,2,25) #defines values of rho2/rho1, 25 values from 0.01 to 100
k = (relrho-1)/(relrho+1) #defines the reflection coefficient
#parameter of type curve is k
#rhoa/rho1 is the y-axis
#eta is the x-axis
#R is assigned as the ratio of rho_a to rho_1
#x is assigned to eta
#y is assigned to k
x = sym.symbols('x', real = True)
y = sym.symbols('y')
for y in k: #for-loop assumes k value before while-loop is run, then plots the curve, then new k value is assumed
n=1; R=1;
while n<=500:
Rnew = 2*x**3*y**n/(((2*n)**2+x**2)**(3/2))
R = R + Rnew
n = n + 1
R = sym.lambdify(x,R)
plt.loglog(eta, R(eta))
plt.show()
What is going wrong? I am completely at a dead end right now... clueless...
If I plot the curves individually I can plot them for any k-value just fine, but in the loop it collapses after 12 iterations.
runfile('C:/Users/aslak/OneDrive/Desktop/Typecurves.py', wdir='C:/Users/aslak/OneDrive/Desktop')
Traceback (most recent call last):
File "C:\Users\aslak\OneDrive\Desktop\Typecurves.py", line 44, in <module>
plt.loglog(eta, R(eta))
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\pyplot.py", line 2750, in loglog
return gca().loglog(*args, **kwargs)
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 1868, in loglog
return self.plot(
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)]
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 273, in __call__
yield from self._plot_args(this, kwargs)
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 399, in _plot_args
raise ValueError(f"x and y must have same first dimension, but "
ValueError: x and y must have same first dimension, but have shapes (21,) and (1,)
runfile('C:/Users/aslak/OneDrive/Desktop/Typecurves.py', wdir='C:/Users/aslak/OneDrive/Desktop')
Traceback (most recent call last):
File "C:\Users\aslak\OneDrive\Desktop\Typecurves.py", line 34, in <module>
plt.loglog(eta, R(eta))
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\pyplot.py", line 2750, in loglog
return gca().loglog(*args, **kwargs)
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 1868, in loglog
return self.plot(
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)]
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 273, in __call__
yield from self._plot_args(this, kwargs)
File "C:\Users\aslak\anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 399, in _plot_args
raise ValueError(f"x and y must have same first dimension, but "
ValueError: x and y must have same first dimension, but have shapes (21,) and (1,)
The problem is that when y=0, then 2*x**3*y**n/(((2*n)**2+x**2)**(3/2)) will be 0, hence R will be zero. When you lambdify it and pass in a numpy array, it will return the scalar value 0. We need to take into account this fact. Note that in the following code block I also optimize for speed: only one symbolic addition will be executed for every iteration of the for loop.
import sympy as sym
import numpy as np
import matplotlib.pyplot as plt
eta = np.logspace(-1,2,21) #defines eta values, 21 decades from 0.1 to 100
relrho = np.logspace(-2,2,25) #defines values of rho2/rho1, 25 values from 0.01 to 100
k = (relrho-1)/(relrho+1) #defines the reflection coefficient
#parameter of type curve is k
#rhoa/rho1 is the y-axis
#eta is the x-axis
#R is assigned as the ratio of rho_a to rho_1
#x is assigned to eta
#y is assigned to k
x = sym.symbols('x', real = True)
y = sym.symbols('y')
for y in k: #for-loop assumes k value before while-loop is run, then plots the curve, then new k value is assumed
print("y", y)
n=1; R=1;
addends = []
while n<=500:
addends.append(2*x**3*y**n/(((2*n)**2+x**2)**(3/2)))
n = n + 1
R = Add(R, *addends)
R = sym.lambdify(x,R)
R_val = R(eta)
if not hasattr(R_val, "__iter__"):
R_val = R_val * np.ones_like(eta)
plt.loglog(eta, R_val)
plt.show()
Davide_sd pointed out why the code fails, but in this case it is faster to avoid sympy altogether:
import numpy as np
import matplotlib.pyplot as plt
eta = np.logspace(-1, 2, 21)
relrho = np.logspace(-2, 2, 25)
k = ((relrho - 1) / (relrho + 1))
n = np.r_[1:501]
y = k[None, :, None]
x = eta[:, None, None]
R = (2 * x**3 * y**n / (((2 * n)**2 + x**2)**(3 / 2))).sum(axis=2) + 1
plt.loglog(eta, R)
plt.show()

Error non-linear-regression python curve-fit

Hello guys i want to make non-linear regression in python with curve fit
this is my code:
#fit a fourth degree polynomial to the economic data
from numpy import arange
from scipy.optimize import curve_fit
from matplotlib import pyplot
import math
x = [17.47,20.71,21.08,18.08,17.12,14.16,14.06,12.44,11.86,11.19,10.65]
y = [5,35,65,95,125,155,185,215,245,275,305]
# define the true objective function
def objective(x, a, b, c, d, e):
return ((a)-((b)*(x/3-5)))+((c)*(x/305)**2)-((d)*(math.log(305))-math.log(x))+((e)*(math.log(305)-(math.log(x))**2))
popt, _ = curve_fit(objective, x, y)
# summarize the parameter values
a, b, c, d, e = popt
# plot input vs output
pyplot.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = arange(min(x), max(x), 1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e)
# create a line plot for the mapping function
pyplot.plot(x_line, y_line, '--', color='red')
pyplot.show()
this is my error :
Traceback (most recent call last):
File "C:\Users\Fahmi\PycharmProjects\pythonProject\main.py", line 16, in
popt, _ = curve_fit(objective, x, y)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 784, in curve_fit
res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 410, in leastsq
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 24, in _check_func
res = atleast_1d(thefunc(((x0[:numinputs],) + args)))
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 484, in func_wrapped
return func(xdata, params) - ydata
File "C:\Users\Fahmi\PycharmProjects\pythonProject\main.py", line 13, in objective
return ((a)-((b)(x/3-5)))+((c)(x/305)**2)-((d)(math.log(305))-math.log(x))+((e)(math.log(305)-(math.log(x))**2))
TypeError: only size-1 arrays can be converted to Python scalars
thanks before
This is a known problem with the math library. Simply use numpy and your problem should be fixed as numpy functions have support for scalars and arrays.
#fit a fourth degree polynomial to the economic data
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
x = [17.47,20.71,21.08,18.08,17.12,14.16,14.06,12.44,11.86,11.19,10.65]
y = [5,35,65,95,125,155,185,215,245,275,305]
# define the true objective function
def objective(x, a, b, c, d, e):
return ((a)-((b)*(x/3-5)))+((c)*(x/305)**2)-((d)*(np.log(305))-np.log(x))+((e)*(np.log(305)-(np.log(x))**2))
popt, _ = curve_fit(objective, x, y)
# summarize the parameter values
a, b, c, d, e = popt
# plot input vs output
plt.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = np.arange(np.min(x), np.max(x), 1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e)
# create a line plot for the mapping function
plt.plot(x_line, y_line, '--', color='red')
plt.show()

Random Number Generator Test of Uniformity Code problem plotting

Please tell me what is wrong in my code. Thank you.
It gives me this error when it is ran:
Traceback (most recent call last):
plt.plot(N, Test_Uniform(N))
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/pyplot.py", line 2813, in plot
is not None else {}), **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/__init__.py", line 1810, in inner
return func(ax, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_axes.py", line 1611, in plot
for line in self._get_lines(*args, **kwargs):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 393, in _grab_next_args
yield from self._plot_args(this, kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 370, in _plot_args
x, y = self._xy_from_xy(x, y)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 231, in _xy_from_xy
"have shapes {} and {}".format(x.shape, y.shape))
ValueError: x and y must have same first dimension, but have shapes (1000,) and (3,)
It needs to do this:
import matplotlib.pyplot as plt
import numpy as np
N=np.random.rand(1000)
k_array=np.array([1,3,7])
def Test_Uniform(N):
test_uni=np.array([])
for w in k_array:
test_uni_random=0
for i in N:
test_uni_random += (i**w)/(len(N))
test_uni=np.append(test_uni,test_uni_random)
return test_uni
def Test_uniform_Deviation(N):
new_sum=np.array([])
for z in k_array:
test_uni_rand=0
for q in N:
test_uni_rand += (((q**z)/len(N))-(1/(1+q)))
new_sum=np.append(new_sum,test_uni_rand)
mean_sum=new_sum/len(N)
return mean_sum
plt.plot(N, Test_Uniform(N))
plt.xlabel('N')
plt.xscale('log')
plt.ylabel('series')
plt.show()
plt.plot(N, Test_uniform_Deviation(N))
plt.xlabel('N')
plt.xscale('log')
plt.ylabel('series')
plt.show()
for each k, plot the mean value of r^k found vs. log(N) along a line for the expected limit 1/(1+k). and plot average deviation (second function) vs log(N).
The problem is you're plotting N (1000 numbers) against Test_Uniform(N) which returns an array of three numbers, one for each element of k_array:
test_uni=np.array([])
for w in k_array:
test_uni_random = 0
for i in N:
test_uni_random += i**w / len(N)
test_uni = np.append(test_uni, test_uni_random)
return test_uni
The test_uni array only contains one element for each element in k_array. If we were to invert the loops:
import matplotlib.pyplot as plt
import numpy as np
N = np.random.rand(1000)
k_array = np.array([1, 3, 7])
def Test_Uniform(N):
test_uni = np.array([])
for i in N:
test_uni_random = 0
for w in k_array:
test_uni_random += i ** w / len(N)
test_uni = np.append(test_uni, test_uni_random)
return test_uni
plt.plot(N, Test_Uniform(N))
plt.xlabel('N')
plt.xscale('log')
plt.ylabel('series')
plt.show()
It might not make any mathematical sense but at least N and Test_Uniform(N) now have the same number of elements and can be plotted:

From y(x) animation to y(t) at a given x

I am currently running code simulating a string's motion, and I wish to read its amplitude at a given point and plot this as a function of time, thus representing data that would be read from a guitar's pickup. As I am new to animation in python, the task of obtaining data while presenting this animation has proved a challenge. Here is the code:
import numpy as np
import matplotlib
matplotlib.use("TkAgg")
from matplotlib import pyplot as plt
from math import exp
#import matplotlib.animation as animation
#Power Spectrum
def PowerSpectrum(f):
return (f*f.conjugate()).real/len(f)**2
#triangular pulse
def triangular_pulse(x, xmean, sigma):
return np.where(x<xmean,x*sigma/xmean,sigma-(x-xmean)*(sigma/(200-xmean)))
N_masses = 200
T = 0.0669264714
mu = .03937
cSq = T/mu
c = np.sqrt(cSq)
dx = 1.0
dt = dx/c
print dt
#Initialize some arrays
x0 = np.arange(N_masses)*dx
y = np.zeros(N_masses)
vy = np.zeros(N_masses)
ay = np.zeros(N_masses)
#setup for animation
fig1 = plt.figure()
plt.ion()
wave, = plt.plot(x0,y)
plt.ylim(-30,30)
#Set Initial conditions (pluck)
# # half-pluck
# y = 30*gaussian_pulse(x0,x0[N_masses/2],2)
# quarter-pluck
y = triangular_pulse(x0,x0[N_masses/10],6)
yprev = y - vy*dt
y1 = []
t=0.0
i=0
while t<1000:
dydx = (y[1:] - y[:-1])/dx
ay[1:-1] = ( dydx[1:] - dydx[:-1] )/dx * cSq
# Notice we update both at the same time
yprev, y = y, exp(-.00001*t)*(2*y - yprev + ay * dt**2)
t = t + dt
y1.append(y[1])
i += 1
if i%1==0:
wave.set_data(x0,y)
plt.draw()
gauss_hat = np.fft.fft(y1)
freqs = np.fft.fftfreq(len(y1), d=1.0/100)
half_ps = PowerSpectrum(gauss_hat)
#half-pluck y[1] vs. t
plt.plot(range(1000), y1)
plt.xlabel('time')
plt.ylabel('y[1]')
plt.title('y[1] vs. t')
# power spectrum plots
#plt.plot(freqs,half_ps)
#plt.xlabel('frequency (Hz)')
#plt.ylabel('Intensity')
#plt.title('y[1] Power Spectrum (half pluck)')
#plt.title('y[1] Power Spectrum (quarter pluck)')
#plt.grid(True)
#plt.ioff()
plt.show()
The animation runs, but no plot is presented. I receive the error:
Traceback (most recent call last):
File "/Users/defaultuser/Downloads/compare.py", line 69, in <module>
plt.plot(range(1000), y1)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/pyplot.py", line 2467, in plot
ret = ax.plot(*args, **kwargs)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/axes.py", line 3893, in plot
for line in self._get_lines(*args, **kwargs):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/axes.py", line 322, in _grab_next_args
for seg in self._plot_args(remaining, kwargs):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/axes.py", line 300, in _plot_args
x, y = self._xy_from_xy(x, y)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/axes.py", line 240, in _xy_from_xy
raise ValueError("x and y must have same first dimension")
ValueError: x and y must have same first dimension
[Finished in 30.6s with exit code 1]
Update
That error no longer appears, as the code has been modified to plt.plot(y1) thus granting x and y the same dimension. After commenting out plotting code responsible for the animation, a good graph was obtained. Thus I learned the plt.plot function can't be called more than once and that subplot is required instead.
The first and last lines in the traceback:
File "/Users/defaultuser/Downloads/compare.py", line 69, in <module>
plt.plot(range(1000), y1)
...
raise ValueError("x and y must have same first dimension")
suggest that range(1000) and y1 do not have the same shape.
Indeed, if you paste the code in an interactive session, you'll find
In [17]: len(y1)
Out[17]: 1304
which makes sense since the loop increments t by an amount dt and dt < 1:
t = 0.0
while t<1000:
t = t + dt
y1.append(...)
In [18]: dt
Out[18]: 0.76697947735477701
You can fix the problem by simply using the 1-argument form of plt.plot:
plt.plot(y1)
which is equivalent to
plt.plot(range(len(y1)), y1)

scipy curve_fit fails on exponential fit

When I try to do an exponential fit using curve_fit, scipy returns an error. Am I doing something wrong? Removing the negative sign from np.exp(-b * t) allows curve_fit to work, but the values it returns are way off.
#!/usr/bin/python
import numpy as np
import scipy as sp
from scipy.optimize import curve_fit
import scipy.optimize as opt
import matplotlib.pyplot as plt
x = [40,45,50,55,60]
y = [0.99358851674641158, 0.79779904306220106, 0.60200956937799055, 0.49521531100478472, 0.38842105263157894]
def model_func(t, a, b, c):
return a * np.exp(-b * t) + c
opt_parms, parm_cov = sp.optimize.curve_fit(model_func, x, y, maxfev=1000)
a,b,c = opt_parms
print a,b,c
print x
print y
print model_func(x, a,b,c)
Fails with error:
Traceback (most recent call last):
File "asdf.py", line 18, in <module>
opt_parms, parm_cov = sp.optimize.curve_fit(model_func, x, y, maxfev=1000)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 426, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 276, in leastsq
m = _check_func('leastsq', 'func', func, x0, args, n)[0]
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 13, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 346, in _general_function
return function(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (0) (5)
change x and y to numpy arrays
x = np.array([40,45,50,55,60])
y = np.array([0.99358851674641158, 0.79779904306220106, 0.60200956937799055, 0.49521531100478472, 0.38842105263157894])
then I think you are good, because the function requires vectorized computation, whereas lists are not adequate.

Categories