Why does scipy.optimize.curve_fit give an error?

I have been trying to write a function that solves a set of ordinary differential equations and then fit it to experimental data using scipy.optimize.curve_fit, but I get an error message containing:
"ValueError: object too deep for desired array
odepack.error: Result from function call is not a proper array of floats."
Any help would be much appreciated.
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import curve_fit

kB = 8.6173324e-5  # eV/K
P_H2O = 0.05       # bar
H_H2O = -0.8       # eV
S_H2O = -0.0016    # eV/K

def K_H2O(T):
    return np.exp(-H_H2O/(kB*T))*np.exp(S_H2O/kB)

def FracA(T):
    return K_H2O(T)*P_H2O / (1 + K_H2O(T)*P_H2O)

def FracB(T):
    return 1 - FracA(T)

EA1_A = 0.725  # eV (from experimental data)

def k1_A(A1_A, T):
    return A1_A*np.exp(-EA1_A/(kB*T))

A1_B = 5.e3
EA1_B = 0.8  # eV

def k1_B(T):
    return A1_B*np.exp(-EA1_B/(kB*T))

A2_B = 8.e3
EA2_B = 1.0  # eV

def k2_B(T):
    return A2_B*np.exp(-EA2_B/(kB*T))

# initial conditions
P_NO0 = 500.e-6   # initial NO
P_NH30 = 530.e-6  # initial NH3
y0 = [P_NO0, P_NH30]  # initial condition vector
t = np.linspace(0, 30., 1000)  # time grid

def conv(T, A1_A):
    def f(y, t):
        P_NOi = y[0]
        P_NH3i = y[1]
        # the differential equations
        # only integrate while both partial pressures are positive
        if y[0] > 0 and y[1] > 0:
            f0 = -k1_A(A1_A, T)*FracA(T)*P_NOi - k1_B(T)*FracB(T)*P_NOi*P_NH3i**(-0.25)
        else:
            f0 = 0
        if y[0] > 0 and y[1] > 0:
            f1 = -k1_A(A1_A, T)*FracA(T)*P_NOi - k1_B(T)*FracB(T)*P_NOi*P_NH3i**(-0.25) - k2_B(T)*P_NH3i
        else:
            f1 = 0
        return [f0, f1]
    # solve the DEs
    soln = odeint(f, y0, t)
    P_NO = soln[:, 0]
    P_NH3 = soln[:, 1]
    NO_conv = 1 - P_NO[-1]/P_NO0
    return NO_conv
x_real = np.array([433.1,443.1,453.2,463.1,473.1,483.7,494.2,503.5,523.9,553.7,573.6,623.4,673.4,723.3,773.4,823.2])
y_real =np.array([0.064305859, 0.098333053, 0.151494329, 0.217225336, 0.296164608, 0.397472394, 0.508515308, 0.612339428, 0.793549257, 0.892454094, 0.895511489, 0.861625527, 0.949118344, 0.940025727, 0.852439418, 0.727332885])
popt, pcov = curve_fit(conv, x_real, y_real)
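A likely cause of the error, given the traceback: curve_fit calls the model as conv(x_real, A1_A), so T inside conv is the whole temperature array at once, and odeint then receives array-valued right-hand sides ("object too deep for desired array"). A minimal sketch of a wrapper that evaluates the scalar model once per temperature (the p0 starting value is an assumption, not taken from the question):

def conv_model(T_array, A1_A):
    # conv() integrates the ODEs for a single temperature, so evaluate it
    # point by point instead of handing curve_fit's whole array to odeint
    return np.array([conv(T, A1_A) for T in T_array])

# p0 is a guessed starting value for A1_A
popt, pcov = curve_fit(conv_model, x_real, y_real, p0=[1.e4])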

Related

Large-number problem with "odeint" numerical integration

I'm having some computational problems with the following code:
import numpy as np
from scipy.integrate import odeint
from scipy.integrate import quad
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['agg.path.chunksize'] = 10000

# parameters
Ms = 100        # GeV  Singlet mass
Me = 0.511e-3   # GeV  Electron mass
Mp = 1.22e19    # GeV  Planck mass
gs = 106.75     # Entropy dof
H0 = 2.133*0.7*1e-42  # GeV  Hubble parameter (unused)
gx = 2          # WIMP's dof
g = 100         # total dof
sigmav = [1e-25, 1e-11, 1e-12]  # cross section's order of magnitude
xi = 1e-2
xe = 1e2
npts = int(1e5)
x = np.linspace(xi, xe, npts)

def fMB(p, x, m):
    return np.exp(-x*np.sqrt(1 + p*p/(m*m)))*p*p

def neq(x, m):
    return (gx/(2*np.pi*np.pi))*quad(fMB, 0, np.inf, args=(x, m))[0]

def neq_nr(x, m):
    return 2*(m**2/(2*np.pi*x))**(3/2)*np.exp(-x)

def stot(x):
    return (2*np.pi*np.pi/45)*gs*Ms*Ms*Ms/(x*x*x)

def Yeq(x, m):
    return neq(x, m)/stot(x)

Yeq2 = np.vectorize(Yeq)

def Yeq_nr(x):
    return 0.145*(gx/gs)*x**(3/2)*np.exp(-x)

def Yeq_r(x):
    return 0.278*(3*gx/4)/gs

def Ytot(x):
    if np.any(x <= 1):
        return Yeq_r(x)
    else:
        return Yeq_nr(x)

def eqd(yl, x, Ms, σv):
    '''
    Ms [GeV]     : Singlet mass
    σv [1/GeV^2] : ⟨σv⟩
    '''
    H = 1.67*g**(1/2)*Ms**2/Mp
    # still need to divide by Yeq_nr(x) or Yeq(x)
    dyl = -neq(x, Ms)*σv*(yl**2 - Yeq(x, Ms)**2)/(x**(-2)*H*x*Yeq(x, Ms))
    return dyl

y0 = 1e-15
yl0 = odeint(eqd, y0, x, args=(Ms, sigmav[0]), full_output=True)
yl1 = odeint(eqd, y0, x, args=(Ms, sigmav[1]), full_output=True)
yl2 = odeint(eqd, y0, x, args=(Ms, sigmav[2]), full_output=True)

fig = plt.figure(figsize=(11, 8))
plt.loglog(x, yl0[0], label=r'$\langle σ v\rangle = %s {\rm GeV}^{-2}$' % sigmav[0])
plt.loglog(x, yl1[0], label=r'$\langle σ v\rangle = %s {\rm GeV}^{-2}$' % sigmav[1])
plt.loglog(x, yl2[0], label=r'$\langle σ v\rangle = %s {\rm GeV}^{-2}$' % sigmav[2])
plt.loglog(x, Yeq_nr(x), '--', label='$Y_{EQ}^{nr}$')
plt.loglog(x, Yeq2(x, Ms), '--', label='$Y_{EQ}$')
plt.ylim(ymax=0.1, ymin=y0)
plt.xlim(xmax=xe, xmin=xi)
plt.xlabel('$x = m_χ/T$', size=15)
plt.ylabel('$Y$', size=15)
plt.title('$m_χ = %s$ GeV' % Ms, size=15)
plt.legend(loc='best', fontsize=12)
plt.grid(True)
plt.savefig('abundance.jpg', bbox_inches='tight', dpi=150)
In particular, for small values of sigmav (from 10^-25 up to 10^-12) the solution is displayed correctly, but for larger values (from 10^-11 upwards) I run into problems. I guess it is an order-of-magnitude problem, but I don't know how to handle it!
Thanks to everyone!
Edit 1:
I'm attaching a plot made with three different values of sigmav; as you can see, the largest one (1e-10) shows what I guess are precision problems (plot_1).
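A note on the likely cause: for large ⟨σv⟩ the equation becomes extremely stiff near freeze-out (Y is driven hard toward the very small Yeq), and odeint's defaults can fail there. A minimal sketch, assuming scipy >= 1.4 for the args keyword, that reuses eqd, x, xi, xe, y0 and sigmav from above and switches to an implicit method with tight tolerances; integrating W = ln Y instead of Y is the other standard trick:

from scipy.integrate import solve_ivp

def rhs(xv, yl, Ms, sv):
    # solve_ivp expects f(t, y); eqd above was written as f(y, t) for odeint
    return eqd(yl, xv, Ms, sv)

sol = solve_ivp(rhs, (xi, xe), [y0], args=(Ms, sigmav[1]),
                method='Radau', t_eval=x, rtol=1e-10, atol=1e-30)
yl = sol.y[0]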

Calling args in a loop function

For MCMC I use the emcee package, following this tutorial. Instead of the equation in this part, which is fractional and therefore easy, I use a matrix form (not its code), and I wrote the following code.
Some explanation of my code: def new_calculation(n) computes the equation for each component of the matrix, and def log_likelihood(theta, hh) builds the matrix just mentioned.
The problem is that I need args to use in soln = minimize(nll, initial, args=(hh)) and in def log_probability(theta, hh).
I use hh as args, but Python says hh is not defined. The problem may be in how the arguments and functions are defined; I do not know how to fix it.
import numpy as np
import emcee
import matplotlib.pyplot as plt
from math import *
from scipy.integrate import quad
from scipy.integrate import odeint

xx = np.array([0.01, 0.012, 0.014, 0.016])
yy = np.array([32.95388698, 33.87900347, 33.84214074, 34.11856704])
Cov = [[137, 168], [28155, -2217]]
rc = 0.09
c = 0.7
H01 = 70
O_m1 = 0.28
z0 = 0
M1 = 10
np.random.seed(123)

def ant(z, O_m, O_D):
    return 1/sqrt(((1+z)**2)*(1+O_m*z))

def new_calculation(n):
    O_D = 1 - O_m - (1/(2*rc*yyn))
    q = quad(ant, 0, xx[n], args=(O_m, O_D))[0]
    h = log10((1+xx[n])*q)
    fn = (yy[n] - M - h)
    return fn

def log_likelihood(theta, hh):
    H0, O_m, M = theta
    f_list = []
    for i in range(2):  # the value '2' reflects the matrix size
        f_list.append(new_calculation(i))
    rdag = [f_list]
    rmat = [[f] for f in f_list]
    hh = np.linalg.det(np.dot(rdag, Cov), rmat)*0.000001
    return hh

from scipy.optimize import minimize
np.random.seed(42)
nll = lambda *args: -log_likelihood(*args)
initial = np.array([H01, O_m1, M1]) + 0.1*np.random.randn(3)
soln = minimize(nll, initial, args=(hh))
H0_ml, O_m0_ml = soln.x

def log_prior(theta):
    H0, O_D = theta
    if 65 < H0 < 75 and 0.22 < O_m < 0.32 and 0 < M < 12:
        return 0.0
    return -np.inf

def log_probability(theta, mm, zz, hh):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta, mm, zz, hh)

y0 = H0
pos = soln.x + 1e-4*np.random.randn(200, 3)
nwalkers, ndim = pos.shape
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(rdag, rmat))
sampler.run_mcmc(pos, 500)

fig = plt.figure(2, figsize=(10, 10))
fig.clf()
for j in range(ndim):
    ax = fig.add_subplot(ndim, 1, j+1)
    ax.plot(np.array([sampler.chain[:, i, j] for i in range(nwalkers)]), "k", alpha=0.3)
    ax.set_ylabel(("H0", "O_m")[j], fontsize=15)
plt.xlabel('Steps', fontsize=15)
fig.show()
I appreciate your help and your attention.
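The immediate NameError: at the moment minimize(nll, initial, args=(hh)) runs, hh exists only as a parameter name inside log_likelihood; it is not a module-level variable. (Note also that (hh) is not a tuple; a one-element tuple is written (hh,).) The usual minimize/emcee pattern is the reverse of what the code does: args carries the data into the likelihood, and the likelihood reads only its own parameters. A minimal sketch of that convention, reusing xx, yy, c, initial, nwalkers and ndim from above; model() is a hypothetical placeholder standing in for the new_calculation machinery:

import numpy as np
import emcee
from scipy.optimize import minimize

def model(x, H0, O_m, M):
    # hypothetical stand-in for the distance-modulus calculation
    return M + 5*np.log10((1 + x)*c*x/H0)

def log_likelihood(theta, x, y):
    H0, O_m, M = theta
    resid = y - model(x, H0, O_m, M)
    return -0.5*np.sum(resid**2)

def log_prior(theta):
    H0, O_m, M = theta
    if 65 < H0 < 75 and 0.22 < O_m < 0.32 and 0 < M < 12:
        return 0.0
    return -np.inf

def log_probability(theta, x, y):
    lp = log_prior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + log_likelihood(theta, x, y)

nll = lambda theta, x, y: -log_likelihood(theta, x, y)
soln = minimize(nll, initial, args=(xx, yy))  # data enters through args
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability,
                                args=(xx, yy))  # same tuple, same order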

Asymmetric error bars in Scipy's odrpack

I am using Scipy's odrpack to fit a linear function to some data that has uncertainties in both the x and y dimensions. Each data point has its own asymmetric uncertainty.
I can fit a function using symmetric uncertainties, but this is not a true representation of my data.
How can I perform the fit with this in mind?
This is my code so far. It receives the input data as command-line arguments, and the uncertainties I'm using are just random numbers for the moment. (Also, two fits are happening, one for the positive data points and another for the negative; the reasons are unrelated to this question.)
import sys
import numpy as np
import scipy.odr.odrpack as odrpack

def f(B, x):
    return B[0]*x + B[1]

xdata = sys.argv[1].split(',')
xdata = [float(i) for i in xdata]
xdata = np.array(xdata)

# find indices of +/- data
zero_ind = np.where(xdata >= 0)[0][0]
x_p = xdata[zero_ind:]
x_m = xdata[:zero_ind+1]

ydata = sys.argv[2].split(',')
ydata = [float(i) for i in ydata]
ydata = np.array(ydata)
y_p = ydata[zero_ind:]
y_m = ydata[:zero_ind+1]

sx_m = np.random.random(len(x_m))
sx_p = np.random.random(len(x_p))
sy_m = np.random.random(len(y_m))
sy_p = np.random.random(len(y_p))

linear = odrpack.Model(f)

data_p = odrpack.RealData(x_p, y_p, sx=sx_p, sy=sy_p)
odr_p = odrpack.ODR(data_p, linear, beta0=[1., 2.])
out_p = odr_p.run()

data_m = odrpack.RealData(x_m, y_m, sx=sx_m, sy=sy_m)
odr_m = odrpack.ODR(data_m, linear, beta0=[1., 2.])
out_m = odr_m.run()
Thanks!
I will just give you a solution with random data; I didn't import your data.
import numpy as np
import scipy.odr.odrpack as odrpack

np.random.seed(1)

N = 10
x = np.linspace(0, 5, N)*(-1)
y = 2*x - 1 + np.random.random(N)
sx = np.random.random(N)
sy = np.random.random(N)

def f(B, x):
    return B[0]*x + B[1]

linear = odrpack.Model(f)
# mydata = odrpack.Data(x, y, wd=1./np.power(sx, 2), we=1./np.power(sy, 2))
mydata = odrpack.RealData(x, y, sx=sx, sy=sy)
myodr = odrpack.ODR(mydata, linear, beta0=[1., 2.])
myoutput = myodr.run()
myoutput.pprint()
Then we get
Beta: [ 1.92743947 -0.94409236]
Beta Std Error: [ 0.03117086 0.11273067]
Beta Covariance: [[ 0.02047196 0.06690713]
[ 0.06690713 0.26776027]]
Residual Variance: 0.04746112419196648
Inverse Condition #: 0.10277763521624257
Reason(s) for Halting:
Sum of squares convergence
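Note that this still uses symmetric uncertainties: ODRPACK only accepts a single standard deviation (or weight) per coordinate, so asymmetric error bars have no native representation. A common pragmatic workaround, sketched below with hypothetical sy_lo/sy_hi arrays and reusing x, y, sx and linear from above, is to collapse each asymmetric pair into an effective symmetric sigma before building RealData (this discards the asymmetry, so treat the resulting parameter errors with care):

# hypothetical asymmetric y-errors: sy_lo below each point, sy_hi above it
sy_lo = np.random.random(N)
sy_hi = np.random.random(N)

# crude symmetrization: use the mean of the lower and upper error bars
sy_eff = 0.5*(sy_lo + sy_hi)

mydata = odrpack.RealData(x, y, sx=sx, sy=sy_eff)
myodr = odrpack.ODR(mydata, linear, beta0=[1., 2.])
myoutput = myodr.run()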

Explanation of Scipy fmin Warning

What does "Desired error not necessarily achieved due to precision loss" mean in the context of the scipy fmin methods? I can't seem to find an explanation anywhere.
Here is my code:
import math
import random
import numpy
import scipy.optimize as opt
from numpy import array

ns = []
st = []
lam_funtrix = []
time_steps = 1000
delta_t = 0.1
mu = -0.7

def gen_st():
    global st
    st = []
    for i in range(0, time_steps):
        st.append(random.normalvariate(0, 1) * math.sqrt(delta_t))

def f(val):
    return math.exp(val)

def get_lam(t):
    rate = mu
    return pow(delta_t, -1) * f(rate)

def white_noise():
    global ns
    for i in range(0, time_steps):
        lam = get_lam(i) * delta_t
        spike_at_bin = numpy.random.poisson(lam)
        ns.append(spike_at_bin)

def gen_lam_log(i, mu):
    rate = mu
    return pow(delta_t, -1) * f(rate)

def gen_lam_fun(mu):
    global lam_funtrix
    lam_funtrix = []
    for i in range(0, time_steps):
        lam_funtrix.append(gen_lam_log(i, mu))

def log_like(t):
    mu = t
    gen_lam_fun(mu)
    total = 0
    for i in range(0, time_steps):
        val = lam_funtrix[i]
        total = total - ((ns[i] * math.log(val*delta_t)) - (val*delta_t))
    return total

def der_mu():
    total = 0.0
    for i in range(0, time_steps):
        total -= (ns[i] - lam_funtrix[i] * delta_t)
    return total

def first_der(t):
    mu = t
    gen_lam_fun(mu)
    dm = der_mu()
    return dm

gen_st()
white_noise()
init_guess = array([0])
vals = opt.fmin_cg(log_like, init_guess, fprime=first_der)
print(vals)
The code is a little off since I pared it down a bit.
Warning: Desired error not necessarily achieved due to precision loss.
Current function value: 822.835581
Iterations: 1
Function evaluations: 18
Gradient evaluations: 6
[-0.7943019]
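For what it's worth, that warning comes from fmin_cg's line search: it could not find a step length that decreases the function to within the expected precision, which very often means the supplied gradient disagrees with the function (or the function is nearly flat or noisy at the current point). A quick way to test that hypothesis is scipy.optimize.check_grad, which compares fprime against a finite-difference approximation; a sketch using the names defined above:

from scipy.optimize import check_grad
from numpy import array

# compares first_der to a finite-difference gradient of log_like at the
# starting point; anything much larger than ~1e-5 suggests a wrong gradient
err = check_grad(log_like, first_der, array([0.0]))
print(err)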

Curve fitting in Scipy with 3d data and parameters

I am working on fitting a 3-d distribution function in scipy. I have a numpy array with counts in x- and y-bins, and I am trying to fit it to a rather complicated 3-d distribution function. The data are fit with 26 (!) parameters, which describe the shapes of the two constituent populations.
I learned here that I have to pass my x- and y-coordinates as 'args' when I call leastsq. The code presented by unutbu works as written for me, but when I try to apply it to my specific case, I get the error "TypeError: leastsq() got multiple values for keyword argument 'args'".
Here's my code (sorry for the length):
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as spopt
from textwrap import wrap
import collections

cl = 0.5
ch = 3.5
rl = -23.5
rh = -18.5
mbins = 10
cbins = 10

def hist_data(mixed_data, mbins, cbins):
    H, xedges, yedges = np.histogram2d(mixed_data[:, 1], mixed_data[:, 2],
                                       bins=(mbins, cbins), weights=mixed_data[:, 3])
    x, y = 0.5 * (xedges[:-1] + xedges[1:]), 0.5 * (yedges[:-1] + yedges[1:])
    return H.T, x, y

def gauss(x, s, mu, a):
    return a * np.exp(-((x - mu)**2. / (2. * s**2.)))

def tanhlin(x, p0, p1, q0, q1, q2):
    return p0 + p1 * (x + 20.) + q0 * np.tanh((x - q1)/q2)

def func3d(p, x, y):
    rsp0, rsp1, rsq0, rsq1, rsq2, rmp0, rmp1, rmq0, rmq1, rmq2, rs, rm, ra, \
        bsp0, bsp1, bsq0, bsq1, bsq2, bmp0, bmp1, bmq0, bmq1, bmq2, bs, bm, ba = p
    x, y = np.meshgrid(coords[0], coords[1])
    rs = tanhlin(x, rsp0, rsp1, rsq0, rsq1, rsq2)
    rm = tanhlin(x, rmp0, rmp1, rmq0, rmq1, rmq2)
    ra = schechter(x, rap, raa, ram)  # unused
    bs = tanhlin(x, bsp0, bsp1, bsq0, bsq1, bsq2)
    bm = tanhlin(x, bmp0, bmp1, bmq0, bmq1, bmq2)
    ba = schechter(x, bap, baa, bam)  # unused
    red_dist = ra / (rs * np.sqrt(2 * np.pi)) * gauss(y, rs, rm, ra)
    blue_dist = ba / (bs * np.sqrt(2 * np.pi)) * gauss(y, bs, bm, ba)
    result = red_dist + blue_dist
    return result

def residual(p, coords, data):
    model = func3d(p, coords)
    res = (model.flatten() - data.flatten())
    # parameter restrictions can go in here
    return res

def poiss_err(data):
    return np.where(np.sqrt(H) > 0., np.sqrt(H), 2.)

# =====
H, x, y = hist_data(mixed_data, mbins, cbins)
data = H
coords = x, y
# x and y will be the projected coordinates of the data H onto the plane z = 0
# x has bins of width 0.5, with centers at -23.25, -22.75, ..., -19.25, -18.75
# y has bins of width 0.3, with centers at 0.65, 0.95, ..., 3.05, 3.35

Param = collections.namedtuple('Param', 'rsp0 rsp1 rsq0 rsq1 rsq2 rmp0 rmp1 rmq0 rmq1 rmq2 rs rm ra bsp0 bsp1 bsq0 bsq1 bsq2 bmp0 bmp1 bmq0 bmq1 bmq2 bs bm ba')
p_guess = Param(rsp0=0.152, rsp1=0.008, rsq0=0.044, rsq1=-19.91, rsq2=0.94,
                rmp0=2.279, rmp1=-0.037, rmq0=-0.108, rmq1=-19.81, rmq2=0.96,
                rs=1., rm=-20.5, ra=10000.,
                bsp0=0.298, bsp1=0.014, bsq0=-0.067, bsq1=-19.90, bsq2=0.58,
                bmp0=1.790, bmp1=-0.053, bmq0=-0.363, bmq1=-20.75, bmq2=1.12,
                bs=1., bm=-20., ba=2000.)

opt, cov, infodict, mesg, ier = spopt.leastsq(residual, p_guess, poiss_err(H),
                                              args=coords, maxfev=100000, full_output=True)
Here's my data, just with fewer bins:
[[ 1.00000000e+01 1.10000000e+01 2.10000000e+01 1.90000000e+01
1.70000000e+01 2.10000000e+01 2.40000000e+01 1.90000000e+01
2.80000000e+01 1.90000000e+01]
[ 1.40000000e+01 4.50000000e+01 6.00000000e+01 6.80000000e+01
1.34000000e+02 1.97000000e+02 2.23000000e+02 2.90000000e+02
3.23000000e+02 3.03000000e+02]
[ 3.00000000e+01 1.17000000e+02 3.78000000e+02 9.74000000e+02
1.71900000e+03 2.27700000e+03 2.39000000e+03 2.25500000e+03
1.85600000e+03 1.31000000e+03]
[ 1.52000000e+02 9.32000000e+02 2.89000000e+03 5.23800000e+03
6.66200000e+03 6.19100000e+03 4.54900000e+03 3.14600000e+03
2.09000000e+03 1.33800000e+03]
[ 5.39000000e+02 2.58100000e+03 6.51300000e+03 8.89900000e+03
8.52900000e+03 6.22900000e+03 3.55000000e+03 2.14300000e+03
1.19000000e+03 6.92000000e+02]
[ 1.49600000e+03 4.49200000e+03 8.77200000e+03 1.07610000e+04
9.76700000e+03 7.04900000e+03 4.23200000e+03 2.47200000e+03
1.41500000e+03 7.02000000e+02]
[ 2.31800000e+03 7.01500000e+03 1.28870000e+04 1.50840000e+04
1.35590000e+04 8.55600000e+03 4.15600000e+03 1.77100000e+03
6.57000000e+02 2.55000000e+02]
[ 1.57500000e+03 3.79300000e+03 5.20900000e+03 4.77800000e+03
3.26600000e+03 1.44700000e+03 5.31000000e+02 1.85000000e+02
9.30000000e+01 4.90000000e+01]
[ 7.01000000e+02 1.21600000e+03 1.17600000e+03 7.93000000e+02
4.79000000e+02 2.02000000e+02 8.80000000e+01 3.90000000e+01
2.30000000e+01 1.90000000e+01]
[ 2.93000000e+02 3.93000000e+02 2.90000000e+02 1.97000000e+02
1.18000000e+02 6.40000000e+01 4.10000000e+01 1.20000000e+01
1.10000000e+01 4.00000000e+00]]
Thanks very much!
So what leastsq does is try to:
"Minimize the sum of squares of a set of equations"
-scipy docs
As that says, it minimizes a set of functions, so it doesn't take x or y data inputs directly in the easiest manner. If you look at its arguments here, you can do it however you like and pass a residual function yourself. However, it's significantly easier to just use curve_fit, which does it for you :) and creates the necessary equations.
For fitting you should use curve_fit if you are OK with the generic residual it uses, which is actually the function you pass itself: res = leastsq(func, p0, args=args, full_output=1, **kw), if you look in the code here.
E.g. if I fit the Rosenbrock function in 2d and guess the y-parameter:
from scipy.optimize import curve_fit
import numpy as np

# use only an even number of arguments
def rosen2d(x, a):
    return (1 - x)**2 + 100*(a - x**2)**2

# generate some data slightly off
datax = np.array([.01*x for x in range(-10, 10)])
datay = 2.3
dataz = np.array([rosen2d(x, datay) for x in datax])

optimalparams, covmatrix = curve_fit(rosen2d, datax, dataz)
print('opt:', optimalparams)
Fitting the Colville function in 4d:
from scipy.optimize import curve_fit
import numpy as np

# 4-dimensional Colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x, x3, x4):
    x1, x2 = x[:, 0], x[:, 1]
    return 100*(x1**2 - x2)**2 + (x1 - 1)**2 + (x3 - 1)**2 + \
        90*(x3**2 - x4)**2 + \
        10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
        19.8*(x2 - 1)*(x4 - 1)

# generate some data slightly off
datax = np.array([[x, x] for x in range(-10, 10)], dtype=float)
# add uniform noise
datax += np.random.rand(*datax.shape)

# set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5

# calculate the function
dataz = colville(datax, x3, x4)

# fit the function
optimalparams, covmatrix = curve_fit(colville, datax, dataz)
print('opt:', optimalparams)
Using a custom residual function:
from scipy.optimize import leastsq
import numpy as np

# 4-dimensional Colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x, x3, x4):
    x1, x2 = x[:, 0], x[:, 1]
    return 100*(x1**2 - x2)**2 + (x1 - 1)**2 + (x3 - 1)**2 + \
        90*(x3**2 - x4)**2 + \
        10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
        19.8*(x2 - 1)*(x4 - 1)

# generate some data slightly off
datax = np.array([[x, x] for x in range(-10, 10)], dtype=float)
# add uniform noise
datax += np.random.rand(*datax.shape)

# set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5

def residual(p, x, y):
    return y - colville(x, *p)

# calculate the function
dataz = colville(datax, x3, x4)

# guess some initial parameter values
p0 = [0, 0]

# calculate a minimization of the residual
optimalparams = leastsq(residual, p0, args=(datax, dataz))[0]
print('opt:', optimalparams)
Edit: you used both the positional and the keyword form of args: if you look at the docs you'll see args is the third positional argument, but it can also be passed as a keyword. You used both, which means the function is, understandably, confused.
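A minimal sketch of the corrected call, passing every extra piece of data through args exactly once to match residual's signature residual(p, coords, data); this only fixes the TypeError, and since leastsq has no sigma argument, the Poisson errors would have to be folded into residual itself (e.g. dividing the residuals by poiss_err(H)):

opt, cov, infodict, mesg, ier = spopt.leastsq(
    residual, p_guess, args=(coords, H), maxfev=100000, full_output=True)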
