scipy curve_fit fails on exponential fit - python

When I try to do an exponential fit using curve_fit, scipy returns an error. Am I doing something wrong? Removing the negative sign from np.exp(-b * t) allows curve_fit to work, but the values it returns are way off.
#!/usr/bin/python
import numpy as np
import scipy as sp
from scipy.optimize import curve_fit
import scipy.optimize as opt
import matplotlib.pyplot as plt
x = [40,45,50,55,60]
y = [0.99358851674641158, 0.79779904306220106, 0.60200956937799055, 0.49521531100478472, 0.38842105263157894]
def model_func(t, a, b, c):
    return a * np.exp(-b * t) + c
opt_parms, parm_cov = sp.optimize.curve_fit(model_func, x, y, maxfev=1000)
a,b,c = opt_parms
print a,b,c
print x
print y
print model_func(x, a,b,c)
Fails with error:
Traceback (most recent call last):
File "asdf.py", line 18, in <module>
opt_parms, parm_cov = sp.optimize.curve_fit(model_func, x, y, maxfev=1000)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 426, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 276, in leastsq
m = _check_func('leastsq', 'func', func, x0, args, n)[0]
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 13, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 346, in _general_function
return function(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (0) (5)

Change x and y to numpy arrays:
x = np.array([40,45,50,55,60])
y = np.array([0.99358851674641158, 0.79779904306220106, 0.60200956937799055, 0.49521531100478472, 0.38842105263157894])
Then you should be good: curve_fit evaluates the model over the whole x array at once, which requires vectorized (numpy) arithmetic, and plain Python lists don't support that.
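For completeness, here is a minimal sketch of the fixed script. The initial guess p0 is my own rough estimate, not part of the original question; without one, the default start of b = 1 makes np.exp(-b * t) essentially zero for t between 40 and 60, so the optimizer has almost no gradient to work with and can return poor values.
#!/usr/bin/python
import numpy as np
from scipy.optimize import curve_fit
# data as numpy arrays so curve_fit can do vectorized arithmetic on them
x = np.array([40, 45, 50, 55, 60])
y = np.array([0.99358851674641158, 0.79779904306220106, 0.60200956937799055,
              0.49521531100478472, 0.38842105263157894])
def model_func(t, a, b, c):
    return a * np.exp(-b * t) + c
# p0 is a rough hand-picked starting point (an assumption, not from the question)
opt_parms, parm_cov = curve_fit(model_func, x, y, p0=(5.0, 0.05, 0.0), maxfev=1000)
print(opt_parms)
print(model_func(x, *opt_parms))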

Related

Error non-linear-regression python curve-fit

Hello guys, I want to do a non-linear regression in Python with curve_fit.
This is my code:
#fit a fourth degree polynomial to the economic data
from numpy import arange
from scipy.optimize import curve_fit
from matplotlib import pyplot
import math
x = [17.47,20.71,21.08,18.08,17.12,14.16,14.06,12.44,11.86,11.19,10.65]
y = [5,35,65,95,125,155,185,215,245,275,305]
# define the true objective function
def objective(x, a, b, c, d, e):
    return ((a)-((b)*(x/3-5)))+((c)*(x/305)**2)-((d)*(math.log(305))-math.log(x))+((e)*(math.log(305)-(math.log(x))**2))
popt, _ = curve_fit(objective, x, y)
# summarize the parameter values
a, b, c, d, e = popt
# plot input vs output
pyplot.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = arange(min(x), max(x), 1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e)
# create a line plot for the mapping function
pyplot.plot(x_line, y_line, '--', color='red')
pyplot.show()
This is my error:
Traceback (most recent call last):
File "C:\Users\Fahmi\PycharmProjects\pythonProject\main.py", line 16, in
popt, _ = curve_fit(objective, x, y)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 784, in curve_fit
res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 410, in leastsq
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 24, in _check_func
res = atleast_1d(thefunc(((x0[:numinputs],) + args)))
File "C:\Users\Fahmi\PycharmProjects\pythonProject\venv\lib\site-packages\scipy\optimize\minpack.py", line 484, in func_wrapped
return func(xdata, params) - ydata
File "C:\Users\Fahmi\PycharmProjects\pythonProject\main.py", line 13, in objective
return ((a)-((b)(x/3-5)))+((c)(x/305)**2)-((d)(math.log(305))-math.log(x))+((e)(math.log(305)-(math.log(x))**2))
TypeError: only size-1 arrays can be converted to Python scalars
Thanks in advance.
This is a known limitation of the math module: its functions, like math.log, only accept scalars. Simply use numpy instead and your problem should be fixed, since numpy functions support both scalars and arrays.
#fit a fourth degree polynomial to the economic data
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
x = [17.47,20.71,21.08,18.08,17.12,14.16,14.06,12.44,11.86,11.19,10.65]
y = [5,35,65,95,125,155,185,215,245,275,305]
# define the true objective function
def objective(x, a, b, c, d, e):
    return ((a)-((b)*(x/3-5)))+((c)*(x/305)**2)-((d)*(np.log(305))-np.log(x))+((e)*(np.log(305)-(np.log(x))**2))
popt, _ = curve_fit(objective, x, y)
# summarize the parameter values
a, b, c, d, e = popt
# plot input vs output
plt.scatter(x, y)
# define a sequence of inputs between the smallest and largest known inputs
x_line = np.arange(np.min(x), np.max(x), 1)
# calculate the output for the range
y_line = objective(x_line, a, b, c, d, e)
# create a line plot for the mapping function
plt.plot(x_line, y_line, '--', color='red')
plt.show()

How to pass an array of input parameters in scipy.optimize.minimize?

I want to use scipy.optimize.minimize to solve for a set of parameters by minimizing an error function.
The function called "error" returns the squared error for the model whose parameters (z1, z2, z3) I am trying to find.
I have arrays of x values (called "b" in the function) and y values (called "real" in the function).
The code below works fine if I set x and y to single integers, but not if I try to pass in arrays of x and y values to act as the variables "b" and "real" in the equation being minimized.
Trying to pass in arrays of x and y values results in the error pasted below.
Is there a way to pass arrays in to act as variables in the equation for the minimize function, instead of just single integers?
Here is what my code looks like:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
#dataset file, with a column named x,y with 1000 rows
f = pd.read_csv('Test_Data_.txt', sep='\t')
#initial guess
x0 = [1, 2, 3]
#f['x'] and f['y'] are columns with 1000 rows
#x = f['x'].values
#y = f['y'].values
x = 1 #these parameters work fine
y = 4
#a function called inside the function to be minimized
def est(z1, z2, z3, b):
    return z1 * b**2 + z2 * b + z3
#function to minimize
def error(x, real, b):
    return (real - est(x[0], x[1], x[2], b))**2
print(minimize(error, x0, args = ( x, y), method='BFGS', tol=1e-6))
Feeding in the array of x and y values produces the error:
Traceback (most recent call last):
File "problem1.py", line 24, in <module>
minimize(error, x0, args = ( np.array(list(f['y'].values)), np.array(list(f['x'].values))), method='BFGS', tol=1e-6)
File "/usr/local/lib/python3.5/dist-packages/scipy/optimize/_minimize.py", line 595, in minimize
return _minimize_bfgs(fun, x0, args, jac, callback, **options)
File "/usr/local/lib/python3.5/dist-packages/scipy/optimize/optimize.py", line 970, in _minimize_bfgs
gfk = myfprime(x0)
File "/usr/local/lib/python3.5/dist-packages/scipy/optimize/optimize.py", line 300, in function_wrapper
return function(*(wrapper_args + args))
File "/usr/local/lib/python3.5/dist-packages/scipy/optimize/optimize.py", line 730, in approx_fprime
return _approx_fprime_helper(xk, f, epsilon, args=args)
File "/usr/local/lib/python3.5/dist-packages/scipy/optimize/optimize.py", line 670, in _approx_fprime_helper
grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]
ValueError: setting an array element with a sequence.
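For reference, scipy.optimize.minimize expects its objective to return a single scalar, while error as written returns one squared residual per data point when real and b are arrays; the finite-difference gradient code then fails with "setting an array element with a sequence". A minimal sketch (using made-up stand-in data in place of Test_Data_.txt) that sums the squared residuals into one scalar:
import numpy as np
from scipy.optimize import minimize
# stand-in data (hypothetical) in place of the x and y columns of the file
b_data = np.linspace(0.0, 4.0, 50)
real_data = 2.0 * b_data**2 + 3.0 * b_data + 1.0
def est(z1, z2, z3, b):
    return z1 * b**2 + z2 * b + z3
def error(z, real, b):
    # sum of squared residuals -> one scalar, as minimize requires
    return np.sum((real - est(z[0], z[1], z[2], b))**2)
x0 = [1, 2, 3]
print(minimize(error, x0, args=(real_data, b_data), method='BFGS', tol=1e-6))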

Division by zero? (in Newton iteration method)

I'm doing a Newton iteration to find T_a. Everything seems fine in the code except for one of the very first definitions.
My rho(T_a) raises a division by zero (as if T_a were zero, even though it's just a variable). If I change the T_a in the equation to something like 100, everything runs smoothly.
Any idea why it's returning a division by zero?
from numpy import *
import numpy as np
import pylab
import scipy
from scipy.optimize import leastsq
from math import *
import matplotlib.pyplot as plt
from scipy import integrate
# THETA NOTATION:
#pi/2: substellar point
#-pi/2: antistellar point
#0: terminators
#define constants used in equations:
alb = 0.2 #constant albedo
F = 866 #J/s*m**2
R = 287.0 #J/K*kg
U = 5.0 #m/s
C_p = 1000 #J/K*kg
C_d = 0.0015
p1 = 10**4
p2 = 10**5.0
p3 = 10**6.0 #Pa
sig = 5.67*10**-8.0 #J/s*m**2*K**4 #Stefan-Boltzmann cst
def rho(T_a):
    p1=10000.0
    R=287.0 #J/K*kg
    return (p1/(T_a*R))
def a(T_a):
    U = 5 #m/s
    C_p = 1000 #J/K*kg
    C_d = 0.0015
    return rho(T_a)*C_p*C_d*U
#################################################
##### PART 2 : check integrals equality
#################################################
#define the RHS and LHS of integral equality
def LHS(theta):
    return (1-alb)*F*np.sin(theta)*np.cos(theta)
#define the result of each integral
Left = integrate.quad(lambda theta: LHS(theta), 0, pi/2)[0]
#define a function 1-(result LHS/result RHS) >>> We look for the zero of this
x0=130.0 #guess a value for T_a
#T_a = 131.0
#Python way of solving for the zero of the function
#Define T_g in function of T_a, have RHS(T_a) return T_g**4 etc, have result_RHS(T_a) return int.RHS(T_a),
#have func(T_a) return result_LHS/result_RHS
def T_g(T_a,theta):
    return np.roots(array([(sig),0,0,(a(T_a)),((-a(T_a)*T_a)-LHS(theta))]))[3]
def RHS(theta,T_a):
    return sig*T_g(T_a,theta)**4*np.cos(theta)
def result_RHS(T_a,theta):
    return integrate.quad(lambda theta: RHS(T_a,theta), -pi/2, pi/2)[0]
def function(T_a,theta):
    return 1-((Left/result_RHS(T_a,theta)))
theta = np.arange(-pi/2, pi/2, pi/20)
T_a_0 = scipy.optimize.newton(function,x0,fprime=None,args=(theta,),tol= (10**-5),maxiter=50000)
Output:
Traceback (most recent call last):
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 85, in <module>
T_a_0 = scipy.optimize.newton(function,x0,fprime=None,args=(theta,),tol=(10**-5),maxiter=50000)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scipy/optimize/zeros.py", line 120, in newton
q0 = func(*((p0,) + args))
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 81, in function
return 1-((Left/result_RHS(T_a,theta)))
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 78, in result_RHS
return integrate.quad(lambda theta: RHS(T_a,theta), -pi/2, pi/2)[0]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scipy/integrate/quadpack.py", line 247, in quad
retval = _quad(func,a,b,args,full_output,epsabs,epsrel,limit,points)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scipy/integrate/quadpack.py", line 312, in _quad
return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 78, in <lambda>
return integrate.quad(lambda theta: RHS(T_a,theta), -pi/2, pi/2)[0]
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 75, in RHS
return sig*T_g(T_a,theta)**4*np.cos(theta)
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 72, in T_g
return np.roots(array([(sig),0,0,(a(T_a)),((-a(T_a)*T_a)-LHS(theta))]))[3]
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 38, in a
return rho(T_a)*C_p*C_d*U
File "/Users/jadecheclair/Documents/PHY479Y/FindT_a.py", line 32, in rho
return (p1/(T_a*R))
ZeroDivisionError: float division by zero
Your RHS function is defined slightly differently to all the others, in that it has theta first and T_a as its second argument:
def RHS(theta,T_a):
    return sig*T_g(T_a,theta)**4*np.cos(theta)
I think that's why you passed the arguments in the wrong order here:
lambda theta: RHS(T_a,theta)
Get them in the right order and you should be OK.
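For example, the corrected call inside result_RHS would read:
def result_RHS(T_a,theta):
    return integrate.quad(lambda theta: RHS(theta, T_a), -pi/2, pi/2)[0]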
As a side-note, some of your imports look like they could cause weird bugs:
from numpy import *
from math import *
Numpy and the math module have at least a few function names in common, like sqrt. It's safer to just do import math and import numpy as np, and access the functions through the module name. Otherwise what happens when you call sqrt could change depending on the order you do your imports in.
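A minimal illustration of the difference (math.sqrt only accepts scalars, while np.sqrt also works elementwise on arrays):
import math
import numpy as np
print(np.sqrt(np.array([1.0, 4.0, 9.0])))  # [1. 2. 3.]
print(math.sqrt(9.0))                      # 3.0
# math.sqrt(np.array([1.0, 4.0, 9.0]))     # raises TypeError: only size-1 arrays can be converted to Python scalars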
You reversed your parameters:
In result_RHS you call: RHS(T_a,theta), but the parameter definition of RHS is def RHS(theta,T_a)
Swap those in the definition and the error no longer occurs. Your definition should look like this:
def RHS(T_a, theta):

2D Gaussian Fitting to Histogram

I have the following function definition of a 2D Gaussian:
# Return a gaussian distribution at an angle alpha from the x-axis
# from astroML for use with curve_fit
def mult_gaussFun_Fit((x,y),*m):
    A,x0,y0,varx,vary,rho,alpha = m
    X,Y = np.meshgrid(x,y)
    assert rho != 1
    a = 1/(2*(1-rho**2))
    Z = A*np.exp(-a*((X-x0)**2/(varx)+(Y-y0)**2/(vary)-(2*rho/(np.sqrt(varx*vary)))*(X-x0)*(Y-y0)))
    return Z.ravel()
I use the following code to attempt a curve_fit of data drawn from a bivariate Gaussian that is converted to a 2D histogram. I am receiving broadcast errors and I am not sure why this is happening.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import gauss
import plotutils
# Produce a number of points in x-y from 1 distribution.
mean = [0,0]
cov = [[3,0],[0,1]]
N = 3000
x,y = np.random.multivariate_normal(mean,cov,N).T
# Prep bins for histogram
bin_size = 0.2
max_edge = 2.5*(np.sqrt(cov[0][0])+np.sqrt(cov[1][1]))
min_edge = -max_edge
bin_num = (max_edge-min_edge)/bin_size
bin_numPlus1 = bin_num + 1
bins = np.linspace(min_edge,max_edge,bin_numPlus1)
# Produce 2D histogram
H,xedges,yedges = np.histogram2d(x,y,bins,normed=False)
bin_centers_x = (xedges[:-1]+xedges[1:])/2.0
bin_centers_y = (yedges[:-1]+yedges[1:])/2.0
# Initial Guess
p0 = (H.max(),mean[0],mean[1],cov[0][0],cov[1][1],0.5,np.pi/4)
# Curve Fit parameters
coeff, var_matrix = curve_fit(gauss.mult_gaussFun_Fit,(bin_centers_x,bin_centers_y),H,p0=p0)
The error is:
Traceback (most recent call last):
File "/home/luis/Documents/SRC2014/galsim_work/2D_Gaussian_Estimate.py", line 44, in <module>
coeff, var_matrix = curve_fit(gauss.mult_gaussFun_Fit,(bin_centers_x,bin_centers_y),H,p0=p0)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 555, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 369, in leastsq
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 20, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "/usr/local/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 445, in _general_function
return function(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (4624) (68,68)
I simply needed to perform
H = H.ravel()
so that the histogram has the same flattened shape as the model's raveled output, and that solves it.
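In context, with the rest of the script unchanged, the relevant lines become:
H,xedges,yedges = np.histogram2d(x,y,bins,normed=False)
H = H.ravel()  # flatten so H matches the raveled model output (68*68 = 4624 values)
...
coeff, var_matrix = curve_fit(gauss.mult_gaussFun_Fit,(bin_centers_x,bin_centers_y),H,p0=p0)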

numpy curve_fitting fail to run a specific set

I have a set of numbers which causes an error when I try to do a curve fit to them. I'm quite certain I managed to do this before with the same numbers (and I'm certain I did with other data sets). What is causing this error?
The X, Y, Err values are (in order of appearance):
[0.0, 0.6931471805599453, 1.3862943611198906]
[-5.354761064902713, -6.190455611580044, -6.558604540577015]
[0.0014079400762288246, 0.0006083544693643583, 0.0002989970199491765]
and kappa is equal to 8
This is the function I am trying to fit (essentially a + 2*x):
out = []
for x in X:
    y = log(kappa)
    y += 4*log(pi)
    y += 2*x
    y -= 2*log(2)
    out.append(-y)
return np.array(out)
This is how I call curve_fit:
popt,pcov = curve_fit(fitFunc1,self.X[0:3],self.Y[0:3],sigma=self.Err[0:3],p0=kappa)
and this is the error I get
popt,pcov = curve_fit(fitFunc1,self.X[0:3],self.Y[0:3],sigma=self.Err[0:3],p0=kappa)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 506, in curve_fit
res = leastsq(func, p0, args=args, full_output=1, **kw)
File "/usr/lib/python2.7/dist-packages/scipy/optimize/minpack.py", line 355, in leastsq
gtol, maxfev, epsfcn, factor, diag)
minpack.error: Error occurred while calling the Python function named _weighted_general_function
Edit 1: Added the kappa value (8).
Edit 2: Here is a minimal working example:
#!/usr/bin/python
import numpy as np
from scipy.optimize import curve_fit
from math import log,pi
X = [0.0, 0.6931471805599453, 1.3862943611198906]
Y = [-5.354761064902713, -6.190455611580044, -6.558604540577015]
Err = [0.0014079400762288246, 0.0006083544693643583, 0.0002989970199491765]
kappa = 8
def func(X,kappa):
    out = []
    for x in X:
        y = log(kappa)
        y += 4*log(pi)
        y += 2*x
        y -= 2*log(2)
        out.append(-y)
    return np.array(out)
popt,pcov = curve_fit(func,X,Y,sigma=Err,p0=kappa)
First, convert your inputs to numpy arrays. This allows you to use broadcasting in your func. In addition, you should check for kappa <= 0 and return a bad fit value, to keep the optimizer from evaluating log(kappa) outside the domain you are probably interested in:
import numpy as np
from scipy.optimize import curve_fit
X = np.array([0.0, 0.6931471805599453, 1.3862943611198906])
Y = np.array([-5.354761064902713, -6.190455611580044, -6.558604540577015])
Err = np.array([0.0014079400762288246, 0.0006083544693643583, 0.0002989970199491765])
kappa = 8.0
def func(X,kappa):
    if kappa <= 0: return np.inf
    return -(np.log(kappa) + 4*np.log(np.pi) + 2*X - 2*np.log(2))
popt,pcov = curve_fit(func,X,Y,sigma=Err,p0=kappa)
