Fit a curve with a sum using curve_fit - python

I am having a problem fitting a curve with scipy.
This is the equation I want to fit my data to (the original image is missing; reconstructed here from the code below):

y = (M - Mps)(1 - e^(-k x)) + Mps [ -e^(-B x) sqrt(D/(B L^2)) sqrt(tan(B L^2 / D)) - (8/pi^2) sum_{n=0}^inf e^(-(2n+1)^2 pi^2 D x / (4 L^2)) / ( (2n+1)^2 (1 - (2n+2)^2 D pi^2 / (4 B L^2)) ) ]

This is the code:
import numpy as np
import os
from mpmath import *
from pandas import read_csv
from scipy.optimize import curve_fit
from matplotlib import pyplot
from sympy import *
M = 0.0129
L = 0.006
PI = np.pi
mp.dps=10
data = np.genfromtxt('Daten.csv',dtype=float,delimiter=',')
x = data[1:, 0]
y = data[1:, 1]  # note: the original read data[1:, 0] here, which duplicates x

def objective(x, Mps, B, k, D):
    return (M - Mps) * (1 - np.exp(-k * x)) + Mps * (
        -np.exp(-B * x) * np.power(D / (B * L * L), 0.5)
        * np.power(np.tan((B * L * L) / D), 0.5)
        - (8 / (PI * PI)) * nsum(
            lambda n: np.exp(-(2 * n + 1) * (2 * n + 1) * PI * PI * D * x / (4 * L * L))
            / ((2 * n + 1) * (2 * n + 1) * (1 - (2 * n + 2) * (2 * n + 2) * (D * PI * PI / (4 * B * L * L)))),
            [0, float('inf')]))

def obtainpar(x, y):  # the original was missing the colon here
    popt, _ = curve_fit(objective, x, y)
    print('{Mps,B,k,D}=', popt)
Then I obtain the following message:
Traceback (most recent call last):
File "C:/Users/gerardo.salazar/Desktop/hector.py", line 29, in <module>
popt,_ = curve_fit(objective,x,y)
File "C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py", line 763, in curve_fit
res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py", line 388, in leastsq
shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
File "C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py", line 26, in _check_func
res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
File "C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py", line 463, in func_wrapped
return func(xdata, *params) - ydata
File "C:/Users/gerardo.salazar/Desktop/hector.py", line 27, in objective
nsum(lambda n: np.exp(-(2*n+1)*(2*n+1)*PI*PI*D*x/(4*L*L))/((2*n+1)*(2*n+1)*(1-(2*n+2)*(2*n+2)*(D*PI*PI/(4*B*L*L)))) ,[0,float('inf')]))
File "C:\ProgramData\Anaconda3\lib\site-packages\mpmath\calculus\extrapolation.py", line 1718, in nsum
return +ctx.adaptive_extrapolation(update, emfun, options)
File "C:\ProgramData\Anaconda3\lib\site-packages\mpmath\calculus\extrapolation.py", line 1165, in adaptive_extrapolation
update(partial, xrange(index, index+step))
File "C:\ProgramData\Anaconda3\lib\site-packages\mpmath\calculus\extrapolation.py", line 1706, in update
psum = psum + g(ctx.mpf(k))
File "C:\ProgramData\Anaconda3\lib\site-packages\mpmath\calculus\extrapolation.py", line 1753, in g
return f(*args)
File "C:\ProgramData\Anaconda3\lib\site-packages\mpmath\calculus\extrapolation.py", line 1808, in g
return f(*args)
File "C:/Users/gerardo.salazar/Desktop/hector.py", line 27, in <lambda>
nsum(lambda n: np.exp(-(2*n+1)*(2*n+1)*PI*PI*D*x/(4*L*L))/((2*n+1)*(2*n+1)*(1-(2*n+2)*(2*n+2)*(D*PI*PI/(4*B*L*L)))) ,[0,float('inf')]))
TypeError: loop of ufunc does not support argument 0 of type mpf which has no callable exp method
I cannot share the data, but it is just simple (x, y) points, nothing special. Thank you for the help, and for any suggestion on how I could make this work.

As mentioned by @piterbarg, there are several problems with mixing numpy and mpmath. My idea was to stay with mpmath and only swap to numpy after each value is calculated. curve_fit etc. send numpy arrays, though, so I have to take care of iterable input. Finally, it turns out that negative D and B are a problem. Instead of putting bounds on them, which is an option of course, I use the absolute value inside the function. The fitted result can hence come out negative...but isn't here.
Slightly cleaned up and augmented accordingly, the code looks like:
import matplotlib.pyplot as plt
import numpy as np
from mpmath import pi as PI
from mpmath import nsum
from mpmath import exp
from mpmath import sqrt
from mpmath import tan
from scipy.optimize import curve_fit

M = 0.0129
L = 0.006

### might be of interest to set:
# ~import mpmath
# ~mpmath.dps = 10

def objective( x, Mps, b, k, d ):
    if isinstance( x, ( list, tuple, np.ndarray ) ):
        out = np.fromiter(
            ( objective( item, Mps, b, k, d ) for item in x ),
            float
        )  ## always return np.array
    else:
        D = abs( d )  ### fails for negative values
        B = abs( b )  ### fails for negative values
        dil2 = D / L**2
        out = ( M - Mps ) * ( 1 - exp( -k * x ) )
        out += Mps * (
            -exp( -B * x ) * sqrt( dil2 / B )
            * sqrt( tan( B / dil2 ) )
            - 8 / PI**2
            * nsum( lambda n:
                exp( -( 2 * n + 1 )**2 * PI**2 * dil2 * x / 4 ) / (
                    ( 2 * n + 1 )**2 * ( 1 - ( 2 * n + 2 )**2
                    * ( dil2 * PI**2 / ( 4 * B ) ) )
                ), [ 0, float( 'inf' ) ]
            )
        )
    return out

############ test
xl = np.linspace( -3, 15, 55 )
yl = objective( xl, 0.5, 1, 0.3, 1.2 )
yl *= np.random.normal( loc=1, scale=0.1, size=len( yl ) )

popt, pcov = curve_fit( objective, xl, yl, p0=[ 1, 1, 1, 1 ] )
print( popt )
print( pcov )
### always look at pcov
### shows that the fit is basically insensitive to parameter D

xfull = np.linspace( -3, 15, 155 )
yfull = objective( xfull, *popt )

fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xl, yl, ls='', marker='o' )
ax.plot( xfull, yfull )
plt.show()
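As an aside, a quick way to quantify the insensitivity mentioned in the comment is to look at the one-sigma errors from the diagonal of pcov. A minimal sketch, re-using popt and pcov from the fit above (the names are just labels):
perr = np.sqrt( np.diag( pcov ) )  # one-sigma standard errors of the fit parameters
for name, val, err in zip( [ 'Mps', 'B', 'k', 'D' ], popt, perr ):
    print( name, val, err )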
Note, importing everything via * is usually a bad idea.

Related

Extending a polynomial to fit data: creating an arbitrary amount of functional parameters

I would like to fit a function to a given data set and if a condition isn't met, extend the function and repeat the procedure - similar to increasing the highest order of a Taylor series. My problem is that I don't know how to extend the function. This function is supposed to look like this
def func(x, a0, a1, a2, ...):
    return (a0 * H0 + a1 * H1 + a2 * H2 + ...)(x)
H0(x), H1(x), H2(x),... are already known functions. I expect this method to use 20-1000 of the functions H0,H1,... so I have to find a way to not define each parameter a0, a1, a2,... by hand.
My idea was to define the function with a maximum amount of parameters and then manipulate it within a loop (reduce the number of parameters somehow and then increase them with each iteration)
# choose N: an arbitrary number of parameters
# create N-many functions H0, H1, ... HN-1 -> put them into a numpy array HArray
def func(x, parameters):  # somehow reduce the number of parameters to N
    # convert parameters into a numpy array -> parameterArray
    result = parameterArray * HArray(x)  # (a * H0 + b * H1 + c * H2 + ...)(x)
    return result
# fit the function to a given dataset
As complete code:
import numpy as np
from scipy.optimize import curve_fit

x = np.linspace(0, 2*np.pi, num=10000)  # xdata
y = np.sin(x)  # ydata
error = 0.0001  # fit condition
K = 1000
for N in range(K):
    def func(x, parameters):  # somehow reduce the number of parameters to N
        parameterArray = np.array([p for p in parameters])
        HArray = np.array([x**i for i in range(N)])  # a polynomial as example
        return parameterArray * HArray(x)
    popt, pcov = curve_fit(func, x, y)
    stdDev = np.sqrt(np.diag(pcov))
    if stdDev < error: break
In order for curve_fit to work, the function needs the appropriate number of parameters. I also had the idea of fixing the last K-N parameters while fitting, but I don't know how to do that either.
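As an aside, curve_fit itself can handle a variable number of parameters if the model takes *args and the parameter count is fixed via p0. A hedged sketch, not part of the original question:
import numpy as np
from scipy.optimize import curve_fit

def func( x, *params ):
    # polynomial of variable order: sum_i params[i] * x**i
    return sum( p * x**i for i, p in enumerate( params ) )

x = np.linspace( 0, 2 * np.pi, 100 )
y = np.sin( x )
for N in range( 2, 8 ):
    popt, pcov = curve_fit( func, x, y, p0=np.ones( N ) )  # p0 fixes the number of parameters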
That is, if I get it right, a linear fit, so there is no need for curve_fit. The simplest approach would be:
import numpy as np

def h0( x ):
    return 1

def h1( x ):
    return x

def h2( x ):
    return x**2

def h3( x ):
    return x**3

def h4( x ):
    return np.sin( x )

def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    A = np.dot( ST, S )
    AI = np.linalg.inv( A )
    K = np.dot( AI, ST )
    sol = np.dot( K, ydata )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov = s2 * np.dot( K, np.transpose( K ) )
    return sol, cov

"""
testing
"""
xl = np.linspace( -2, 8, 23 )
yl = np.fromiter(
    ( 2.1 * h1( x ) + 0.21 * h3( x ) + 1.56 * h4( x ) for x in xl ),
    float
)
yn = yl + np.random.normal( size=len( xl ), scale=0.2 )

opt, popt = my_linear_fit( xl, yn, ( h1, h3, h4 ) )
print( opt )
print( popt )
To avoid problems with matrix inversion etc. there are ways to make this a bit more sophisticated using matrix decomposition and so on, but the main idea stays the same. If the input functions need additional parameters, one might need to play with lambda functions and/or extend the code accordingly.
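For instance, a fixed extra parameter can be frozen into each basis function with the usual default-argument trick. A small sketch, assuming my_linear_fit from above:
# the amplitudes are fitted linearly; the frequencies w stay fixed inside the lambdas
funclist = [ lambda x, w=w: np.sin( w * x ) for w in ( 0.5, 1.0, 2.0 ) ]
# opt, cov = my_linear_fit( xdata, ydata, funclist )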
Edit 1
Going to very high orders results in matrix-inversion or floating-point precision problems. We can get around this by using mpmath.
Note that numpy is not used anymore, and some data handling needs to be done manually now.
import matplotlib.pyplot as plt
from mpmath import mpf
from mpmath import fabs
from mpmath import matrix
from mpmath import mp

def pol( x, n ):
    return x**n

def make_func_list( order ):
    out = list()
    for i in range( order + 1 ):
        out.append( lambda x, i=i: pol( x, i ) )
    return out

def my_linear_fit( xdata, ydata, funclist ):
    ymat = matrix( ydata )
    ST = matrix( [ [ f( x ) for x in xdata ] for f in funclist ] )
    S = ST.T
    A = ST * S
    AI = A**(-1)
    K = AI * ST
    sol = K * ymat
    yth = S * sol
    diff = ymat - yth
    s2l = [ x**2 for x in diff ]
    s2 = 0
    for x in s2l:
        s2 += x
    s2 /= ( len( ydata ) - len( funclist ) )
    cov = s2 * K * K.T
    return sol, cov

"""
testing
"""
mp.prec = 50
xlth = [ mpf( -2 + ( 5 + 2 ) * i / 154 ) for i in range( 155 ) ]
xl = [ mpf( -2 + ( 5 + 2 ) * i / 54 ) for i in range( 55 ) ]
xmax = max( [ fabs( x ) for x in xl ] )
xls = [ x / xmax for x in xl ]
xlsth = [ x / xmax for x in xlth ]
yl = [ 2.1 * mp.sin( 3.83 * x ) for x in xl ]

fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in [ 2, 4, 12, 18 ]:
    fl = make_func_list( order )
    opt, popt = my_linear_fit( xls, yl, fl )
    print( opt.T )
    fit = [ [ o * f( x ) for x in xlsth ] for o, f in zip( opt, fl ) ]
    fitsum = fit[0]
    for line in fit[ 1: ]:
        fitsum = [ x + y for x, y in zip( fitsum, line ) ]
    bx.plot( xlsth, fitsum )
plt.show()
Edit 2
Actually, numpy has a built-in mechanism for this. I am not sure how it is done in detail, as it eventually calls library functions, but I'd guess it uses SVD. QR decomposition is still helpful to get the covariance matrix.
import numpy as np
from scipy.linalg import qr

def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    sol = np.linalg.lstsq( S, ydata )[0]
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov
"""
testing
"""
xl = np.linspace( -2, 8, 55 )
xmax = max( np.abs( xl ) )
xls = xl / xmax
yl = np.fromiter(
( 2.1 * np.sin( 2.83 * x ) for x in xl),
float
)
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in[ 5, 10, 15, 20 ]:
fl = make_func_list( order )
opt, popt = my_linear_fit( xls, yl, fl )
fit = np.array( [ o * f( xls ) for o,f in zip( opt, fl ) ] )
fit = np.sum( fit, axis=0 )
fits = np.array( [ o/(xmax**n) * f( xl ) for n, o, f in zip( range(order + 1), opt, fl ) ] )
fits = np.sum( fits, axis=0 )
ax.plot( xl, fits)
bx.plot( xls, fit)
plt.show()
Edit 3
This would be the solution using only QR decomposition. In my case it works easily up to order 20 (with re-scaling).
from scipy.linalg import solve_triangular

def my_linear_fit( xdata, ydata, funclist ):
    n = len( funclist )
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    rred = r[ : n ]  ### making it square...skip the zeros
    yred = np.dot( np.transpose( q ), ydata )[ : n ]
    sol = solve_triangular( rred, yred )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov
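Usage is as before. A minimal sketch, re-using make_func_list and the re-scaled data xls, yl from Edit 2:
fl = make_func_list( 20 )          # order 20 works here thanks to the re-scaling
opt, cov = my_linear_fit( xls, yl, fl )
print( opt )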

How can I fit a good Lorentzian on python using scipy.optimize.curve_fit?

I'm trying to fit a Lorentzian function with more than one absorption peak (Mössbauer spectra), but curve_fit is not working properly, fitting only a few peaks. How can I fit it?
Figure: attempt at fitting a multi-Lorentzian
Below I show my code. Please help me.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

def mymodel_hema( x, a1, b1, c1, a2, b2, c2, a3, b3, c3, a4, b4, c4, a5, b5, c5, a6, b6, c6 ):
    f = ( 160000
        - (c1*a1)/(c1+(x-b1)**2) - (c2*a2)/(c2+(x-b2)**2)
        - (c3*a3)/(c3+(x-b3)**2) - (c4*a4)/(c4+(x-b4)**2)
        - (c5*a5)/(c5+(x-b5)**2) - (c6*a6)/(c6+(x-b6)**2) )
    return f

def main():
    abre = np.loadtxt( 'HEMAT_1.dat' )
    x = np.zeros( len( abre ) )
    y = np.zeros( len( abre ) )
    for i in range( len( abre ) ):
        x[i] = abre[i, 0]
        y[i] = abre[i, 1]
    popt, pcov = curve_fit( mymodel_hema, x, y, maxfev=1000000000 )
My data --> https://drive.google.com/file/d/1LvCKNdv0oBza_TDwuyNwd29PgQv22VPA/view?usp=sharing
This code uses leastsq instead of curve_fit, as the latter requires a fixed number of parameters. Here I do not want that: I let the code "decide" how many peaks there are. Note that I scaled the data to simplify the fit. The true fitting parameters are easily calculated by scaling back (and standard error propagation).
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq

def lorentzian( x, x0, a, gam ):
    return a * gam**2 / ( gam**2 + ( x - x0 )**2 )

def multi_lorentz( x, params ):
    off = params[0]
    paramsRest = params[1:]
    assert not ( len( paramsRest ) % 3 )
    return off + sum( [ lorentzian( x, *paramsRest[ i : i+3 ] ) for i in range( 0, len( paramsRest ), 3 ) ] )

def res_multi_lorentz( params, xData, yData ):
    diff = [ multi_lorentz( x, params ) - y for x, y in zip( xData, yData ) ]
    return diff

xData, yData = np.loadtxt( 'HEMAT_1.dat', unpack=True )
yData = yData / max( yData )

generalWidth = 1
yDataLoc = yData
startValues = [ max( yData ) ]
counter = 0
while max( yDataLoc ) - min( yDataLoc ) > .1:
    counter += 1
    if counter > 20:  ### max 20 peaks...emergency break to avoid infinite loop
        break
    minP = np.argmin( yDataLoc )
    minY = yData[ minP ]
    x0 = xData[ minP ]
    startValues += [ x0, minY - max( yDataLoc ), generalWidth ]
    popt, ier = leastsq( res_multi_lorentz, startValues, args=( xData, yData ) )
    yDataLoc = [ y - multi_lorentz( x, popt ) for x, y in zip( xData, yData ) ]

print( popt )
testData = [ multi_lorentz( x, popt ) for x in xData ]

fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xData, yData )
ax.plot( xData, testData )
plt.show()
Providing
[ 9.96855817e-01 4.94106598e+02 -2.82103813e-01 4.66272773e+00
2.80688160e+01 -2.72449246e-01 4.71728295e+00 1.31577189e+02
-2.29698620e-01 4.20685229e+00 4.01421993e+02 -1.85917255e-01
5.57859380e+00 2.29704607e+02 -1.47193792e-01 3.91112196e+00
3.03387957e+02 -1.37127711e-01 4.39571905e+00]
and the corresponding plot of the data with the multi-Lorentzian fit.
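To recover the true parameters from the scaled fit, only the offset and the amplitudes need to be multiplied back by the original data maximum; x0 and gam are in x-units and stay as they are. A minimal sketch, assuming ymax = max( yData ) was stored before the normalization above:
offset_true = popt[0] * ymax                                   # scaled offset back to raw units
amplitudes_true = [ popt[i] * ymax for i in range( 2, len( popt ), 3 ) ]  # every 'a'
# x0 ( popt[1::3] ) and gam ( popt[3::3] ) need no rescaling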

Fitting a straight line in 3D with errors on x,y,z

I have a few spatial data points (x, y, z) with errors on all coordinates, and I would like to fit a straight line to them. I struggle to find the chi^2 to minimise. I found the 2D case here (Numerical Recipes); see also this picture for the formula of the chi^2,
but I have trouble working it out in 3D. Does anyone have experience with or an idea for this?
Are there any python libraries that can deal with problems like this?
For the 2D case there is a paper: Krystek and Anton, Meas. Sci. Technol. 18 (2007) 3438-3442. I am sure this can be generalized to 3D. The result would avoid an iterative process, but the details are probably quite cumbersome.
As an alternative, an iterative solution may look like this:
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as m3d
import numpy as np
from random import random
from scipy.optimize import leastsq, fmin

def line_points( s, p, t ):
    return [ s * t[0] + p[0], s * t[1] + p[1], s * t[2] + p[2] ]

def weighted_dist( s, p, t, xVec, sigmaVec ):
    q = line_points( s, p, t )
    d = ( q[0] - xVec[0] )**2 / sigmaVec[0]**2
    d += ( q[1] - xVec[1] )**2 / sigmaVec[1]**2
    d += ( q[2] - xVec[2] )**2 / sigmaVec[2]**2
    return np.sqrt( d )

def weighted_od( p, t, xVec, sigmaVec ):
    f = lambda s: weighted_dist( s, p, t, xVec, sigmaVec )
    sol = fmin( f, 0, disp=False )
    d = weighted_dist( sol[0], p, t, xVec, sigmaVec )
    return d

def residuals( params, data, sigmas ):  ### data of type [ allx, ally, allz ], sigmas of type [ allsx, allsy, allsz ]
    px, py, pz, tx, ty, tz = params
    out = list()
    for x0, y0, z0, sx, sy, sz in zip( *( data + sigmas ) ):
        out += [ weighted_od( [ px, py, pz ], [ tx, ty, tz ], [ x0, y0, z0 ], [ sx, sy, sz ] ) ]  # the original had [ py, py, pz ], a typo
    print( sum( out ) )
    return out

myP = np.array( [ 1, 1, 3 ] )
myT = np.array( [ -1, -3, .8 ] )
myT /= np.linalg.norm( myT )

sList = np.linspace( -3, 3, 100 )
lineList = [ line_points( s, myP, myT ) for s in sList ]
xData = [ p[0] + .2 * ( 2 * random() - 1 ) for p in lineList ]
yData = [ p[1] + .4 * ( 2 * random() - 1 ) for p in lineList ]
zData = [ p[2] + .8 * ( 2 * random() - 1 ) for p in lineList ]

xyzData = [ xData, yData, zData ]
sssData = [ len( xData ) * [ .2 ], len( xData ) * [ .4 ], len( xData ) * [ .8 ] ]

residuals( [ 1, 1, 3, -1, -3, .8 ], xyzData, sssData )
myFit, err = leastsq( residuals, [ 1, 1, 2, -1, -2, -1 ], args=( xyzData, sssData ) )
print( myFit )

fitP = myFit[:3]
fitT = myFit[3:]
fitTN = np.linalg.norm( fitT )
fitT = [ fitT[0] / fitTN, fitT[1] / fitTN, fitT[2] / fitTN ]
fitLineList = [ line_points( s, fitP, fitT ) for s in sList ]

ax = m3d.Axes3D( plt.figure() )
ax.plot( *zip( *lineList ) )
ax.plot( *zip( *fitLineList ) )
ax.scatter3D( xData, yData, zData )
plt.show()
Providing:
[ 1. 1.00009764 2.98911266 121.35860193 364.44920212
-92.27043484]
and the corresponding 3D plot.
The code surely can be made nicer. One could, e.g., fit theta and phi of the direction vector instead of its three components. More careful handling of python lists and numpy arrays would help as well, I guess.
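The theta/phi idea would look something like this (a sketch; two angles always give a unit direction vector, which removes the normalization step):
def direction( theta, phi ):
    # unit vector from spherical angles; |t| = 1 by construction
    return np.array( [
        np.sin( theta ) * np.cos( phi ),
        np.sin( theta ) * np.sin( phi ),
        np.cos( theta )
    ] )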
For errors and the covariance matrix, check this.

Python LMFIT restriction fit parameters

I'm trying to fit a function to some data in Python using the LMFIT library for nonlinear functions. It's easy enough, but I want to know if there's a way to restrict some properties of the fitted values.
For example, in the following code I fit my data to optimize values A, B and C. But I also want the ratio of A to B to be pi/4 times some integer. Is there a way to impose this restriction?
from lmfit import Model
import numpy
from numpy import cos, sin, pi, linspace
Upload data:
data = numpy.genfromtxt('data')
axis = numpy.genfromtxt('axis')
Define function:
def func(x, A, B, C):
    return (A*cos(x)*cos(x) + B*sin(x)*sin(x) + 2*C*sin(x)*cos(x))**2
I must make an initial guess for my parameters:
a = 0.009
b = 0.3
c = 0.3
Then create a model to fit my function:
func_model = Model(func)
Fit the function to input data, with initial guesses (A = a, B = b, C = c):
result = func_model.fit(data, x=axis, A = a, B = b, C = c)
fitted_vals = result.best_values #dictionary structure
Afit = fitted_vals['A']
Bfit = fitted_vals['B']
Cfit = fitted_vals['C']
How can I make sure that the ratio of Afit to Bfit is pi/4 times some integer?
If it's not possible, is anyone aware of software that has this capability?
The problem with the standard fit is the estimate of the Jacobian: if a parameter is discrete, the derivative is zero almost everywhere. A workaround might be to use leastsq with a self-defined residual function, additionally providing the derivatives. One can make the parameter discrete in the residual function but let it be continuous in the derivative. I'm not saying this is the general solution to this type of problem, but in the case of the OP's function it works quite OK.
Edit - Code would be:
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import leastsq

def f0( x, A, B, C ):
    return A * np.cos( x )**2 + B * np.sin( x )**2 + 2 * C * np.sin( x ) * np.cos( x )

def func( x, A, B, C ):
    return f0( x, A, B, C )**2

a = 0.009
b = 0.3
c = 0.4

xList = np.linspace( -1, 6, 500 )
yList = np.fromiter( ( func( x, a, b, c ) for x in xList ), float )

def residuals( p, x, y ):
    return func( x, p[0], int( p[1] ) * np.pi / 2. * p[0], p[2] ) - y

def dfunc( p, x, y ):  # derivatives; p[1] is kept continuous here
    return [
        f0( x, p[0], int( p[1] ) * np.pi / 2. * p[0], p[2] ) * ( np.cos( x )**2 + p[1] * np.pi / 2. * np.sin( x )**2 ),
        f0( x, p[0], int( p[1] ) * np.pi / 2. * p[0], p[2] ) * ( p[0] * np.pi / 2. * np.sin( x )**2 ),
        f0( x, p[0], int( p[1] ) * np.pi / 2. * p[0], p[2] ) * ( 2 * np.sin( x ) * np.cos( x ) ),
    ]

plsq, cov, infodict, mesg, ier = leastsq( residuals, [ 0.009, .3/.01, .4 ], args=( xList, yList ), Dfun=dfunc, col_deriv=1, full_output=True )

fit = func( xList, plsq[0], int( plsq[1] ) * np.pi / 2. * plsq[0], plsq[2] )
print( plsq )
print( int( plsq[1] ) )

fig1 = plt.figure( 1, figsize=( 6, 4 ), dpi=80 )
ax = fig1.add_subplot( 1, 1, 1 )
ax.plot( xList, yList )
ax.plot( xList, fit, ls='--' )
plt.show()
Providing:
>>[8.68421935e-03 2.22248626e+01 4.00032135e-01]
>>22
I think the answer is no. The solvers in scipy.optimize that lmfit wraps do not support discrete variables, only continuous ones.
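A common workaround is to brute-force the integer: fix n, fit the remaining continuous parameters, and keep the smallest residual. A hedged sketch, assuming the constraint is B = n * pi/4 * A as in the question and re-using its axis and data arrays; the scanned range of n and the start values are guesses:
import numpy as np
from scipy.optimize import curve_fit

def make_model( n ):
    def model( x, A, C ):
        B = n * np.pi / 4 * A  # ratio of B to A fixed by the integer n
        return ( A * np.cos( x )**2 + B * np.sin( x )**2
                 + 2 * C * np.sin( x ) * np.cos( x ) )**2
    return model

best = None
for n in range( -10, 11 ):  # scan a plausible integer range
    try:
        popt, _ = curve_fit( make_model( n ), axis, data, p0=[ 0.01, 0.3 ] )
    except RuntimeError:  # skip integers for which the fit does not converge
        continue
    chi2 = np.sum( ( make_model( n )( axis, *popt ) - data )**2 )
    if best is None or chi2 < best[0]:
        best = ( chi2, n, popt )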

Minimizing a function using python for data fitting

I have a function as follows:
q = 1 / sqrt( ((1+z)**2 * (1+0.01*o_m*z) - z*(2+z)*(1-o_m)) )
h = 5 * log10( (1+z)*q ) + 43.1601
I have experimental values corresponding to the above equations. I must put the data into the function and evaluate the sum below:
chi = (q_exp - q_theo)**2 / err**2  # summed over all data points, from z=0 to z=1.4 (in the data file)
z, err and q_exp are in the data file (2.txt). Now I have to scan o_m over a range (0.2 to 0.4) and find the o_m at which the chi function is minimized.
my code is:
from math import *
from scipy.integrate import quad

min = None
l = None
a = None
b = None
c = 0

def ant( z, om, od ):  # the original used the globals o_m, o_d here instead of the parameters
    return 1 / sqrt( (1+z)**2 * (1 + 0.01*om*z) - z*(2+z)*od )

for o_m in range( 20, 40, 1 ):
    o_d = 1 - 0.01*o_m
    with open( '2.txt' ) as fp:
        for line in fp:
            n = list( map( float, line.split() ) )
            q = quad( ant, n[0], n[1], args=( o_m, o_d ) )[0]
            h = 5.0 * log10( (1 + n[1]) * q ) + 43.1601
            chi = ( n[2] - h )**2 / n[3]**2
            c = c + chi
    if min is None or min > c:
        min = c
        l = o_m
print( 'chi=', q, 'o_m=', 0.01*l )
n[0], n[1], n[2] and n[3] are z1, z2, q_exp and err, respectively, in the data file, where z1 and z2 are the integration range.
I need your help, and I appreciate your time and attention.
Please do not downvote; I need your answers.
Here is my understanding of the problem. First, I generate some data with the following code:
import numpy as np
from scipy.integrate import quad
from random import random

def boxmuller( x0, sigma ):
    u1 = random()
    u2 = random()
    ll = np.sqrt( -2 * np.log( u1 ) )
    z0 = ll * np.cos( 2 * np.pi * u2 )
    z1 = ll * np.sin( 2 * np.pi * u2 )  # the original used cos here as well, making z0 and z1 identical
    return sigma * z0 + x0, sigma * z1 + x0

def q_func( z, oM, oD ):
    den = np.sqrt( (1.0 + z)**2 * (1 + 0.01 * oM * z) - z * (2 + z) * (1 - oD) )
    return 1.0 / den

def h_func( z, q ):
    out = 5 * np.log10( (1.0 + z) * q ) + .25  # 43.1601
    return out

def q_Int( z1, z2, oM, oD ):
    out = quad( q_func, z1, z2, args=( oM, oD ) )
    return out

ooMM = 0.3
ooDD = 1.0 - ooMM

dataList = []
for z in np.linspace( .3, 20, 60 ):
    z1 = .1 + .1 * z * .01 * z**2
    z2 = z1 + 3.0 + .08 + z**2
    q = q_Int( z1, z2, ooMM, ooDD )[0]
    h = h_func( z, q )
    sigma = np.fabs( .01 * h )
    h = boxmuller( h, sigma )[0]
    dataList += [ [ z, z1, z2, h, sigma ] ]
dataList = np.array( dataList )
np.savetxt( "data.txt", dataList )
which I would then fit in the following way
import matplotlib
matplotlib.use('Qt5Agg')
from matplotlib import pyplot as plt
import numpy as np
from scipy.integrate import quad
from scipy.optimize import leastsq

def q_func( z, oM, oD ):
    den = np.sqrt( (1.0 + z)**2 * (1 + 0.01 * oM * z) - z * (2 + z) * (1 - oD) )
    return 1.0 / den

def h_func( z, q ):
    out = 5 * np.log10( (1.0 + z) * q ) + .25  # 43.1601
    return out

def q_Int( z1, z2, oM, oD ):
    out = quad( q_func, z1, z2, args=( oM, oD ) )
    return out

def residuals( parameters, data ):
    om, od = parameters
    zList = data[:, 0]
    yList = data[:, 3]
    errList = data[:, 4]
    qList = np.fromiter( ( q_Int( z1, z2, om, od )[0] for z1, z2 in data[:, [1, 2] ] ), float )
    hList = np.fromiter( ( h_func( z, q ) for z, q in zip( zList, qList ) ), float )
    diffList = np.fromiter( ( ( y - h ) / e for y, h, e in zip( yList, hList, errList ) ), float )
    return diffList

dataList = np.loadtxt( "data.txt" )

### fitting
startGuess = [ .4, .8 ]
bestFitValues, cov, info, mesg, ier = leastsq( residuals, startGuess, args=( dataList, ), full_output=1 )
print( bestFitValues, cov )

fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( dataList[:, 0], dataList[:, 3], marker='x' )

### fit result
fqList = [ q_Int( z1, z2, bestFitValues[0], bestFitValues[1] )[0] for z1, z2 in zip( dataList[:, 1], dataList[:, 2] ) ]
fhList = [ h_func( z, q ) for z, q in zip( dataList[:, 0], fqList ) ]
ax.plot( dataList[:, 0], fhList, marker='+' )
plt.show()
giving output
>>[ 0.31703574 0.69572673]
>>[[ 1.38135263e-03 -2.06088258e-04]
>> [ -2.06088258e-04 7.33485166e-05]]
and the graph
Note that for leastsq the covariance matrix is in reduced form and needs to be rescaled.
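Concretely, the rescaling would look like this (a sketch, re-using residuals, bestFitValues, cov and dataList from above):
res = residuals( bestFitValues, dataList )
s2 = np.sum( res**2 ) / ( len( res ) - len( bestFitValues ) )  # reduced chi-square
cov_scaled = cov * s2  # rescaled covariance matrix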
Unconsciously, this question overlaps with my other question. The correct answer is:
from math import *
import numpy as np
from scipy.integrate import quad

min = l = a = b = chi = None
c = 0
z, mo, err = np.genfromtxt( 'Union2.1_z_dm_err.txt', unpack=True )

def ant( z, o_m ):  # 0.01*o_m is the step size of o_m
    return 1 / sqrt( (1+z)**2 * (1 + 0.01*o_m*z) - z*(2+z)*(1 - 0.01*o_m) )

for o_m in range( 20, 40 ):
    c = 0
    for i in range( len( z ) ):
        q = quad( ant, 0, z[i], args=( o_m, ) )[0]  # integration from 0 to z
        h = 5 * log10( (1 + z[i]) * (299000/70) * q ) + 25  # function of dL
        chi = ( mo[i] - h )**2 / err[i]**2  # chi^2 test function
        c = c + chi
    l = o_m
    print( 'chi^2=', c, 'Om=', 0.01*l, 'OD=', 1 - 0.01*l )
