How to fit a logarithmic curve to data in Python?

I've been trying to fit some data obtained from simulations. From the shape of the curve, I guessed a logarithmic fit would be ideal, but the fitted curve comes out looking quite funky. I've also tried higher-order polynomials and np.polyfit, but I couldn't get either to work.
Any help would be appreciated!
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
xdata=[9.24104360013e-06, 4.72619458107e-06, 4.03957328857e-06, 9.78301182748e-06, 1.36994566431e-05, 1.16294573409e-05, 7.70899546232e-06, 2.72587766232e-06, 2.19089955631e-06, 5.34851640035e-06, 7.84434545123e-06, 7.6524185787e-06, 1.00592536363e-05, 6.08711035578e-07, 4.08259572135e-07, 5.74424798328e-07, 6.20036326494e-07, 4.34755225756e-06, 4.72832211908e-06, 1.25156011417e-06, 1.44996714816e-05, 3.79992166335e-06, 4.45935911838e-06, 6.6307841155e-06, 2.38540191336e-06, 9.4649801666e-07, 9.11518608157e-06, 3.1944675219e-06, 5.32674287313e-06, 1.48463901861e-05, 3.41127723277e-06, 3.40027150288e-06, 3.33064781566e-06, 2.12828505238e-06, 7.22565690506e-06, 7.86527964811e-06, 2.25791582571e-06, 1.94875869207e-05, 1.54712884424e-05, 5.82300791075e-06, 9.5783833758e-06, 1.89519143607e-05, 1.03731970283e-05, 2.53090894753e-05, 9.26047056658e-06, 1.05428610146e-05, 2.89162870493e-05, 4.78624726782e-05, 1.00005855557e-05, 6.88617910928e-05]
ydata=[0.00281616449359, 0.00257023004939, 0.00250030932407, 0.00284317789756, 0.00300158447316, 0.00291690879783, 0.00274898865728, 0.0023625485679, 0.0023018015629, 0.00259860025555, 0.00269155777824, 0.00265941197135, 0.0028073724168, 0.00192920496041, 0.00182900945464, 0.00191452746379, 0.00193227563253, 0.00253266811688, 0.00255961306471, 0.00212426145702, 0.00285906942634, 0.00247877245272, 0.0025348504727, 0.00269881922057, 0.00232270371493, 0.00204672286703, 0.00281306442303, 0.00241938445736, 0.00261083321385, 0.00287440363274, 0.00244324770882, 0.00244364989768, 0.00244593671433, 0.00228714406931, 0.00263301289418, 0.00269385915315, 0.0022968948347, 0.00313898537645, 0.00305650121575, 0.00265291893623, 0.00278748794063, 0.00312801724905, 0.00289450806538, 0.00313176225397, 0.00284010926578, 0.0028957865422, 0.00335438183977, 0.00360421739757, 0.00270734995952, 0.00377301191882]
plt.plot(xdata,ydata,'o')
x = np.array(xdata, dtype=float)  # transform your data into a numpy array of floats
y = np.array(ydata, dtype=float)  # so that curve_fit can work
#def func(x, a, b, c):
#    return a*x**2 + b*x + c
def func(x, a, b):
    return a*np.log(x) + b
popt, pcov = curve_fit(func, x, y)
plt.plot(x, func(x, *popt), label="Fitted Curve")
plt.show()

Sort x before plotting; otherwise matplotlib connects the points in data order and the fitted line zig-zags across the plot:
x_sorted = np.sort(x)
plt.plot(x_sorted, func(x_sorted, *popt), label="Fitted Curve")
plt.show()
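Alternatively, here is a minimal sketch (building on the x, y, func and popt defined above; not part of the original answer) that evaluates the fitted function on a dense, already-ordered grid, so the curve is smooth no matter how the data are ordered. And since y = a*log(x) + b is linear in log(x), np.polyfit can produce an equivalent fit directly:
x_grid = np.linspace(x.min(), x.max(), 200)  # dense, already-sorted evaluation grid
plt.plot(x, y, 'o', label="Data")
plt.plot(x_grid, func(x_grid, *popt), label="Fitted Curve")
plt.legend()
plt.show()
# equivalent fit, linear in log(x): np.polyfit returns [a, b] for y = a*log(x) + b
a_lin, b_lin = np.polyfit(np.log(x), y, 1)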

Related

Why does acceptance-rejection sampling perform so badly for computing an integral?

I want to compute $\int_1^2 x^2 dx$ using the acceptance-rejection sampling method, but it does not perform as well as I expected.
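For reference, the exact value is $\int_1^2 x^2\,dx = \left[\frac{x^3}{3}\right]_1^2 = \frac{7}{3} \approx 2.333$, which is the S = 7/3.0 used in the code below.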
import random
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12,8)
def acceptRejectSampling(number, p, c):
    sample = []
    while len(sample) < number:
        y = random.uniform(2, 4)
        u = random.uniform(0, c)
        if u <= p(y):
            sample.append(y)
    return sample

def f(x):  # the domain of x is [1, 2]
    return x**2

def p(y):  # pdf of y, the domain of y is [2, 4]
    return 0.5/(y**0.5)
S=7/3.0
X=np.random.uniform(1,2,1000)
print("area:",S)
print("monte calro estimator: ",np.mean([f(i) for i in X]))
#acceptance rejection sampling
y=np.linspace(2,4,200)
c=0.5/np.power(2,0.5)
cc=[c for i in range(200)]
samples=acceptRejectSampling(10000,p,c)
print("acceptance-rejection-sampling estimator: ", np.mean(samples))
plt.plot(y,cc, '--',label='c*g(x)')
py=[p(i) for i in y]
plt.plot(y, py,label='p(y)')
plt.hist(samples, bins=50, density=True,label='sampling')
plt.legend()
plt.show()
The output of the code is:
area: 2.3333333333333335
monte carlo estimator: 2.391469143847661
acceptance-rejection-sampling estimator: 2.938527759943371
I am confused about why acceptance-rejection sampling performs so badly and would like your advice on this problem. Any help would be much appreciated!

Exponential regression function Python

I am trying to implement an exponential regression function (sp stands for sympy; I use numpy and sympy). At first, in func_exp, I tried to use np.exp, but it raised an AttributeError, so I decided to use sympy instead. Here is the code:
import numpy as np
from numpy.linalg import matrix_rank
import scipy
import scipy.integrate
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
from sympy import integrate
import sympy as sp
x, y = sp.symbols('x, y')
sp.init_printing(use_unicode=True,use_latex='mathjax')
def exponential_regression(x_data, y_data):
    def func_exp(x, a, b):
        return a*sp.exp(b*x)
    popt, pcov = scipy.optimize.curve_fit(func_exp, x_data, y_data)
    a = popt[0]  # component a, optimal parameter (popt)
    b = popt[1]  # component b, optimal parameter (popt)
    plt.figure()
    puntos = plt.plot(x_data, y_data, 'x', color='xkcd:maroon')
    curva_regresion = plt.plot(x_data, func_exp(x_data, a, b), color='xkcd:teal')
    plt.show(puntos, curva_regresion)
    return func_exp(x, a, b)
I try to execute:
x_data = np.arange(0, 51)  # creates an array from 0 to 50
y_data = np.array([0.001, 0.199, 0.394, 0.556, 0.797, 0.891, 1.171, 1.128, 1.437,
1.525, 1.720, 1.703, 1.895, 2.003, 2.108, 2.408, 2.424,2.537,
2.647, 2.740, 2.957, 2.58, 3.156, 3.051, 3.043, 3.353, 3.400,
3.606, 3.659, 3.671, 3.750, 3.827, 3.902, 3.976, 4.048, 4.018,
4.286, 4.353, 4.418, 4.382, 4.444, 4.485, 4.465, 4.600, 4.681,
4.737, 4.792, 4.845, 4.909, 4.919, 5.100])
exponential_regression(x_data, y_data)
And I get:
exponential_regression(x_data, y_data)
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
Traceback (most recent call last):
  File "<ipython-input-122-ee7c243ae4b0>", line 1, in <module>
    exponential_regression(x_data, y_data)
  File "/Volumes/TOSHIBA/spline.py", line 35, in exponential_regression
    popt, pcov = scipy.optimize.curve_fit(func_exp, x_data, y_data)
  File "/Applications/anaconda3/lib/python3.6/site-packages/scipy/optimize/minpack.py", line 742, in curve_fit
    res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
  File "/Applications/anaconda3/lib/python3.6/site-packages/scipy/optimize/minpack.py", line 387, in leastsq
    gtol, maxfev, epsfcn, factor, diag)
error: Result from function call is not a proper array of floats.
What is wrong? Thanks in advance!
Here is a minimal example for your fit function, as close as possible to your code but with all unnecessary elements removed. You can easily drop c to match your requirements:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
def func_exp(x, a, b, c):
    #c = 0
    return a * np.exp(b * x) + c

def exponential_regression(x_data, y_data):
    popt, pcov = curve_fit(func_exp, x_data, y_data, p0=(-1, 0.01, 1))
    print(popt)
    puntos = plt.plot(x_data, y_data, 'x', color='xkcd:maroon', label="data")
    curva_regresion = plt.plot(x_data, func_exp(x_data, *popt), color='xkcd:teal',
                               label="fit: {:.3f}, {:.3f}, {:.3f}".format(*popt))
    plt.legend()
    plt.show()
    return func_exp(x_data, *popt)
x_data = np.arange(0, 51)
y_data = np.array([0.001, 0.199, 0.394, 0.556, 0.797, 0.891, 1.171, 1.128, 1.437,
1.525, 1.720, 1.703, 1.895, 2.003, 2.108, 2.408, 2.424,2.537,
2.647, 2.740, 2.957, 2.58, 3.156, 3.051, 3.043, 3.353, 3.400,
3.606, 3.659, 3.671, 3.750, 3.827, 3.902, 3.976, 4.048, 4.018,
4.286, 4.353, 4.418, 4.382, 4.444, 4.485, 4.465, 4.600, 4.681,
4.737, 4.792, 4.845, 4.909, 4.919, 5.100])
exponential_regression(x_data, y_data)
Output with c = 0 and with c != 0 (the resulting plots are not reproduced here).
Main changes explained:
Removed sympy - it has nothing to do with the fitting procedure.
The definition of the exponential fit function is placed outside exponential_regression, so it can be accessed from other parts of the script. It uses np.exp because you work with numpy arrays in scipy.
Added the parameter p0 which contains the initial guesses for the parameters. Fit functions are often sensitive to this initial guess because of local extrema.
Unpacked variables with *popt to make the code flexible for different numbers of parameters; this replaces a = popt[0], b = popt[1], etc.
Removed unnecessary imports. Keep your namespace free from clutter.
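For instance, a minimal sketch of the two-parameter variant (my own illustration of the "drop c" note above, reusing x_data, y_data and the imports from the answer):
def func_exp(x, a, b):
    return a * np.exp(b * x)

# same call, with the initial guess shortened to two parameters
popt, pcov = curve_fit(func_exp, x_data, y_data, p0=(-1, 0.01))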

Morse potential fit using Python and curve_fit from scipy

I am trying to fit a Morse potential using Python and scipy.
The Morse potential is defined as:
V = D*(exp(-2*m*(x-u)) - 2*exp(-m*(x-u)))
where D, m and u are the parameters I need to extract.
Unfortunately the fit is not satisfactory, as you can see below (sorry, I do not have 10 reputation, so the image has to be clicked). Could anyone help me, please? I must say I am not the best Python programmer.
Here is my code:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
xdata2=np.array([1.0 ,1.1 ,1.2 ,1.3 ,1.4 ,1.5 ,1.6 ,1.7 ,1.8 ,1.9 ,2.0 ,2.1 ,2.2 ,2.3 ,2.4 ,2.5 ,2.6 ,2.7 ,2.8 ,2.9 ,3.0 ,3.1 ,3.2 ,3.3 ,3.4 ,3.5 ,3.6 ,3.7 ,3.8 ,3.9 ,4.0 ,4.1 ,4.2 ,4.3 ,4.4 ,4.5 ,4.6 ,4.7 ,4.8 ,4.9 ,5.0 ,5.1 ,5.2 ,5.3 ,5.4 ,5.5 ,5.6 ,5.7 ,5.8 ,5.9])
ydata2=[-1360.121815,-1368.532641,-1374.215047,-1378.090480,-1380.648178,-1382.223113,-1383.091562,-1383.479384,-1383.558087,-1383.445803,-1383.220380,-1382.931531,-1382.609269,-1382.273574,-1381.940879,-1381.621299,-1381.319042,-1381.036231,-1380.772039,-1380.527051,-1380.301961,-1380.096257,-1379.907700,-1379.734621,-1379.575837,-1379.430693,-1379.299282,-1379.181303,-1379.077272,-1378.985220,-1378.903626,-1378.831588,-1378.768880,-1378.715015,-1378.668910,-1378.629996,-1378.597943,-1378.572742,-1378.554547,-1378.543296,-1378.539843,-1378.543593,-1378.554519,-1378.572747,-1378.597945,-1378.630024,-1378.668911,-1378.715015,-1378.768915,-1378.831593]
t=np.linspace(0.1,7)
def morse(q, m, u, x):
    return q * (np.exp(-2*m*(x-u)) - 2*np.exp(-m*(x-u)))

popt, pcov = curve_fit(morse, xdata2, ydata2, maxfev=40000000)
yfit = morse(t, popt[0], popt[1], popt[2])
print(popt)
plt.plot(xdata2, ydata2,"ro")
plt.plot(t, yfit)
plt.show()
Old fit before gboffi's comment
I am guessing the exact depth of the Morse potential does not interest you overly much, so I added an additional parameter v to shift the potential up and down; this also incorporates @gboffi's comment. Furthermore, the first argument of your fit function must be the independent variable, not the parameters you want to fit (see http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.optimize.curve_fit.html).
In addition, such fits depend on your starting position. The following should give you what you want.
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
xdata2=np.array([1.0 ,1.1 ,1.2 ,1.3 ,1.4 ,1.5 ,1.6 ,1.7 ,1.8 ,1.9 ,2.0 ,2.1 ,2.2 ,2.3 ,2.4 ,2.5 ,2.6 ,2.7 ,2.8 ,2.9 ,3.0 ,3.1 ,3.2 ,3.3 ,3.4 ,3.5 ,3.6 ,3.7 ,3.8 ,3.9 ,4.0 ,4.1 ,4.2 ,4.3 ,4.4 ,4.5 ,4.6 ,4.7 ,4.8 ,4.9 ,5.0 ,5.1 ,5.2 ,5.3 ,5.4 ,5.5 ,5.6 ,5.7 ,5.8 ,5.9])
ydata2=[-1360.121815,-1368.532641,-1374.215047,-1378.090480,-1380.648178,-1382.223113,-1383.091562,-1383.479384,-1383.558087,-1383.445803,-1383.220380,-1382.931531,-1382.609269,-1382.273574,-1381.940879,-1381.621299,-1381.319042,-1381.036231,-1380.772039,-1380.527051,-1380.301961,-1380.096257,-1379.907700,-1379.734621,-1379.575837,-1379.430693,-1379.299282,-1379.181303,-1379.077272,-1378.985220,-1378.903626,-1378.831588,-1378.768880,-1378.715015,-1378.668910,-1378.629996,-1378.597943,-1378.572742,-1378.554547,-1378.543296,-1378.539843,-1378.543593,-1378.554519,-1378.572747,-1378.597945,-1378.630024,-1378.668911,-1378.715015,-1378.768915,-1378.831593]
t=np.linspace(0.1,7)
tstart = [1.e+3, 1, 3, 0]
def morse(x, q, m, u, v):
    return q * (np.exp(-2*m*(x-u)) - 2*np.exp(-m*(x-u))) + v

popt, pcov = curve_fit(morse, xdata2, ydata2, p0=tstart, maxfev=40000000)
print(popt)  # [ 5.10155662  1.43329962  1.7991549  -1378.53461345]
yfit = morse(t, *popt)
plt.plot(xdata2, ydata2,"ro")
plt.plot(t, yfit)
plt.show()
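If you would rather not hard-code the starting guesses, here is a minimal sketch (my own addition, not part of the original answer) that reads rough values off the data: v from the asymptote far from the well, u from the position of the minimum, and q from the well depth; the width parameter m still needs a plain guess:
v0 = ydata2[-1]                  # asymptotic value far from the minimum
u0 = xdata2[np.argmin(ydata2)]   # position of the potential minimum
q0 = v0 - min(ydata2)            # approximate well depth
tstart = [q0, 1.0, u0, v0]       # [q, m, u, v], with m guessed as 1.0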

Exponential Fit Between 2 Lists

I have two lists and am trying to fit an exponential of the form y = a*e^(bx) between them. I am using an approach similar to the second answer from here, but the results do not match what I know to be true from testing with Excel. Here is my code:
import numpy as np
from scipy.optimize import curve_fit
exp_constants = [62.5, 87.5, 112.5, 137.5, 162.5, 187.5, 212.5, 237.5, 262.5, 287.5]
means = [211.94, 139.30, 80.09, 48.29, 26.94, 12.12, 3.99, 1.02, 0.09, 0.02]
def func(x1, a, b):
    return a * np.exp(b * x1)
popt, pcov = curve_fit(func, exp_constants, means)
When returning popt[0] and popt[1], I get 3.222e-127 and 1.0 respectively. However, when checking with Excel, the correct exponential equation should be y = 7231.3e^(-0.04x). I am not very familiar with the curve_fit approach: is there something I am missing in my code, or a better way to get the correct exponential fit?
Edit: Here is the plot that is made with the following code:
plt.figure()
plt.plot(exp_constants, means, 'ko', label="Data")
plt.plot(exp_constants, func(exp_constants, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.show()
I guess the problem is that you do not provide an initial guess for the parameters, so as per the manual, curve_fit uses [1, 1] as a guess. The optimization might then get stuck at some local minimum. One other thing you should do is to change your xdata and ydata lists to numpy arrays, as shown by this answer:
import numpy as np
from scipy.optimize import curve_fit
exp_constants = np.array([62.5, 87.5, 112.5, 137.5, 162.5, 187.5, 212.5,
                          237.5, 262.5, 287.5])
means = np.array([211.94, 139.30, 80.09, 48.29, 26.94, 12.12, 3.99,
                  1.02, 0.09, 0.02])

def func(x1, a, b):
    return a * np.exp(b * x1)
guess = [100, -0.1]
popt, pcov = curve_fit(func, exp_constants, means, p0 = guess)
The exact value of the guess is not important, but you should probably have at least the order of magnitude and the signs right, so that the optimization can converge to the optimal value. I just used some random numbers close to the 'correct answer' you mentioned. When you don't know what to guess, you can do a polyfit(xdata, log(ydata), 1) and some basic math to get an initial value, as shown by this answer to the question you linked.
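As a rough sketch of that polyfit idea (my own illustration; it assumes all y-values are positive, since it takes their logarithm): log(y) = log(a) + b*x, so a degree-1 fit of log(y) against x recovers b as the slope and log(a) as the intercept:
# np.polyfit returns coefficients highest power first: [slope, intercept]
b0, log_a0 = np.polyfit(exp_constants, np.log(means), 1)
guess = [np.exp(log_a0), b0]  # [a, b] for y = a*exp(b*x)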
Quick plot:
x = np.linspace(exp_constants[0], exp_constants[-1], 1000)
plt.plot(exp_constants, means, 'ko', x, popt[0]*np.exp(popt[1]*x), 'r')
plt.show()
Result:

Curve fitting with a known function in numpy

I have one-dimensional numpy arrays x and y, and I would like to reproduce y with a known function in order to obtain beta. Here is the code I am using:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
y = np.array([0.04022493, 0.04287536, 0.03983657, 0.0393201, 0.03810298,
              0.0363814, 0.0331144, 0.03074823, 0.02795767, 0.02413816,
              0.02180802, 0.01861309, 0.01632699, 0.01368056, 0.01124232,
              0.01005323, 0.00867196, 0.00940864, 0.00961282, 0.00892419,
              0.01048963, 0.01199101, 0.01533408, 0.01855704, 0.02163586,
              0.02630014, 0.02971127, 0.03511223, 0.03941218, 0.04280329,
              0.04689105, 0.04960554, 0.05232003, 0.05487037, 0.05843364,
              0.05120701])
x = np.array([0., 0.08975979, 0.17951958, 0.26927937, 0.35903916,
              0.44879895, 0.53855874, 0.62831853, 0.71807832, 0.80783811,
              0.8975979, 0.98735769, 1.07711748, 1.16687727, 1.25663706,
              1.34639685, 1.43615664, 1.52591643, 1.61567622, 1.70543601,
              1.7951958, 1.88495559, 1.97471538, 2.06447517, 2.15423496,
              2.24399475, 2.33375454, 2.42351433, 2.51327412, 2.60303391,
              2.6927937, 2.78255349, 2.87231328, 2.96207307, 3.05183286,
              3.14159265])
def func(x, beta):
    return 1.0/(4.0*np.pi)*(1 + beta*(3.0/2*np.cos(x)**2 - 1.0/2))
guesses = [20]
popt,pcov = curve_fit(func,x,y,p0=guesses)
y_fit = 1/(4.0*np.pi)*(1+popt[0]*(3.0/2*np.cos(x)**2-1.0/2))
plt.figure(1)
plt.plot(x,y,'ro',x,y_fit,'k-')
plt.show()
The code works but the fitting is completely off (see picture). Any idea why?
It looks like the formula you need contains an additional parameter, p:
def func(x, beta, p):
    return p/(4.0*np.pi)*(1 + beta*(3.0/2*np.cos(x)**2 - 1.0/2))

guesses = [20, 5]
popt, pcov = curve_fit(func, x, y, p0=guesses)
y_fit = func(x, *popt)  # note: the original used an undefined angle_plot; x is the array above
plt.figure(2)
plt.plot(x, y, 'ro', x, y_fit, 'k-')
plt.show()
print(popt)  # [ 1.23341604  0.27362069]
In popt, which one is beta and which one is p? (curve_fit returns the parameters in the order of the fit function's signature, so popt[0] is beta and popt[1] is p.)
This is perhaps not what you want but, if you are just trying to get a good fit to the data, you could use np.polyfit:
fit = np.polyfit(x,y,4)
fit_fn = np.poly1d(fit)
plt.scatter(x,y,label='data',color='r')
plt.plot(x,fit_fn(x),color='b',label='fit')
plt.legend(loc='upper left')
Note that fit gives the coefficient values of, in this case, a 4th order polynomial:
>>> fit
array([-0.00877534, 0.05561778, -0.09494909, 0.02634183, 0.03936857])
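As a quick usage note (my own addition): np.poly1d, used above to build fit_fn, wraps those coefficients into a callable polynomial, so the fit can be evaluated anywhere:
print(fit_fn(1.5))  # value of the fitted 4th-order polynomial at x = 1.5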
This is going to be as good as you can get (assuming you got the equation right, as @mdurant suggested); an additional intercept term is required to further improve the fit:
def func(x, beta, icpt):
    return 1.0/(4.0*np.pi)*(1 + beta*(3.0/2*np.cos(x)**2 - 1.0/2)) + icpt

guesses = [20, 0]
popt, pcov = curve_fit(func, x, y, p0=guesses)
y_fit = func(x, *popt)
plt.figure(1)
plt.plot(x, y, 'ro', x, y_fit, 'k-')
print(popt)  # [ 0.33748816 -0.05780343]
