In this case, there are three ODEs that describe a SIR model. The issue is that I want to calculate which beta and gamma values best fit the data points given by the x_axis and y_axis values. The method I'm currently using is to integrate the ODEs with odeint from the scipy library and then fit with the curve_fit method from the same library. Given that, how would you calculate the values of beta and gamma that fit the data points?
P.S. the current error is this: ValueError: operands could not be broadcast together with shapes (3,) (14,)
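The three ODEs implemented below are the standard SIR equations:

dS/dt = -beta * S * I
dI/dt = beta * S * I - gamma * I
dR/dt = gamma * I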
import numpy as np
import scipy.integrate as spi
from scipy.optimize import curve_fit

# initial values
S_I_R = (0.762/763, 1/763, 0)
x_axis = [m for m in range(1, 15)]
y_axis = [3, 8, 28, 75, 221, 291, 255, 235, 190, 125, 70, 28, 12, 5]

# ODEs that describe the system
def equation(SIR_Values, t, beta, gamma):
    Array = np.zeros((3))
    SIR = SIR_Values
    Array[0] = -beta * SIR[0] * SIR[1]
    Array[1] = beta * SIR[0] * SIR[1] - gamma * SIR[1]
    Array[2] = gamma * SIR[1]
    return Array

# Results = spi.odeint(equation, S_I_R, time)

# fitting the values
beta_values, gamma_values = curve_fit(equation, x_axis, y_axis)
import numpy as np
import scipy.integrate as spi
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# Starting values
S0 = 762/763
I0 = 1/763
R0 = 0
x_axis = np.array([m for m in range(0, 15)])
y_axis = np.array([1, 3, 8, 28, 75, 221, 291, 255, 235, 190, 125, 70, 28, 12, 5])
y_axis = np.divide(y_axis, 763)

def sir_model(y, x, beta, gamma):
    S = -beta * y[0] * y[1]
    R = gamma * y[1]
    I = beta * y[0] * y[1] - gamma * y[1]
    return S, I, R

def fit_odeint(x, beta, gamma):
    # integrate the ODEs and return only the infected column, I,
    # so the output shape matches y_axis
    return spi.odeint(sir_model, (S0, I0, R0), x, args=(beta, gamma))[:, 1]

popt, pcov = curve_fit(fit_odeint, x_axis, y_axis)
beta, gamma = popt
fitted = fit_odeint(x_axis, *popt)

plt.plot(x_axis, y_axis, 'o', label="infected per day")
plt.plot(x_axis, fitted, label="fitted graph")
plt.xlabel("Time (in days)")
plt.ylabel("Fraction of infected")
plt.title("Fitted beta and gamma values")
plt.legend()
plt.show()
As in this example from the scipy documentation, the function passed to curve_fit must output an array with the same size as x_axis and y_axis. That is why fit_odeint returns only the infected column, [:, 1]. Your original equation returned an array of shape (3,), which could not be broadcast against the 14 data points, producing the ValueError.
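A quick way to check the shapes before fitting (a minimal sketch; the trial values 0.5 and 0.1 are arbitrary):

# the model output must match the data shape that curve_fit compares against
trial = fit_odeint(x_axis, 0.5, 0.1)
print(trial.shape, y_axis.shape)  # both should be (15,)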
I'm trying to fit three peaks using Python. I'm able to fit the first peak, but I'm having trouble getting the fitting function to converge on the next two peaks. Can someone please help me?
I guess there is some problem with the initial guesses!
Here is the code and figure:
from __future__ import division
import numpy as np
import scipy.signal
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'

""" Fitting Function"""
def _2gauss(x, amp1, cen1, sigma1, amp2, cen2, sigma2):
    return amp1*(1/(sigma1*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x-cen1)/sigma1)**2))) + \
           amp2*(1/(sigma2*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x-cen2)/sigma2)**2))) + \
           amp3*(1/(sigma3*(np.sqrt(2*np.pi))))*(np.exp((-1.0/2.0)*(((x-cen3)/sigma3)**2)))

data_12 = np.loadtxt("ExcitationA.txt", skiprows=30, dtype=np.float64)
xData, yData = np.hsplit(data_12, 2)
x = xData[:, 0]
y = yData[:, 0]
n = len(x)

amp1 = 400
sigma1 = 10
cen1 = 400
amp2 = 400
sigma2 = 5
cen2 = 400
amp3 = 340
sigma3 = 6
cen3 = 340

popt, pcov = curve_fit(_2gauss, x, y, p0=[amp1, cen1, sigma1, amp2, cen2, sigma2])

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, 'b', markersize=1, label="12°C")
ax.plot(x, _2gauss(x, *popt), markersize='1', label="Fit function", linewidth=4, color='purple')
plt.show()
As there are 9 parameters, the initial values for those parameters should be close to the true ones to obtain a good fit. One idea is to experiment by drawing

p0 = [amp1, cen1, sigma1, amp2, cen2, sigma2, amp3, cen3, sigma3]
ax.plot(x, _2gauss(x, *p0))

until the curve is more or less similar to the data. In this example, it is important that the centers cen1, cen2 and cen3 are close to the observed local maxima (340, 355, 375).
Once you have reasonable initial values, you can start the fit. Also note that in the originally posted example code amp3, cen3 and sigma3 are missing from the parameter list of the function _2gauss, so it silently used the global variables instead of fitting them.
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

def gauss_1(x, amp1, cen1, sigma1):
    return amp1 * (1 / (sigma1 * (np.sqrt(2 * np.pi)))) * (np.exp((-1.0 / 2.0) * (((x - cen1) / sigma1) ** 2)))

def gauss_3(x, amp1, cen1, sigma1, amp2, cen2, sigma2, amp3, cen3, sigma3):
    """ Fitting Function"""
    return amp1 * (1 / (sigma1 * (np.sqrt(2 * np.pi)))) * (np.exp((-1.0 / 2.0) * (((x - cen1) / sigma1) ** 2))) + \
           amp2 * (1 / (sigma2 * (np.sqrt(2 * np.pi)))) * (np.exp((-1.0 / 2.0) * (((x - cen2) / sigma2) ** 2))) + \
           amp3 * (1 / (sigma3 * (np.sqrt(2 * np.pi)))) * (np.exp((-1.0 / 2.0) * (((x - cen3) / sigma3) ** 2)))
x = np.array([300.24, 301.4, 302.56, 303.72, 304.88, 306.04, 307.2, 308.36, 309.51, 310.67, 311.83, 312.99, 314.04, 314.93, 315.77, 316.56, 317.3, 318.03, 318.77, 319.5, 320.23, 321.02, 321.86, 325.76, 326.6, 327.54, 328.49, 329.17, 329.69, 330.27, 330.84, 331.16, 335.85, 336.37, 337.05, 337.79, 339.58, 341.43, 342.42, 343.87, 345.01, 346.07, 346.91, 347.53, 348.06, 348.53, 348.89, 351.33, 351.8, 352.11, 352.42, 352.75, 353.15, 353.6, 354.04, 354.36, 354.87, 355.77, 356.72, 357.36, 357.83, 358.25, 358.69, 358.96, 359.29, 359.61, 359.93, 360.25, 360.58, 360.86, 361.16, 361.39, 361.61, 361.96, 362.3, 362.62, 363.0, 363.43, 363.94, 364.55, 365.18, 366.14, 367.3, 368.19, 368.82, 369.45, 370.03, 371.07, 371.54, 371.96, 372.31, 372.69, 373.11, 373.52, 373.99, 374.67, 375.68, 376.58, 377.11, 377.54, 377.81, 378.09, 378.4, 378.71, 378.94, 379.08, 379.3, 379.52, 379.73, 379.95, 380.17, 380.34, 380.61, 380.82, 380.99, 381.22, 381.44, 381.66, 381.88, 382.1, 382.32, 382.53, 382.75, 382.97, 383.24, 383.74, 384.0, 384.28, 384.49, 384.71, 384.92, 385.14, 385.36, 385.58, 385.9, 386.26, 386.6, 386.92, 387.29, 387.71, 388.31, 388.84, 389.53, 390.38, 391.39, 392.56, 393.72, 394.89, 396.05, 397.22, 397.69, 398.38, 398.86, 399.54, 400.02, 400.71, 401.18, 401.87, 402.34, 403.03, 403.19, 404.19, 405.36, 406.52, 407.68, 408.84, 410.01, 411.17, 412.33, 413.49, 414.65, 415.81, 416.98, 417.61])
y = np.array([3.6790e-01, 4.1930e-01, 4.6530e-01, 5.1130e-01, 5.6300e-01, 6.1750e-01, 6.6780e-01, 7.2950e-01, 7.8830e-01, 8.4960e-01, 9.0950e-01, 9.6660e-01, 1.0463e+00, 1.1324e+00, 1.2241e+00, 1.3026e+00, 1.3889e+00, 1.4780e+00, 1.5598e+00, 1.6432e+00, 1.7318e+00, 1.8256e+00, 1.9050e+00, 2.1595e+00, 2.2477e+00, 2.3343e+00, 2.4183e+00, 2.5115e+00, 2.5970e+00, 2.6825e+00, 2.7657e+00, 2.8198e+00, 3.8983e+00, 3.9956e+00, 4.0846e+00, 4.1526e+00, 4.2787e+00, 4.2256e+00, 4.2412e+00, 4.2731e+00, 4.3265e+00, 4.4073e+00, 4.4905e+00, 4.5831e+00, 4.6717e+00, 4.7660e+00, 4.8395e+00, 5.6288e+00, 5.7239e+00, 5.8141e+00, 5.9076e+00, 6.0026e+00, 6.1034e+00, 6.2157e+00, 6.3235e+00, 6.4114e+00, 6.5063e+00, 6.5709e+00, 6.5175e+00, 6.4349e+00, 6.3479e+00, 6.2638e+00, 6.2102e+00, 6.0616e+00, 5.9664e+00, 5.8697e+00, 5.7625e+00, 5.6546e+00, 5.5494e+00, 5.4404e+00, 5.3384e+00, 5.2396e+00, 5.1462e+00, 5.0412e+00, 4.9467e+00, 4.8592e+00, 4.7655e+00, 4.6709e+00, 4.5807e+00, 4.4803e+00, 4.3947e+00, 4.3347e+00, 4.3286e+00, 4.3918e+00, 4.4800e+00, 4.5637e+00, 4.6489e+00, 4.8435e+00, 4.9454e+00, 5.0396e+00, 5.1258e+00, 5.2200e+00, 5.3082e+00, 5.3945e+00, 5.4874e+00, 5.5974e+00, 5.6396e+00, 5.5880e+00, 5.4984e+00, 5.4082e+00, 5.3213e+00, 5.2270e+00, 5.1271e+00, 5.0247e+00, 4.9258e+00, 4.8324e+00, 4.7317e+00, 4.6336e+00, 4.5323e+00, 4.4258e+00, 4.3166e+00, 4.2152e+00, 4.1011e+00, 3.9754e+00, 3.8646e+00, 3.7401e+00, 3.6061e+00, 3.4715e+00, 3.3381e+00, 3.2120e+00, 3.0865e+00, 2.9610e+00, 2.8361e+00, 2.7126e+00, 2.6289e+00, 2.2796e+00, 2.1818e+00, 2.0747e+00, 1.9805e+00, 1.8864e+00, 1.7942e+00, 1.7080e+00, 1.6236e+00, 1.5279e+00, 1.4145e+00, 1.2931e+00, 1.1805e+00, 1.0785e+00, 9.8490e-01, 8.9590e-01, 7.9850e-01, 7.0670e-01, 6.2110e-01, 5.2990e-01, 4.4250e-01, 3.7360e-01, 3.1090e-01, 2.5880e-01, 2.0680e-01, 1.6760e-01, 1.4570e-01, 1.2690e-01, 1.1060e-01, 9.5900e-02, 9.0600e-02, 8.0600e-02, 7.0600e-02, 5.8100e-02, 4.4200e-02, 4.4200e-02, 4.4200e-02, 4.1400e-02, 3.4900e-02, 2.4200e-02, 1.9600e-02, 1.5300e-02, 1.5000e-02, 1.1800e-02, 1.3200e-02, 7.8000e-03, 5.0000e-03, 1.0000e-02, 4.6000e-03, 0.0])
amp1 = 100
sigma1 = 9
cen1 = 375
amp2 = 100
sigma2 = 7
cen2 = 355
amp3 = 100
sigma3 = 10
cen3 = 340

p0 = [amp1, cen1, sigma1, amp2, cen2, sigma2, amp3, cen3, sigma3]
y0 = gauss_3(x, *p0)
popt, pcov = curve_fit(gauss_3, x, y, p0=p0)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, 'b', label="given curve")
ax.plot(x, y0, 'g', ls=':', label="initial fit params")
ax.plot(x, gauss_3(x, *popt), ls=':', label="Fit function", linewidth=4, color='purple')
# plot the three individual fitted Gaussians
for i, (a, c, s) in enumerate(popt.reshape(-1, 3)):
    ax.plot(x, gauss_1(x, a, c, s), ls='-', label=f"gauss {i+1}", linewidth=1, color='crimson')
ax.legend()
ax.autoscale(axis='x', tight=True)
plt.show()
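If you'd rather not eyeball the centers, one option (a sketch, reusing the x and y arrays above; the prominence threshold is a guess that may need tuning for your data) is to seed the initial guesses with scipy.signal.find_peaks:

from scipy.signal import find_peaks

# indices of local maxima that stand out from their surroundings
peaks, props = find_peaks(y, prominence=0.1)
print(x[peaks])  # candidate values for cen1, cen2, cen3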
I am trying to fit x, y data which look something like
x = np.linspace(-2, 2, 1000)
a = 0.5
yl = np.ones_like(x[x < a]) * -0.4 + np.random.normal(0, 0.05, x[x < a].shape[0])
yr = np.ones_like(x[x >= a]) * 0.4 + np.random.normal(0, 0.05, x[x >= a].shape[0])
y = np.concatenate((yl, yr))
plt.scatter(x, y, s=2, color='k')
I'm using a variation of the Heaviside step function
def f(x, a, b): return 0.5 * b * (np.sign(x - a))
and fitting with
popt, pcov = curve_fit(f, x, y, p0=p)
where p is some initial guess.
For any p, curve_fit fits only b and not a.
For example:
popt, pcov = curve_fit(f, x, y, p0=[-1.0, 0])
we get that popt is [-1., 0.20117665]
popt, pcov = curve_fit(f, x, y, p0=[.5, 2])
we get that popt is [.5, 0.79902]
popt, pcov = curve_fit(f, x, y, p0=[1.5, -2])
we get that popt is [1.5, 0.40128229]
Why is curve_fit not fitting a?
As mentioned by others, curve_fit (and all the other solvers in scipy.optimize) works well for optimizing continuous variables, but not discrete ones. These solvers all work by making small (around the 1.e-7 level) changes to the parameter values, seeing what (if any) change that makes in the result, and using that change to refine those values until the smallest residual is found. With your model function using np.sign:
def f(x, a, b): return 0.5 * b * (np.sign(x - a))
such a small change in the value of a will not change the model or fit result at all. That is, first the fit will try the starting value of, say, a=-1.0 or a=0.5, and then will try a=-0.999999995 or a=0.500000005. Those will both give the same result for np.sign(x-a). The fit does not know that it would need to change a by 1 to have any effect on the result. It cannot know this. np.sign() and np.sin() differ by one letter, but behave very differently in this respect.
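To see this concretely (a minimal sketch; the numbers are illustrative):

import numpy as np
x = np.linspace(-2, 2, 1000)
a = 0.5
# a solver-sized perturbation of a leaves np.sign(x - a) unchanged,
# unless a sample happens to land inside the tiny 1e-7 window
print(np.array_equal(np.sign(x - a), np.sign(x - (a + 1e-7))))  # True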
It is pretty common for real data to take a step but to be sampled finely enough so that the step does not happen completely within a single sample. In that case, you would be able to model the step with a variety of functional forms (linear ramp, error function, arc-tangent, logistic, etc.). The thorough answer from @JamesPhilipps gives one approach. I would probably use lmfit (being one of its main authors) and be willing to guess starting values for the parameters from looking at the data, perhaps:
import numpy as np
x = np.linspace(-2, 2, 1000)
a = 0.5
yl = np.ones_like(x[x < a]) * -0.4 + np.random.normal(0, 0.05, x[x < a].shape[0])
yr = np.ones_like(x[x >= a]) * 0.4 + np.random.normal(0, 0.05, x[x >= a].shape[0])
y = np.concatenate((yl, yr))
from lmfit.models import StepModel, ConstantModel
model = StepModel() + ConstantModel()
params = model.make_params(center=0, sigma=1, amplitude=1., c=-0.5)
result = model.fit(y, params, x=x)
print(result.fit_report())
import matplotlib.pyplot as plt
plt.scatter(x, y, label='data')
plt.plot(x, result.best_fit, marker='o', color='r', label='fit')
plt.show()
which would give a good fit and print out results of
[[Model]]
    (Model(step, form='linear') + Model(constant))
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 50
    # data points      = 1000
    # variables        = 4
    chi-square         = 2.32729556
    reduced chi-square = 0.00233664
    Akaike info crit   = -6055.04839
    Bayesian info crit = -6035.41737
## Warning: uncertainties could not be estimated:
[[Variables]]
    amplitude:  0.80013762 (init = 1)
    center:     0.50083312 (init = 0)
    sigma:      4.6009e-04 (init = 1)
    c:         -0.40006255 (init = -0.5)
Note that it will find the center of the step because it assumed there was some finite width (sigma) to the step, but then found that width to be smaller than the step size in x. But also note that it cannot calculate the uncertainties in the parameters because, as above, a small change in center (your a) near the solution does not change the resulting fit. FWIW the StepModel can use a linear, error-function, arc-tangent, or logistic as the step function.
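For example, switching to the error-function step is a one-line change (a sketch, reusing the data and imports above):

# same composite model, but with an erf-shaped step instead of the linear ramp
model = StepModel(form='erf') + ConstantModel()
params = model.make_params(center=0, sigma=1, amplitude=1., c=-0.5)
result = model.fit(y, params, x=x)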
If you had constructed the test data to have a small width to the step, say with something like
from scipy.special import erf
y = 0.638 * erf((x-0.574)/0.005) + np.random.normal(0, 0.05, len(x))
then the fit would have been able to find the best solution and evaluate the uncertainties.
I hope that explains why the fit with your model function could not refine the value of a, and what might be done about it.
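The same idea works with plain curve_fit if the model itself has a finite-width step; here is a minimal sketch (the width parameter w and the starting values p0 are my assumptions, not from the original post):

from scipy.special import erf
from scipy.optimize import curve_fit

def f_smooth(x, a, b, w):
    # smooth step: approaches 0.5 * b * np.sign(x - a) as w -> 0
    return 0.5 * b * erf((x - a) / w)

popt, pcov = curve_fit(f_smooth, x, y, p0=[0.0, 1.0, 0.1])
print(popt)  # fitted a, b, w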
Here is a graphical Python fitter using your data and function, with scipy's differential_evolution genetic algorithm module used to provide the initial parameter estimates for curve_fit. That module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, which requires bounds within which to search. In this example, those bounds are taken from the data max and min values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
# generate data for testing
x = numpy.linspace(-2, 2, 1000)
a = 0.5
yl = numpy.ones_like(x[x < a]) * -0.4 + numpy.random.normal(0, 0.05, x[x < a].shape[0])
yr = numpy.ones_like(x[x >= a]) * 0.4 + numpy.random.normal(0, 0.05, x[x >= a].shape[0])
y = numpy.concatenate((yl, yr))
# alias data to match previous example
xData = x
yData = y
def func(x, a, b):  # variation of the Heaviside step function
    return 0.5 * b * (numpy.sign(x - a))

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore")  # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)

def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    parameterBounds = []
    parameterBounds.append([minX, maxX])  # search bounds for a
    parameterBounds.append([minX, maxX])  # search bounds for b
    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# by default, differential_evolution polishes its result with a local
# minimizer (L-BFGS-B), keeping the parameters within the given bounds
geneticParameters = generate_Initial_Parameters()

# now call curve_fit without bounds, using the genetic algorithm's estimates
# as starting values, in case the best-fit parameters lie outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    plt.close('all')  # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
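As a small variation (my sketch, not part of the original answer), the same search bounds can be handed to curve_fit itself, which then switches to a trust-region ('trf') method:

# keep the parameters inside the genetic algorithm's search box during the fit
lowerBounds = [min(xData), min(xData)]
upperBounds = [max(xData), max(xData)]
boundedParameters, _ = curve_fit(func, xData, yData, p0=geneticParameters,
                                 bounds=(lowerBounds, upperBounds))
print('Bounded fit parameters:', boundedParameters)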
Or you could say that a Heaviside step can be approximated by a sigmoid function:

s(x) = 1 / (1 + exp(-2k(x - a)))

or, in your case,

g(x) = b / (1 + exp(-2k(x - a))) - b/2

You add a parameter k, which will hopefully end up big enough, and then you get rid of it to find the two other parameters.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
x = np.linspace(-2, 2, 1000)
a = 0.5
yl = np.ones_like(x[x < a]) * -0.4 + np.random.normal(0, 0.05, x[x < a].shape[0])
yr = np.ones_like(x[x >= a]) * 0.4 + np.random.normal(0, 0.05, x[x >= a].shape[0])
y = np.concatenate((yl, yr))
plt.scatter(x, y, s=2, color='k')
# def f(x, a, b): return 0.5 * b * (np.sign(x - a))
def g(x, a, b, k): return b / (1 + np.exp(-2 * k * (x - a))) - b / 2
y_sigmoid = g(x, a, 0.8, 10)
plt.scatter(x, y_sigmoid, s=2, color='g')
popt, pcov = curve_fit(g, x, y, p0=[-1.0, 0, 1])
# popt, pcov = curve_fit(f, x, y, p0=[-1.0, 0])
print(popt)
plt.scatter(x, g(x, *popt), s=2, color='r')
which gives, as expected:
[5.02081214e-01 8.03257583e-01 3.33970547e+03]
(green: random soft sigmoid, red: curve_fit result)
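With k fitted that large, the sigmoid is effectively a hard step, so a and b can be read off directly from popt. A small sanity check (my addition, reusing popt and g from above):

a_fit, b_fit, k_fit = popt
# the fitted sigmoid should now be almost indistinguishable from the hard step
hard_step = 0.5 * b_fit * np.sign(x - a_fit)
print(np.max(np.abs(g(x, *popt) - hard_step)))  # expected to be tiny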