Printing Curve Fit Function - Python

I have been struggling to find a way to get the determined parameters for the curve fit function below to print. The graph properly matches my data, but I can't figure out how to get the equation it produced. Any help would be appreciated!
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
x_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
y_data = [.99, 1, .98, .93, .85, .77, .67, .56, .46, .36, .27, .19, .12, .07, .03, .01, 0, .01, .05, .09, .16, .24, .33, .44, .55, .65, .76, .85, .93, .98, 1]
x_val = np.array(x_data)
y_val = np.array(y_data)
def fitFunc(x, a, b, c, d):
    return a * np.sin((2 * np.pi / b) * x - c) + d
print(a, b, c, d)  # raises NameError: a, b, c, d only exist inside fitFunc
plt.plot(x_val, y_val, marker='.', markersize=0, linewidth='0.5', color='green')
popt, pcov = curve_fit(fitFunc, x_val, y_val)
plt.plot(x_val, fitFunc(x_val, *popt), color='orange', linestyle='--')

Here is a graphing example that uses your data; note the equation, which is parameterized a little differently from yours. This example uses initial parameter estimates that were manually read off a scatterplot of the data: the curve_fit defaults are all 1.0, and those do not work well in this case.
import numpy as np
import scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
xData = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0])
yData = np.array([.99, 1.0, 0.98, 0.93, 0.85, 0.77, 0.67, 0.56, 0.46, 0.36, 0.27, 0.19, 0.12, 0.07, 0.03, 0.01, 0, 0.01, 0.05, 0.09, 0.16, 0.24, 0.33, 0.44, 0.55, 0.65, 0.76, 0.85, 0.93, 0.98, 1.0])
def fitFunc(x, amplitude, center, width, offset):
    return amplitude * np.sin(np.pi * (x - center) / width) + offset
# these are the curve_fit default parameter estimates, and
# do not work well for this data and equation - manually estimate below
#initialParameters = np.array([1.0, 1.0, 1.0, 1.0])
# eyeball the scatterplot for some better, simple, initial parameter estimates
initialParameters = np.array([0.5, 1.0, 16.0, 0.5])
# curve fit the test data using initial parameters
fittedParameters, pcov = curve_fit(fitFunc, xData, yData, initialParameters)
print(fittedParameters)
modelPredictions = fitFunc(xData, *fittedParameters)
absError = modelPredictions - yData
SE = np.square(absError) # squared errors
MSE = np.mean(SE) # mean squared errors
RMSE = np.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = np.linspace(min(xData), max(xData))
    yModel = fitFunc(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
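If the goal is the equation itself rather than the raw parameter array, the fitted values can also be unpacked and formatted directly (a small sketch using the parameter names of the model above):
amplitude, center, width, offset = fittedParameters
print('y = {:.4f} * sin(pi * (x - {:.4f}) / {:.4f}) + {:.4f}'.format(
    amplitude, center, width, offset))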

Related

How to colormap errorbars (x and y) in a scatter plot with a dataset (nd.array)?

I'm trying to create a scatter plot with x and y errors that have different marker and errorbar colors in four sections (e.g. red for x=0 to x=2, blue for x=2 to x=5, etc.). I have used a colormap with bounds for the markers, but I haven't been able to do something similar for the errorbars. I've tried to set the markers, errorbars, and caps to the same color in the scatter colormap using this answer to a similar question, but I wasn't able to get it to work for my code (it raises an error about lengths of data not matching, or about being unable to convert to a tuple). I think I haven't correctly modified it for the colormap I use for the markers, or this isn't the best way to get the right result.
This is an example with some made up data:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
bounds = [0,1.5,3,4.5,5]
colors = ["r", "b", "g", "y"]
cmap = matplotlib.colors.ListedColormap(colors)
norm = matplotlib.colors.BoundaryNorm(bounds, len(colors))
x = np.array([0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0])
y = np.array([0.0, 0.1, 0.8, 0.9, 0.7, 0.1, -0.8, -0.5, -1.0, -0.7])
x_err = np.array([0.05, 0.06, 0.04, 0.045, 0.04, 0.06, 0.05, 0.055, 0.02, 0.05])
y_err = np.array([0.04, 0.05, 0.03, 0.055, 0.145, 0.065, 0.045, 0.15, 0.015, 0.17])
plt.scatter(x, y, marker='D', c=x, cmap=cmap, norm=norm)
plt.errorbar(x, y, xerr=x_err, yerr=y_err, fmt='.', lw=2, capsize=3, alpha=0.7, zorder=0)
plt.show()
which gives a plot where the markers follow the colormap but the errorbars keep the default color.
How can I get the errorbars to have the same colormap as the one used in the scatter plot?
This is certainly not the fastest method but it works: get the colors for each x-value using to_rgba and then plot the error bars pointwise (probably slow for large data arrays):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import matplotlib.cm
bounds = [0,1.5,3,4.5,5]
colors = ["r", "b", "g", "y"]
cmap = matplotlib.colors.ListedColormap(colors)
norm = matplotlib.colors.BoundaryNorm(bounds, len(colors))
x = np.array([0.0, 0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 4.0, 5.0, 5.0])
y = np.array([0.0, 0.1, 0.8, 0.9, 0.7, 0.1, -0.8, -0.5, -1.0, -0.7])
x_err = np.array([0.05, 0.06, 0.04, 0.045, 0.04, 0.06, 0.05, 0.055, 0.02, 0.05])
y_err = np.array([0.04, 0.05, 0.03, 0.055, 0.145, 0.065, 0.045, 0.15, 0.015, 0.17])
plt.scatter(x, y, marker='D', c=x, cmap=cmap, norm=norm)
colors = matplotlib.cm.ScalarMappable(norm, cmap).to_rgba(x)
for i, _ in enumerate(x):
    plt.errorbar(x[i], y[i], xerr=x_err[i], yerr=y_err[i], fmt='.', lw=2,
                 capsize=3, alpha=0.7, zorder=0, ecolor=colors[i])
plt.show()
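If the loop is too slow for large arrays, here is a possible vectorized alternative. It is a sketch that assumes matplotlib's usual ErrorbarContainer unpacking into (data_line, caplines, barlinecols), where each element of barlinecols is a LineCollection that accepts one color per segment; caps are dropped (capsize=0) because each caps Line2D holds all points and cannot be colored per point.
plt.scatter(x, y, marker='D', c=x, cmap=cmap, norm=norm)
ebar_colors = matplotlib.cm.ScalarMappable(norm, cmap).to_rgba(x)
data_line, caplines, barlinecols = plt.errorbar(
    x, y, xerr=x_err, yerr=y_err, fmt='none', lw=2, capsize=0,
    alpha=0.7, zorder=0)
for barlinecol in barlinecols:  # one LineCollection for x errors, one for y
    barlinecol.set_color(ebar_colors)
plt.show()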

Python Curve_Fit Exponential / Power / Log Curve - Improve Results

I am trying to fit this data, which asymptotically approaches zero (but never reaches it).
I believe the best curve is an inverse logistic function, but I'm open to suggestions. The key is the decaying "S-curve" shape that is expected.
Here is the code I have so far, and the plot image below, which is a pretty ugly fit.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# DATA
x = pd.Series([1,1,264,882,913,1095,1156,1217,1234,1261,1278,1460,1490,1490,1521,1578,1612,1612,1668,1702,1704,1735,1793,2024,2039,2313,2313,2558,2558,2617,2617,2708,2739,2770,2770,2831,2861,2892,2892,2892,2892,2892,2923,2923,2951,2951,2982,2982,3012,3012,3012,3012,3012,3012,3012,3073,3073,3073,3104,3104,3104,3104,3135,3135,3135,3135,3165,3165,3165,3165,3165,3196,3196,3196,3226,3226,3257,3316,3347,3347,3347,3347,3377,3377,3438,3469,3469]).values
y = pd.Series([1000,600,558.659217877095,400,300,100,7.75,6,8.54,6.66666666666667,7.14,1.1001100110011,1.12,0.89,1,2,0.666666666666667,0.77,1.12612612612613,0.7,0.664010624169987,0.65,0.51,0.445037828215398,0.27,0.1,0.26,0.1,0.1,0.13,0.16,0.1,0.13,0.1,0.12,0.1,0.13,0.14,0.14,0.17,0.11,0.15,0.09,0.1,0.26,0.16,0.09,0.09,0.05,0.09,0.09,0.1,0.1,0.11,0.11,0.09,0.09,0.11,0.08,0.09,0.09,0.1,0.06,0.07,0.07,0.09,0.05,0.05,0.06,0.07,0.08,0.08,0.07,0.1,0.08,0.08,0.05,0.06,0.04,0.04,0.05,0.05,0.04,0.06,0.05,0.05,0.06]).values
# Inverse Logistic Function
# https://en.wikipedia.org/wiki/Logistic_function
def func(x, L, x0, k, b):
    y = 1 / (L / (1 + np.exp(-k * (x - x0))) + b)
    return y
# FIT DATA
p0 = [max(y), np.median(x), 1, min(y)] # this is a mandatory initial guess
popt, pcov = curve_fit(func, x, y, p0, method='dogbox', maxfev=10000)
# PERFORMANCE
modelPredictions = func(x, *popt)
absError = modelPredictions - y
SE = np.square(absError) # squared errors
MSE = np.mean(SE) # mean squared errors
RMSE = np.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(y))
print('Parameters:', popt)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
#PLOT
plt.figure()
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.yscale('log')
#plt.xscale('log')
plt.show()
Here is the result when this code is run... and what I would like to achieve!
How can I better optimize curve_fit so that, instead of the code-generated RED line, I get something closer to the hand-drawn BLUE line?
Thank you!!
From your plot of data and expected fit, I would guess that you do not really want to model your data y as a logistic-like step function, but rather log(y) as a logistic-like step function.
So I think you would probably want to use a logistic step function, perhaps with a linear component added, to model the log of this data. I would do this with lmfit, as it comes with these models built in, gives better reporting of results, and lets you greatly simplify the fitting code (disclaimer: I am a lead author):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from lmfit.models import StepModel, LinearModel
# DATA
x = pd.Series([1, 1, 264, 882, 913, 1095, 1156, 1217, 1234, 1261, 1278,
1460, 1490, 1490, 1521, 1578, 1612, 1612, 1668, 1702, 1704,
1735, 1793, 2024, 2039, 2313, 2313, 2558, 2558, 2617, 2617,
2708, 2739, 2770, 2770, 2831, 2861, 2892, 2892, 2892, 2892,
2892, 2923, 2923, 2951, 2951, 2982, 2982, 3012, 3012, 3012,
3012, 3012, 3012, 3012, 3073, 3073, 3073, 3104, 3104, 3104,
3104, 3135, 3135, 3135, 3135, 3165, 3165, 3165, 3165, 3165,
3196, 3196, 3196, 3226, 3226, 3257, 3316, 3347, 3347, 3347,
3347, 3377, 3377, 3438, 3469, 3469]).values
y = pd.Series([1000, 600, 558.659217877095, 400, 300, 100, 7.75, 6, 8.54,
6.66666666666667, 7.14, 1.1001100110011, 1.12, 0.89, 1, 2,
0.666666666666667, 0.77, 1.12612612612613, 0.7,
0.664010624169987, 0.65, 0.51, 0.445037828215398, 0.27, 0.1,
0.26, 0.1, 0.1, 0.13, 0.16, 0.1, 0.13, 0.1, 0.12, 0.1, 0.13,
0.14, 0.14, 0.17, 0.11, 0.15, 0.09, 0.1, 0.26, 0.16, 0.09,
0.09, 0.05, 0.09, 0.09, 0.1, 0.1, 0.11, 0.11, 0.09, 0.09,
0.11, 0.08, 0.09, 0.09, 0.1, 0.06, 0.07, 0.07, 0.09, 0.05,
0.05, 0.06, 0.07, 0.08, 0.08, 0.07, 0.1, 0.08, 0.08, 0.05,
0.06, 0.04, 0.04, 0.05, 0.05, 0.04, 0.06, 0.05, 0.05, 0.06]).values
model = StepModel(form='logistic') + LinearModel()
params = model.make_params(amplitude=-5, center=1000, sigma=100, intercept=0, slope=0)
result = model.fit(np.log(y), params, x=x)
print(result.fit_report())
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, np.exp(result.best_fit), 'r-', label="Fitted Curve")
plt.legend()
plt.yscale('log')
plt.show()
That will print out a report with fit statistics and best-fit values of:
[[Model]]
    (Model(step, form='logistic') + Model(linear))
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 73
    # data points      = 87
    # variables        = 5
    chi-square         = 9.38961801
    reduced chi-square = 0.11450754
    Akaike info crit   = -183.688405
    Bayesian info crit = -171.358865
[[Variables]]
    amplitude: -4.89008796 +/- 0.29600969 (6.05%) (init = -5)
    center:     1180.65823 +/- 15.2836422 (1.29%) (init = 1000)
    sigma:      94.0317580 +/- 18.5328976 (19.71%) (init = 100)
    slope:     -0.00147861 +/- 8.1151e-05 (5.49%) (init = 0)
    intercept:  6.95177838 +/- 0.17170849 (2.47%) (init = 0)
[[Correlations]] (unreported correlations are < 0.100)
    C(amplitude, slope)     = -0.798
    C(amplitude, sigma)     = -0.649
    C(amplitude, intercept) = -0.605
    C(center, intercept)    = -0.574
    C(sigma, slope)         =  0.542
    C(sigma, intercept)     =  0.348
    C(center, sigma)        = -0.335
    C(amplitude, center)    =  0.282
and produce a plot of the data (black points) with the fitted curve (red line) on a log-scaled y axis.
You could certainly reproduce all that with scipy.optimize.curve_fit if you desired, but I would leave that as an exercise.
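For the curious, here is a rough curve_fit equivalent of that model. It is a sketch that assumes lmfit's logistic StepModel parameterization, amplitude / (1 + exp(-(x - center) / sigma)), plus a line; x and y are the arrays defined above.
def step_plus_line(x, amplitude, center, sigma, slope, intercept):
    # logistic step, as in lmfit's StepModel(form='logistic'), plus a line
    return amplitude / (1.0 + np.exp(-(x - center) / sigma)) + slope * x + intercept
popt, pcov = curve_fit(step_plus_line, x, np.log(y),
                       p0=[-5, 1000, 100, 0, 0], maxfev=10000)
print(popt)  # should land close to the lmfit values reported above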
In your case I'd fit a hyperbolic tangent (see note 1 below) to the base-10 logarithm of your data.
Let's use

    log10(y) = y₀ - a·tanh(λ(x - x₀))

as the model. Your x runs roughly from 0 to 3500 and your log10(y) from 3 to -1. Since tanh(2) = -tanh(-2) ≈ 1, we get

    y₀ + a = 3,  y₀ - a = -1  ⇒  y₀ = 1,  a = 2;
    λ = (2 - (-2)) / (3500 - 0);  x₀ = (3500 - 0) / 2.

(This rough estimate is needed to provide curve_fit with an initial guess; otherwise the procedure gets lost.)
Omitting the boilerplate, I eventually have
X = np.linspace(0, 3500, 701)
plt.scatter(x, np.log10(y), label='data')
plt.plot(X, 1-2*np.tanh(4/3500*(X-1750)), label='hand fit')
(y0, a, l, x0), *_ = curve_fit(
    lambda x, y0, a, l, x0: y0 - a * np.tanh(l * (x - x0)),
    x, np.log10(y),
    p0=[1, 2, 4/3500, 3500/2])
plt.plot(X, y0 - a * np.tanh(l * (X - x0)), label='curve_fit fit')
plt.legend()
Note 1: the logistic function is the hyperbolic tangent in disguise: 1/(1 + e^(-x)) = (1 + tanh(x/2))/2.
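A quick numerical check of that identity:
import numpy as np
t = np.linspace(-5, 5, 11)
print(np.allclose(1 / (1 + np.exp(-t)), (1 + np.tanh(t / 2)) / 2))  # True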
I see that your plot uses log scaling, and I found that several different sigmoidal equations gave what appear to be good fits to the natural log of the Y data. Here is a graphical Python fitter using the natural log of the Y data with a four-parameter Logistic equation:
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings
xData = numpy.array([1,1,264,882,913,1095,1156,1217,1234,1261,1278,1460,1490,1490,1521,1578,1612,1612,1668,1702,1704,1735,1793,2024,2039,2313,2313,2558,2558,2617,2617,2708,2739,2770,2770,2831,2861,2892,2892,2892,2892,2892,2923,2923,2951,2951,2982,2982,3012,3012,3012,3012,3012,3012,3012,3073,3073,3073,3104,3104,3104,3104,3135,3135,3135,3135,3165,3165,3165,3165,3165,3196,3196,3196,3226,3226,3257,3316,3347,3347,3347,3347,3377,3377,3438,3469,3469], dtype=float)
yData = numpy.array([1000,600,558.659217877095,400,300,100,7.75,6,8.54,6.66666666666667,7.14,1.1001100110011,1.12,0.89,1,2,0.666666666666667,0.77,1.12612612612613,0.7,0.664010624169987,0.65,0.51,0.445037828215398,0.27,0.1,0.26,0.1,0.1,0.13,0.16,0.1,0.13,0.1,0.12,0.1,0.13,0.14,0.14,0.17,0.11,0.15,0.09,0.1,0.26,0.16,0.09,0.09,0.05,0.09,0.09,0.1,0.1,0.11,0.11,0.09,0.09,0.11,0.08,0.09,0.09,0.1,0.06,0.07,0.07,0.09,0.05,0.05,0.06,0.07,0.08,0.08,0.07,0.1,0.08,0.08,0.05,0.06,0.04,0.04,0.05,0.05,0.04,0.06,0.05,0.05,0.06], dtype=float)
# fit the natural log of the data
yData = numpy.log(yData)
warnings.filterwarnings("ignore") # do not print "invalid value" warnings during fit
def func(x, a, b, c, d): # Four-Parameter Logistic from zunzun.com
    return d + (a - d) / (1.0 + numpy.power(x / c, b))
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
print('Parameters:', fittedParameters)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Natural Log of Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Regarding the line p0 = [max(y), np.median(x), 1, min(y)] (the initial guess) from the question:
Just to clarify, since this might be your issue, you shouldn't use 1.0 as your initial guess for k. You should use 1.0 / (max(x) - min(x)).
If your x data ranges over, say, [1200, 8000], then a k of 1.0 will really struggle to converge. You want k = 1/6800, so that you start off with the x-range normalized to roughly [-1, 1].
The main reason is that np.exp(4000) will generally fail to evaluate, which causes Python to struggle to fit the function.
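As a sketch, applying that advice to the func and data from the question above:
k0 = 1.0 / (max(x) - min(x))             # ~1/3468 for this data
p0 = [max(y), np.median(x), k0, min(y)]  # k guess scaled to the x-range
popt, pcov = curve_fit(func, x, y, p0, method='dogbox', maxfev=10000)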

Python: how to fit function to points?

I am trying to fit a curve to some points.
### Analysis: cost function
import matplotlib.pyplot as plt
md = 215 / 0.89
wl = [0, 0.5, 1, 1.5, 2, 3, 4, 5, 6]
d = [0, 0.49, 0.71, 0.84, 0.95, 0.98, 1.0, 1.0, 1.0]
dr = []
for i in d:
    dr.append(i * md)
f, ax = plt.subplots(figsize=(9.5, 6.5))
ax = setFont(ax, 'Arial', 14)  # setFont is my own helper (not shown)
ax.plot(wl, dr, lw=2)
ax.grid()
This is a typical logistic function. This is what I am doing:
from scipy.optimize import curve_fit
def func(t, alpha, a):
    return 241.573 / 1 + (a * np.exp(alpha * t))
# coefficients and curve fit for curve
popt, pcov = curve_fit(func, wl, dr)
alpha, a = popt
v_fit = func(wl, alpha, a)
But I get the error
TypeError: can't multiply sequence by non-int of type 'numpy.float64'
The error is due to the fact that wl isn't a numpy array:
import numpy as np
from scipy.optimize import curve_fit
md = 215 / 0.89
wl = np.array([0, 0.5, 1, 1.5, 2, 3, 4, 5, 6])
d = np.array([0, 0.49, 0.71, 0.84, 0.95, 0.98, 1.0, 1.0, 1.0])
dr = np.array([i*md for i in d])
def func(t, alpha, a):
    return 241.573 / 1 + (a * np.exp(alpha * t))
# coefficients and curve fit for curve
popt, pcov = curve_fit(func, wl, dr)
alpha, a = popt
v_fit = func(wl, alpha, a)
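One caveat worth flagging, since it may be unintentional: operator precedence makes 241.573 / 1 + (a * np.exp(alpha * t)) evaluate as (241.573 / 1) + a * np.exp(alpha * t). If a standard logistic curve was intended, the denominator needs its own parentheses, e.g.:
def func(t, alpha, a):
    # whole denominator grouped; the sign of alpha is absorbed by the fit
    return 241.573 / (1 + a * np.exp(alpha * t))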
Based on sample code found here:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# func calculates the y-values; the scipy docs sample uses an exponential
# decay with the signature func(x, a, b, c)
def func(x, a, b, c):
    return a * np.exp(-b * x) + c
xdata = np.linspace(0, 4, 50)
np.random.seed(1729)
ydata = func(xdata, 2.5, 1.3, 0.5) + 0.2 * np.random.normal(size=xdata.size)
plt.plot(xdata, ydata, 'b-', label='data')
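The snippet stops before the actual fit; a short continuation in the spirit of the scipy documentation example it is based on:
popt, pcov = curve_fit(func, xdata, ydata)  # fit a, b, c to the noisy samples
plt.plot(xdata, func(xdata, *popt), 'r-',
         label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))
plt.legend()
plt.show()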

Predicting Sine waves in python

I'm trying to write an algorithm in Python that predicts the output of a sine wave. For example, if the input is 90 (in degrees), the output is 1.
When I try Linear Regression, the output is pretty bad.
[in]
import pandas as pd
from sklearn.linear_model import LinearRegression
dic = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360]
dc = [0, 0.5, 0.866, 1, .866, 0.5, 0, -0.5, -0.866, -1, -0.866, -0.5, 0]
test = [1, 10, 100]
df = pd.DataFrame(dic)
dfy = pd.DataFrame(dc)
test = pd.DataFrame(test)
clf = LinearRegression()
clf.fit(df, dfy)
print(clf.predict(test))
[out]
[[0.7340967 ]
[0.69718681]
[0.32808791]]
And logistic regression doesn't fit at all, because it is for classification. What approaches would be better suited to this problem?
Here is a graphical non-linear fitter using your data and a sine function. The numpy sine function uses radians, so the sine function used here rescales the input. I guessed the initial parameter estimates by looking at a scatterplot of the data, and from the RMSE of nearly 0.0 and the R-squared of almost 1.0 the data would seem to have no noise component.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
dic = [0.0, 30.0, 60.0, 90.0, 120.0, 150.0, 180.0, 210.0, 240.0, 270.0, 300.0, 330.0, 360.0]
dc = [0.0, 0.5, 0.866, 1.0, 0.866, 0.5, 0.0, -0.5, -0.866, -1.0, -0.866, -0.5, 0.0]
# rename data to match previous example code, converting to numpy arrays
xData = numpy.array(dic)
yData = numpy.array(dc)
def func(x, amplitude, center, width):
    return amplitude * numpy.sin(numpy.pi * (x - center) / width)
# these are estimated from a scatterplot of the data
initialParameters = numpy.array([-1.0, 180.0, 180.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
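To get back to the prediction part of the question, the fitted model can then be evaluated at new inputs; a small sketch using the question's test points (in degrees):
test = numpy.array([1.0, 10.0, 100.0])
print(func(test, *fittedParameters))  # should be close to sin(radians(test))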

Scipy Curve Fit Optimize not working for log scale values

So I am trying to fit a set of data points I was given to the Shockley diode equation:

    abs(I) = Io * (exp((q*V) / (n*k*T)) - 1)

Knowing the V and I values, I need to optimize the Io and n values to get data closely matching the data set I was given.
However, scipy's curve_fit is not giving me the values I want, which are n ≈ 1.15 and Io ≈ 1.8E-13; instead it gives n = 2.12 and Io = 2.11E-11. I suspect this is due to the data set values being very small numbers, messing with the optimization, but even when I set the initial guess to n = 1.15 and Io = 1.8E-13, the optimized values do not change.
Does anyone have any tips on how to fix this?
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
Voltage = np.array([-0.5 , -0.49, -0.48, -0.47, -0.46, -0.45, -0.44, -0.43, -0.42,
-0.41, -0.4 , -0.39, -0.38, -0.37, -0.36, -0.35, -0.34, -0.33,
-0.32, -0.31, -0.3 , -0.29, -0.28, -0.27, -0.26, -0.25, -0.24,
-0.23, -0.22, -0.21, -0.2 , -0.19, -0.18, -0.17, -0.16, -0.15,
-0.14, -0.13, -0.12, -0.11, -0.1 , -0.09, -0.08, -0.07, -0.06,
-0.05, -0.04, -0.03, -0.02, -0.01, 0. , 0.01, 0.02, 0.03,
0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.12,
0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ,
0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ])
Current = np.array([ 6.99000000e-13, 6.83000000e-13, 6.57000000e-13,
6.46000000e-13, 6.19000000e-13, 6.07000000e-13,
5.86000000e-13, 5.73000000e-13, 5.55000000e-13,
5.37000000e-13, 5.27000000e-13, 5.08000000e-13,
4.92000000e-13, 4.75000000e-13, 4.61000000e-13,
4.43000000e-13, 4.32000000e-13, 4.18000000e-13,
3.99000000e-13, 3.91000000e-13, 3.79000000e-13,
3.66000000e-13, 3.54000000e-13, 3.43000000e-13,
3.34000000e-13, 3.18000000e-13, 3.06000000e-13,
2.96000000e-13, 2.86000000e-13, 2.77000000e-13,
2.66000000e-13, 2.59000000e-13, 2.54000000e-13,
2.43000000e-13, 2.33000000e-13, 2.22000000e-13,
2.16000000e-13, 2.07000000e-13, 2.00000000e-13,
1.94000000e-13, 1.85000000e-13, 1.77000000e-13,
1.68000000e-13, 1.58000000e-13, 1.48000000e-13,
1.35000000e-13, 1.21000000e-13, 1.03000000e-13,
7.53000000e-14, 4.32000000e-14, 2.33000000e-15,
6.46000000e-14, 1.57000000e-13, 2.82000000e-13,
4.58000000e-13, 7.07000000e-13, 1.06000000e-12,
1.57000000e-12, 2.28000000e-12, 3.29000000e-12,
4.75000000e-12, 6.80000000e-12, 9.76000000e-12,
1.39000000e-11, 1.82000000e-11, 2.57000000e-11,
3.67000000e-11, 5.21000000e-11, 7.39000000e-11,
1.04000000e-10, 1.62000000e-10, 2.27000000e-10,
3.21000000e-10, 4.48000000e-10, 6.21000000e-10,
8.70000000e-10, 1.20000000e-09, 1.66000000e-09,
2.27000000e-09, 3.08000000e-09, 4.13000000e-09,
5.46000000e-09, 7.05000000e-09, 8.85000000e-09,
1.11000000e-08, 1.39000000e-08, 1.74000000e-08,
2.05000000e-08, 2.28000000e-08, 2.52000000e-08,
2.91000000e-08])
def diode_function(V, n, Io):
    kt = 300 * 1.38 * math.pow(10, -23)
    q = 1.60 * math.pow(10, -19)
    I_final = Io * (np.exp( (q * V) / (n * kt) ) - 1)
    return abs(I_final)
p0 = [1.15, 1.8e-13]
popt, pcov = curve_fit(diode_function, Voltage, Current, p0 = p0)
print(popt)
fig = plt.figure()
ax = fig.add_subplot(121)
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
plt.plot(Voltage, Current, 'ko', label="Original Data")
plt.plot(Voltage, diode_function(Voltage, *popt), 'r-', label="Fitted Curve")
plt.legend(loc='best')
ax = fig.add_subplot(122)
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
popt = [1.15, 1.8e-13]  # override with the desired parameters for comparison
plt.plot(Voltage, Current, 'ko', label="Original Data")
plt.plot(Voltage, diode_function(Voltage, *popt), 'r-', label="Fitted Curve")
plt.legend(loc='best')
plt.show()
Picture of the graph: the left plot shows the scipy-optimized fit, and the right plot shows the curve I want.
I guess you are on the right track, using the logarithm to scale the data such that the differences are much lower. In order to prevent problems with logarithms, one usual option is to add a constant. Instead of log(x), one would use log(x+constant). This constant needs to be 1 or higher.
Using different constants still gives different results though, again because larger values are weighted higher in the least-squares method.
# imports and data as in question
def diode_function(V, n, Io):
    kt = 300 * 1.38e-23
    q = 1.60e-19
    I_final = Io * (np.exp( (q * V) / (n * kt) ) - 1)
    return np.abs(I_final)
p0 = [1.15, 1.8e-13]
popt, pcov = curve_fit(diode_function, Voltage, Current, p0 = p0)
fig, ax = plt.subplots()
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
ax.plot(Voltage, Current, 'ko', label="Original Data")
offsets = [1,15]
colors = ["limegreen", "crimson"]
for offset, color in zip(offsets, colors):
    logdf = lambda V, n, Io: np.log10(diode_function(V, n, Io) + offset)
    poptn, pcovn = curve_fit(logdf, Voltage, np.log10(Current + offset), p0=p0)
    ax.plot(Voltage, 10**(logdf(Voltage, *poptn)) - offset,
            color=color, label="fit (offset: {})".format(offset))
ax.legend(loc='best')
plt.show()
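As an aside, another common way to keep the tiny currents from being swamped in the least-squares sum is curve_fit's sigma argument, which weights each residual; passing sigma=Current gives every point roughly equal relative weight (a sketch, reusing diode_function and the data above):
popt_w, pcov_w = curve_fit(diode_function, Voltage, Current,
                           p0=[1.15, 1.8e-13], sigma=Current, absolute_sigma=False)
print(popt_w)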
